embulk-output-kafka 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +14 -3
- data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +19 -4
- data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +37 -7
- data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +42 -14
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d02c758c81651e54d09421f19c1ba4970ddb04c4
|
4
|
+
data.tar.gz: 18c7cbb212fa721f11427250c6132d6cf937a292
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f6cd307587c3edc55b87404b708a9a21ad904e9685ca1e28eebb662a1cead956175173ce6d8d04821d1a5d3866c143beb10a67bb67ad8e71416b207cbdf31da
|
7
|
+
data.tar.gz: 13de4897add59955cab7a7adbdb252a1a843b8042f94c5f23faca63d4bda1e094c4003f0486244c3e50ec30f2ec5004728bce496713ab8051edf6fed4ff636f8
|
data/README.md
CHANGED
@@ -18,6 +18,7 @@
|
|
18
18
|
- **avsc**: inline avro schema config (json, default: `null`)
|
19
19
|
- **ignore_columns**: remove columns from output (array(string), default: `[]`)
|
20
20
|
- **key_column_name**: use column value as record key (string, default: `null`; if this parameter is null, a random number is used as the record key; the key column may also be listed in `ignore_columns`)
|
21
|
+
- **partition_column_name**: use column value as partition id (string, default: `null`; this value takes precedence over `key_column_name`, and if the partition column value is null, the key column is used for partitioning)
|
21
22
|
- **record_batch_size**: kafka producer record batch size (integer, default: `1000`)
|
22
23
|
- **acks**: kafka producer require acks (string, default: `"1"`)
|
23
24
|
- **retries**: kafka producer max retry count (integer, default: `1`)
|
data/build.gradle
CHANGED
@@ -17,13 +17,24 @@ import java.util.stream.Collectors;
|
|
17
17
|
public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
|
18
18
|
{
|
19
19
|
private Schema avroSchema;
|
20
|
-
|
20
|
+
private GenericRecord genericRecord;
|
21
21
|
|
22
|
-
|
22
|
+
AvroFormatColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader, Schema avroSchema)
|
23
23
|
{
|
24
24
|
super(task, pageReader);
|
25
25
|
this.avroSchema = avroSchema;
|
26
|
-
|
26
|
+
}
|
27
|
+
|
28
|
+
GenericRecord getGenericRecord()
|
29
|
+
{
|
30
|
+
return genericRecord;
|
31
|
+
}
|
32
|
+
|
33
|
+
@Override
|
34
|
+
void reset()
|
35
|
+
{
|
36
|
+
super.reset();
|
37
|
+
this.genericRecord = new GenericData.Record(avroSchema);
|
27
38
|
}
|
28
39
|
|
29
40
|
@Override
|
@@ -13,14 +13,25 @@ import java.time.format.DateTimeFormatter;
|
|
13
13
|
public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
|
14
14
|
{
|
15
15
|
private ObjectMapper objectMapper;
|
16
|
-
|
16
|
+
private ObjectNode jsonNode;
|
17
17
|
|
18
18
|
private static DateTimeFormatter timestampFormatter = DateTimeFormatter.ISO_INSTANT;
|
19
19
|
|
20
|
-
|
20
|
+
JsonFormatColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader, ObjectMapper objectMapper)
|
21
21
|
{
|
22
22
|
super(task, pageReader);
|
23
23
|
this.objectMapper = objectMapper;
|
24
|
+
}
|
25
|
+
|
26
|
+
ObjectNode getJsonNode()
|
27
|
+
{
|
28
|
+
return jsonNode;
|
29
|
+
}
|
30
|
+
|
31
|
+
@Override
|
32
|
+
void reset()
|
33
|
+
{
|
34
|
+
super.reset();
|
24
35
|
this.jsonNode = objectMapper.createObjectNode();
|
25
36
|
}
|
26
37
|
|
@@ -42,6 +53,8 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
|
|
42
53
|
@Override
|
43
54
|
public void longColumn(Column column)
|
44
55
|
{
|
56
|
+
super.longColumn(column);
|
57
|
+
|
45
58
|
if (isIgnoreColumn(column)) {
|
46
59
|
return;
|
47
60
|
}
|
@@ -58,6 +71,8 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
|
|
58
71
|
@Override
|
59
72
|
public void doubleColumn(Column column)
|
60
73
|
{
|
74
|
+
super.doubleColumn(column);
|
75
|
+
|
61
76
|
if (isIgnoreColumn(column)) {
|
62
77
|
return;
|
63
78
|
}
|
@@ -68,12 +83,13 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
|
|
68
83
|
}
|
69
84
|
|
70
85
|
jsonNode.put(column.getName(), pageReader.getDouble(column));
|
71
|
-
super.doubleColumn(column);
|
72
86
|
}
|
73
87
|
|
74
88
|
@Override
|
75
89
|
public void stringColumn(Column column)
|
76
90
|
{
|
91
|
+
super.stringColumn(column);
|
92
|
+
|
77
93
|
if (isIgnoreColumn(column)) {
|
78
94
|
return;
|
79
95
|
}
|
@@ -84,7 +100,6 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
|
|
84
100
|
}
|
85
101
|
|
86
102
|
jsonNode.put(column.getName(), pageReader.getString(column));
|
87
|
-
super.stringColumn(column);
|
88
103
|
}
|
89
104
|
|
90
105
|
@Override
|
@@ -6,32 +6,57 @@ import org.embulk.spi.PageReader;
|
|
6
6
|
|
7
7
|
public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
|
8
8
|
{
|
9
|
-
KafkaOutputPlugin.PluginTask task;
|
9
|
+
private KafkaOutputPlugin.PluginTask task;
|
10
10
|
PageReader pageReader;
|
11
|
+
private String partitionColumnName;
|
11
12
|
|
12
|
-
|
13
|
-
|
13
|
+
private Object recordKey = null;
|
14
|
+
private String topicName = null;
|
15
|
+
private Integer partition = null;
|
14
16
|
|
15
|
-
|
17
|
+
KafkaOutputColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader)
|
16
18
|
{
|
17
19
|
this.task = task;
|
18
20
|
this.pageReader = pageReader;
|
21
|
+
this.partitionColumnName = task.getPartitionColumnName().orElse(null);
|
19
22
|
}
|
20
23
|
|
21
|
-
|
24
|
+
Object getRecordKey()
|
25
|
+
{
|
26
|
+
return recordKey;
|
27
|
+
}
|
28
|
+
|
29
|
+
private void setRecordKey(Column column, Object value)
|
22
30
|
{
|
23
31
|
if (task.getKeyColumnName().isPresent() && task.getKeyColumnName().get().equals(column.getName())) {
|
24
32
|
recordKey = value;
|
25
33
|
}
|
26
34
|
}
|
27
35
|
|
28
|
-
|
36
|
+
String getTopicName()
|
37
|
+
{
|
38
|
+
return topicName;
|
39
|
+
}
|
40
|
+
|
41
|
+
private void setTopicName(Column column, String value)
|
29
42
|
{
|
30
43
|
if (task.getTopicColumn().isPresent() && task.getTopicColumn().get().equals(column.getName())) {
|
31
44
|
topicName = value;
|
32
45
|
}
|
33
46
|
}
|
34
47
|
|
48
|
+
Integer getPartition()
|
49
|
+
{
|
50
|
+
return partition;
|
51
|
+
}
|
52
|
+
|
53
|
+
void reset()
|
54
|
+
{
|
55
|
+
this.recordKey = null;
|
56
|
+
this.topicName = null;
|
57
|
+
this.partition = null;
|
58
|
+
}
|
59
|
+
|
35
60
|
boolean isIgnoreColumn(Column column)
|
36
61
|
{
|
37
62
|
return task.getIgnoreColumns().stream().anyMatch(name -> name.equals(column.getName()));
|
@@ -41,7 +66,12 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
|
|
41
66
|
public void longColumn(Column column)
|
42
67
|
{
|
43
68
|
if (!pageReader.isNull(column)) {
|
44
|
-
|
69
|
+
long value = pageReader.getLong(column);
|
70
|
+
setRecordKey(column, value);
|
71
|
+
|
72
|
+
if (partitionColumnName != null && partitionColumnName.equals(column.getName())) {
|
73
|
+
partition = Long.valueOf(value).intValue();
|
74
|
+
}
|
45
75
|
}
|
46
76
|
}
|
47
77
|
|
@@ -4,7 +4,10 @@ import com.fasterxml.jackson.annotation.JsonCreator;
|
|
4
4
|
import com.fasterxml.jackson.annotation.JsonValue;
|
5
5
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
6
6
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
7
|
-
import
|
7
|
+
import com.google.common.collect.ImmutableList;
|
8
|
+
import org.apache.kafka.clients.admin.AdminClient;
|
9
|
+
import org.apache.kafka.clients.admin.AdminClientConfig;
|
10
|
+
import org.apache.kafka.clients.admin.DescribeTopicsResult;
|
8
11
|
import org.apache.kafka.clients.producer.KafkaProducer;
|
9
12
|
import org.apache.kafka.clients.producer.ProducerRecord;
|
10
13
|
import org.embulk.config.Config;
|
@@ -15,7 +18,6 @@ import org.embulk.config.ConfigSource;
|
|
15
18
|
import org.embulk.config.Task;
|
16
19
|
import org.embulk.config.TaskReport;
|
17
20
|
import org.embulk.config.TaskSource;
|
18
|
-
import org.embulk.spi.ColumnConfig;
|
19
21
|
import org.embulk.spi.Exec;
|
20
22
|
import org.embulk.spi.OutputPlugin;
|
21
23
|
import org.embulk.spi.Page;
|
@@ -32,7 +34,11 @@ import java.util.Locale;
|
|
32
34
|
import java.util.Map;
|
33
35
|
import java.util.Optional;
|
34
36
|
import java.util.PrimitiveIterator;
|
37
|
+
import java.util.Properties;
|
35
38
|
import java.util.Random;
|
39
|
+
import java.util.concurrent.ExecutionException;
|
40
|
+
import java.util.concurrent.TimeUnit;
|
41
|
+
import java.util.concurrent.TimeoutException;
|
36
42
|
import java.util.concurrent.atomic.AtomicInteger;
|
37
43
|
|
38
44
|
public class KafkaOutputPlugin
|
@@ -96,6 +102,10 @@ public class KafkaOutputPlugin
|
|
96
102
|
@ConfigDefault("null")
|
97
103
|
public Optional<String> getKeyColumnName();
|
98
104
|
|
105
|
+
@Config("partition_column_name")
|
106
|
+
@ConfigDefault("null")
|
107
|
+
public Optional<String> getPartitionColumnName();
|
108
|
+
|
99
109
|
@Config("record_batch_size")
|
100
110
|
@ConfigDefault("1000")
|
101
111
|
public int getRecordBatchSize();
|
@@ -124,17 +134,31 @@ public class KafkaOutputPlugin
|
|
124
134
|
private static ObjectMapper objectMapper = new ObjectMapper();
|
125
135
|
private Logger logger = LoggerFactory.getLogger(getClass());
|
126
136
|
|
137
|
+
private AdminClient getKafkaAdminClient(PluginTask task)
|
138
|
+
{
|
139
|
+
Properties properties = new Properties();
|
140
|
+
properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
|
141
|
+
AdminClient adminClient = AdminClient.create(properties);
|
142
|
+
return adminClient;
|
143
|
+
}
|
144
|
+
|
127
145
|
@Override
|
128
146
|
public ConfigDiff transaction(ConfigSource config,
|
129
147
|
Schema schema, int taskCount,
|
130
148
|
Control control)
|
131
149
|
{
|
132
150
|
PluginTask task = config.loadConfig(PluginTask.class);
|
151
|
+
AdminClient adminClient = getKafkaAdminClient(task);
|
152
|
+
DescribeTopicsResult result = adminClient.describeTopics(ImmutableList.of(task.getTopic()));
|
153
|
+
try {
|
154
|
+
if (result.all().get(30, TimeUnit.SECONDS).size() == 0) {
|
155
|
+
throw new RuntimeException("target topic is not found");
|
156
|
+
}
|
157
|
+
}
|
158
|
+
catch (InterruptedException | ExecutionException | TimeoutException e) {
|
159
|
+
throw new RuntimeException("failed to connect kafka brokers");
|
160
|
+
}
|
133
161
|
|
134
|
-
// retryable (idempotent) output:
|
135
|
-
// return resume(task.dump(), schema, taskCount, control);
|
136
|
-
|
137
|
-
// non-retryable (non-idempotent) output:
|
138
162
|
control.run(task.dump());
|
139
163
|
return Exec.newConfigDiff();
|
140
164
|
}
|
@@ -179,22 +203,24 @@ public class KafkaOutputPlugin
|
|
179
203
|
AtomicInteger recordLoggingCount = new AtomicInteger(1);
|
180
204
|
|
181
205
|
return new TransactionalPageOutput() {
|
206
|
+
private JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
|
207
|
+
|
182
208
|
@Override
|
183
209
|
public void add(Page page)
|
184
210
|
{
|
185
211
|
pageReader.setPage(page);
|
186
212
|
while (pageReader.nextRecord()) {
|
187
|
-
|
213
|
+
columnVisitor.reset();
|
188
214
|
|
189
215
|
pageReader.getSchema().visitColumns(columnVisitor);
|
190
216
|
|
191
|
-
Object recordKey = columnVisitor.
|
217
|
+
Object recordKey = columnVisitor.getRecordKey();
|
192
218
|
if (recordKey == null) {
|
193
219
|
recordKey = randomLong.next();
|
194
220
|
}
|
195
221
|
|
196
|
-
String targetTopic = columnVisitor.
|
197
|
-
ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.
|
222
|
+
String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
|
223
|
+
ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.getJsonNode());
|
198
224
|
producer.send(producerRecord, (metadata, exception) -> {
|
199
225
|
if (exception != null) {
|
200
226
|
logger.error("produce error", exception);
|
@@ -269,23 +295,25 @@ public class KafkaOutputPlugin
|
|
269
295
|
final org.apache.avro.Schema finalAvroSchema = avroSchema;
|
270
296
|
return new TransactionalPageOutput()
|
271
297
|
{
|
298
|
+
private AvroFormatColumnVisitor columnVisitor = new AvroFormatColumnVisitor(task, pageReader, finalAvroSchema);
|
299
|
+
|
272
300
|
@Override
|
273
301
|
public void add(Page page)
|
274
302
|
{
|
275
303
|
pageReader.setPage(page);
|
276
304
|
while (pageReader.nextRecord()) {
|
277
|
-
|
305
|
+
columnVisitor.reset();
|
278
306
|
|
279
307
|
pageReader.getSchema().visitColumns(columnVisitor);
|
280
308
|
|
281
|
-
Object recordKey = columnVisitor.
|
309
|
+
Object recordKey = columnVisitor.getRecordKey();
|
282
310
|
if (recordKey == null) {
|
283
311
|
recordKey = randomLong.next();
|
284
312
|
}
|
285
313
|
|
286
|
-
String targetTopic = columnVisitor.
|
314
|
+
String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
|
287
315
|
|
288
|
-
ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.
|
316
|
+
ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getGenericRecord());
|
289
317
|
producer.send(producerRecord, (metadata, exception) -> {
|
290
318
|
if (exception != null) {
|
291
319
|
logger.error("produce error", exception);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-kafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-10-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -54,7 +54,7 @@ files:
|
|
54
54
|
- classpath/common-config-5.3.0.jar
|
55
55
|
- classpath/common-utils-5.3.0.jar
|
56
56
|
- classpath/commons-compress-1.18.jar
|
57
|
-
- classpath/embulk-output-kafka-0.1.
|
57
|
+
- classpath/embulk-output-kafka-0.1.4.jar
|
58
58
|
- classpath/jackson-annotations-2.9.0.jar
|
59
59
|
- classpath/jackson-core-2.9.9.jar
|
60
60
|
- classpath/jackson-databind-2.9.9.jar
|