embulk-output-kafka 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +14 -3
- data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +19 -4
- data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +37 -7
- data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +42 -14
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d02c758c81651e54d09421f19c1ba4970ddb04c4
+  data.tar.gz: 18c7cbb212fa721f11427250c6132d6cf937a292
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4f6cd307587c3edc55b87404b708a9a21ad904e9685ca1e28eebb662a1cead956175173ce6d8d04821d1a5d3866c143beb10a67bb67ad8e71416b207cbdf31da
+  data.tar.gz: 13de4897add59955cab7a7adbdb252a1a843b8042f94c5f23faca63d4bda1e094c4003f0486244c3e50ec30f2ec5004728bce496713ab8051edf6fed4ff636f8
data/README.md
CHANGED
@@ -18,6 +18,7 @@
 - **avsc**: inline avro schema config (json, default: `null`)
 - **ignore_columns**: remove columns from output (array(string), default: `[]`)
 - **key_column_name**: use column value as record key (string, default: `null`, if this parameter is null, set random number as record key, and it can use column in `ignore_columns`)
+- **partition_column_name**: use column value as partition id (string, default: `null`, this value is prefer to `key_column_name`, and if partition_column value is null, use key_column for partitioning)
 - **record_batch_size**: kafka producer record batch size (integer, default: `1000`)
 - **acks**: kafka producer require acks (string, default: `"1"`)
 - **retries**: kafka producer max retry count (integer, default: `1`)
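Note: the key/partition precedence described for `partition_column_name` maps directly onto Kafka's `ProducerRecord` constructors: a non-null partition id is honored as-is, while a null partition makes `KafkaProducer` fall back to its default partitioner, which hashes the record key. A minimal sketch of that behavior (illustrative only; `PartitionPrecedence` is not a plugin class):

```java
import org.apache.kafka.clients.producer.ProducerRecord;

class PartitionPrecedence
{
    // A non-null partition wins; with null, KafkaProducer picks the
    // partition itself by hashing the record key.
    static <K, V> ProducerRecord<K, V> record(String topic, Integer partition, K key, V value)
    {
        return new ProducerRecord<>(topic, partition, key, value);
    }
}
```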
data/build.gradle
CHANGED
(hunk not captured in this diff view)

data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java
CHANGED
@@ -17,13 +17,24 @@ import java.util.stream.Collectors;
 public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
 {
     private Schema avroSchema;
-    GenericRecord genericRecord;
+    private GenericRecord genericRecord;
 
-
+    AvroFormatColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader, Schema avroSchema)
     {
         super(task, pageReader);
         this.avroSchema = avroSchema;
-        this.genericRecord = new GenericData.Record(avroSchema);
+    }
+
+    GenericRecord getGenericRecord()
+    {
+        return genericRecord;
+    }
+
+    @Override
+    void reset()
+    {
+        super.reset();
+        this.genericRecord = new GenericData.Record(avroSchema);
     }
 
     @Override
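The getter-plus-`reset()` shape lets one visitor instance be reused across records: `reset()` allocates a fresh `GenericRecord` per record instead of the constructor doing it once. A condensed sketch of the pattern (the `RecordBuilder` name is hypothetical; the plugin's class is `AvroFormatColumnVisitor`):

```java
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

class RecordBuilder
{
    private final Schema schema;
    private GenericRecord current;

    RecordBuilder(Schema schema)
    {
        this.schema = schema;
    }

    // Called once per record by the driver loop, replacing per-record
    // construction of the whole visitor object.
    void reset()
    {
        current = new GenericData.Record(schema);
    }

    GenericRecord build()
    {
        return current;
    }
}
```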
data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java
CHANGED
@@ -13,14 +13,25 @@ import java.time.format.DateTimeFormatter;
 public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
 {
     private ObjectMapper objectMapper;
-    ObjectNode jsonNode;
+    private ObjectNode jsonNode;
 
     private static DateTimeFormatter timestampFormatter = DateTimeFormatter.ISO_INSTANT;
 
-
+    JsonFormatColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader, ObjectMapper objectMapper)
     {
         super(task, pageReader);
         this.objectMapper = objectMapper;
+    }
+
+    ObjectNode getJsonNode()
+    {
+        return jsonNode;
+    }
+
+    @Override
+    void reset()
+    {
+        super.reset();
         this.jsonNode = objectMapper.createObjectNode();
     }
 
@@ -42,6 +53,8 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void longColumn(Column column)
     {
+        super.longColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
@@ -58,6 +71,8 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void doubleColumn(Column column)
     {
+        super.doubleColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
@@ -68,12 +83,13 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
         }
 
         jsonNode.put(column.getName(), pageReader.getDouble(column));
-        super.doubleColumn(column);
     }
 
     @Override
     public void stringColumn(Column column)
     {
+        super.stringColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
@@ -84,7 +100,6 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
         }
 
         jsonNode.put(column.getName(), pageReader.getString(column));
-        super.stringColumn(column);
     }
 
     @Override
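Moving the `super.longColumn(column)` / `super.doubleColumn(column)` / `super.stringColumn(column)` calls from after the serialization to the top of each method matters for `ignore_columns`: the base class now records the key, topic, and partition before the `isIgnoreColumn` early return can skip them, which is what lets `key_column_name` reference an ignored column as the README promises. A sketch of the pattern with hypothetical `Base`/`Derived` names:

```java
import java.util.Collections;
import java.util.Set;

abstract class Base
{
    // Base-class bookkeeping (key/topic/partition extraction in the plugin).
    void visit(String column)
    {
    }
}

class Derived extends Base
{
    private final Set<String> ignored = Collections.singleton("secret");

    @Override
    void visit(String column)
    {
        super.visit(column);  // runs before the ignore check now
        if (ignored.contains(column)) {
            return;           // serialization skipped, bookkeeping kept
        }
        // ... write the column value to the output record ...
    }
}
```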
data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java
CHANGED
@@ -6,32 +6,57 @@ import org.embulk.spi.PageReader;
 
 public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
 {
-    KafkaOutputPlugin.PluginTask task;
+    private KafkaOutputPlugin.PluginTask task;
     PageReader pageReader;
+    private String partitionColumnName;
 
-    Object recordKey = null;
-    String topicName = null;
+    private Object recordKey = null;
+    private String topicName = null;
+    private Integer partition = null;
 
-
+    KafkaOutputColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader)
     {
         this.task = task;
         this.pageReader = pageReader;
+        this.partitionColumnName = task.getPartitionColumnName().orElse(null);
     }
 
-
+    Object getRecordKey()
+    {
+        return recordKey;
+    }
+
+    private void setRecordKey(Column column, Object value)
     {
         if (task.getKeyColumnName().isPresent() && task.getKeyColumnName().get().equals(column.getName())) {
             recordKey = value;
         }
     }
 
-
+    String getTopicName()
+    {
+        return topicName;
+    }
+
+    private void setTopicName(Column column, String value)
     {
         if (task.getTopicColumn().isPresent() && task.getTopicColumn().get().equals(column.getName())) {
             topicName = value;
         }
     }
 
+    Integer getPartition()
+    {
+        return partition;
+    }
+
+    void reset()
+    {
+        this.recordKey = null;
+        this.topicName = null;
+        this.partition = null;
+    }
+
     boolean isIgnoreColumn(Column column)
     {
         return task.getIgnoreColumns().stream().anyMatch(name -> name.equals(column.getName()));
@@ -41,7 +66,12 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
     public void longColumn(Column column)
     {
         if (!pageReader.isNull(column)) {
-
+            long value = pageReader.getLong(column);
+            setRecordKey(column, value);
+
+            if (partitionColumnName != null && partitionColumnName.equals(column.getName())) {
+                partition = Long.valueOf(value).intValue();
+            }
         }
     }
 
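One detail of the `longColumn` hunk: the partition id comes from truncating the column's long value to an int via `Long.valueOf(value).intValue()`, so values outside the int range wrap silently rather than failing (`Math.toIntExact` would throw instead). A self-contained illustration:

```java
public class PartitionTruncation
{
    public static void main(String[] args)
    {
        // Same conversion as the hunk above: long -> int keeps the low 32 bits.
        System.out.println(Long.valueOf(7L).intValue());             // 7
        System.out.println(Long.valueOf(4_294_967_303L).intValue()); // also 7 (2^32 + 7)
    }
}
```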
data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java
CHANGED
@@ -4,7 +4,10 @@ import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonValue;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
-import
+import com.google.common.collect.ImmutableList;
+import org.apache.kafka.clients.admin.AdminClient;
+import org.apache.kafka.clients.admin.AdminClientConfig;
+import org.apache.kafka.clients.admin.DescribeTopicsResult;
 import org.apache.kafka.clients.producer.KafkaProducer;
 import org.apache.kafka.clients.producer.ProducerRecord;
 import org.embulk.config.Config;
@@ -15,7 +18,6 @@ import org.embulk.config.ConfigSource;
 import org.embulk.config.Task;
 import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
-import org.embulk.spi.ColumnConfig;
 import org.embulk.spi.Exec;
 import org.embulk.spi.OutputPlugin;
 import org.embulk.spi.Page;
@@ -32,7 +34,11 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
 import java.util.PrimitiveIterator;
+import java.util.Properties;
 import java.util.Random;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
 
 public class KafkaOutputPlugin
@@ -96,6 +102,10 @@ public class KafkaOutputPlugin
     @ConfigDefault("null")
     public Optional<String> getKeyColumnName();
 
+    @Config("partition_column_name")
+    @ConfigDefault("null")
+    public Optional<String> getPartitionColumnName();
+
     @Config("record_batch_size")
     @ConfigDefault("1000")
     public int getRecordBatchSize();
@@ -124,17 +134,31 @@ public class KafkaOutputPlugin
     private static ObjectMapper objectMapper = new ObjectMapper();
     private Logger logger = LoggerFactory.getLogger(getClass());
 
+    private AdminClient getKafkaAdminClient(PluginTask task)
+    {
+        Properties properties = new Properties();
+        properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
+        AdminClient adminClient = AdminClient.create(properties);
+        return adminClient;
+    }
+
     @Override
     public ConfigDiff transaction(ConfigSource config,
             Schema schema, int taskCount,
             Control control)
     {
         PluginTask task = config.loadConfig(PluginTask.class);
+        AdminClient adminClient = getKafkaAdminClient(task);
+        DescribeTopicsResult result = adminClient.describeTopics(ImmutableList.of(task.getTopic()));
+        try {
+            if (result.all().get(30, TimeUnit.SECONDS).size() == 0) {
+                throw new RuntimeException("target topic is not found");
+            }
+        }
+        catch (InterruptedException | ExecutionException | TimeoutException e) {
+            throw new RuntimeException("failed to connect kafka brokers");
+        }
 
-        // retryable (idempotent) output:
-        // return resume(task.dump(), schema, taskCount, control);
-
-        // non-retryable (non-idempotent) output:
         control.run(task.dump());
         return Exec.newConfigDiff();
     }
@@ -179,22 +203,24 @@ public class KafkaOutputPlugin
         AtomicInteger recordLoggingCount = new AtomicInteger(1);
 
         return new TransactionalPageOutput() {
+            private JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
+
             @Override
             public void add(Page page)
             {
                 pageReader.setPage(page);
                 while (pageReader.nextRecord()) {
-                    JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
+                    columnVisitor.reset();
 
                     pageReader.getSchema().visitColumns(columnVisitor);
 
-                    Object recordKey = columnVisitor.
+                    Object recordKey = columnVisitor.getRecordKey();
                     if (recordKey == null) {
                         recordKey = randomLong.next();
                     }
 
-                    String targetTopic = columnVisitor.
-                    ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.
+                    String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
+                    ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.getJsonNode());
                     producer.send(producerRecord, (metadata, exception) -> {
                         if (exception != null) {
                             logger.error("produce error", exception);
@@ -269,23 +295,25 @@ public class KafkaOutputPlugin
         final org.apache.avro.Schema finalAvroSchema = avroSchema;
         return new TransactionalPageOutput()
         {
+            private AvroFormatColumnVisitor columnVisitor = new AvroFormatColumnVisitor(task, pageReader, finalAvroSchema);
+
             @Override
             public void add(Page page)
             {
                 pageReader.setPage(page);
                 while (pageReader.nextRecord()) {
-                    AvroFormatColumnVisitor columnVisitor = new AvroFormatColumnVisitor(task, pageReader, finalAvroSchema);
+                    columnVisitor.reset();
 
                     pageReader.getSchema().visitColumns(columnVisitor);
 
-                    Object recordKey = columnVisitor.
+                    Object recordKey = columnVisitor.getRecordKey();
                     if (recordKey == null) {
                         recordKey = randomLong.next();
                     }
 
-                    String targetTopic = columnVisitor.
+                    String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
 
-                    ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.
+                    ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getGenericRecord());
                     producer.send(producerRecord, (metadata, exception) -> {
                         if (exception != null) {
                             logger.error("produce error", exception);
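Two caveats about the topic-existence check added to `transaction()`: the `AdminClient` it creates is never closed, and in practice `describeTopics` on a missing topic typically completes exceptionally with an `UnknownTopicOrPartitionException` (wrapped in `ExecutionException`) rather than returning an empty map, so the `size() == 0` branch is unlikely to fire. A standalone sketch under those assumptions, using try-with-resources and preserving the cause:

```java
import java.util.Collections;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;

class TopicCheck
{
    static void ensureTopicExists(String brokers, String topic)
    {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
        // AdminClient owns threads and sockets; close it when done.
        try (AdminClient adminClient = AdminClient.create(props)) {
            // Throws (wrapped) UnknownTopicOrPartitionException if the topic is missing.
            adminClient.describeTopics(Collections.singletonList(topic))
                    .all().get(30, TimeUnit.SECONDS);
        }
        catch (InterruptedException | ExecutionException | TimeoutException e) {
            throw new RuntimeException("failed to connect kafka brokers", e);
        }
    }
}
```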
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - joker1007
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-
+date: 2019-10-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -54,7 +54,7 @@ files:
 - classpath/common-config-5.3.0.jar
 - classpath/common-utils-5.3.0.jar
 - classpath/commons-compress-1.18.jar
-- classpath/embulk-output-kafka-0.1.3.jar
+- classpath/embulk-output-kafka-0.1.4.jar
 - classpath/jackson-annotations-2.9.0.jar
 - classpath/jackson-core-2.9.9.jar
 - classpath/jackson-databind-2.9.9.jar