embulk-output-kafka 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fb1edf4ce79bd490f0f01662547d36c2e6d2dd6e
4
- data.tar.gz: 44b9affb8e9d1385314274b5d1ac083fd5a4848e
3
+ metadata.gz: d02c758c81651e54d09421f19c1ba4970ddb04c4
4
+ data.tar.gz: 18c7cbb212fa721f11427250c6132d6cf937a292
5
5
  SHA512:
6
- metadata.gz: 5c76d2cca3d141b7f44208449ba092ee732993183de6e6f19ec4e5280f302f0e3a4b5746414332103aa492b0cb7ea7df55159fa936248839bb68a8280ecfc060
7
- data.tar.gz: 28dcd4f04aceec97e78c427b2b859266b9adb5147ae8cd28a63c5df86840d5c4051af049b225eefe01d410007afdd17222ec96fc3525cf0cd28e60c30414f6a2
6
+ metadata.gz: 4f6cd307587c3edc55b87404b708a9a21ad904e9685ca1e28eebb662a1cead956175173ce6d8d04821d1a5d3866c143beb10a67bb67ad8e71416b207cbdf31da
7
+ data.tar.gz: 13de4897add59955cab7a7adbdb252a1a843b8042f94c5f23faca63d4bda1e094c4003f0486244c3e50ec30f2ec5004728bce496713ab8051edf6fed4ff636f8
data/README.md CHANGED
@@ -18,6 +18,7 @@
18
18
  - **avsc**: inline avro schema config (json, default: `null`)
19
19
  - **ignore_columns**: remove columns from output (array(string), default: `[]`)
20
20
  - **key_column_name**: use column value as record key (string, default: `null`; if this parameter is null, a random number is used as the record key; the key column may also be listed in `ignore_columns`)
21
+ - **partition_column_name**: use column value as partition id (string, default: `null`; this value takes precedence over `key_column_name`, and if the partition column value is null, the key column is used for partitioning)
21
22
  - **record_batch_size**: kafka producer record batch size (integer, default: `1000`)
22
23
  - **acks**: kafka producer require acks (string, default: `"1"`)
23
24
  - **retries**: kafka producer max retry count (integer, default: `1`)
data/build.gradle CHANGED
@@ -17,7 +17,7 @@ configurations {
17
17
  provided
18
18
  }
19
19
 
20
- version = "0.1.3"
20
+ version = "0.1.4"
21
21
 
22
22
  sourceCompatibility = 1.8
23
23
  targetCompatibility = 1.8
@@ -17,13 +17,24 @@ import java.util.stream.Collectors;
17
17
  public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
18
18
  {
19
19
  private Schema avroSchema;
20
- public GenericRecord genericRecord;
20
+ private GenericRecord genericRecord;
21
21
 
22
- public AvroFormatColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader, Schema avroSchema, GenericRecord genericRecord)
22
+ AvroFormatColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader, Schema avroSchema)
23
23
  {
24
24
  super(task, pageReader);
25
25
  this.avroSchema = avroSchema;
26
- this.genericRecord = genericRecord;
26
+ }
27
+
28
+ GenericRecord getGenericRecord()
29
+ {
30
+ return genericRecord;
31
+ }
32
+
33
+ @Override
34
+ void reset()
35
+ {
36
+ super.reset();
37
+ this.genericRecord = new GenericData.Record(avroSchema);
27
38
  }
28
39
 
29
40
  @Override
@@ -13,14 +13,25 @@ import java.time.format.DateTimeFormatter;
13
13
  public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
14
14
  {
15
15
  private ObjectMapper objectMapper;
16
- public ObjectNode jsonNode;
16
+ private ObjectNode jsonNode;
17
17
 
18
18
  private static DateTimeFormatter timestampFormatter = DateTimeFormatter.ISO_INSTANT;
19
19
 
20
- public JsonFormatColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader, ObjectMapper objectMapper)
20
+ JsonFormatColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader, ObjectMapper objectMapper)
21
21
  {
22
22
  super(task, pageReader);
23
23
  this.objectMapper = objectMapper;
24
+ }
25
+
26
+ ObjectNode getJsonNode()
27
+ {
28
+ return jsonNode;
29
+ }
30
+
31
+ @Override
32
+ void reset()
33
+ {
34
+ super.reset();
24
35
  this.jsonNode = objectMapper.createObjectNode();
25
36
  }
26
37
 
@@ -42,6 +53,8 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
42
53
  @Override
43
54
  public void longColumn(Column column)
44
55
  {
56
+ super.longColumn(column);
57
+
45
58
  if (isIgnoreColumn(column)) {
46
59
  return;
47
60
  }
@@ -58,6 +71,8 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
58
71
  @Override
59
72
  public void doubleColumn(Column column)
60
73
  {
74
+ super.doubleColumn(column);
75
+
61
76
  if (isIgnoreColumn(column)) {
62
77
  return;
63
78
  }
@@ -68,12 +83,13 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
68
83
  }
69
84
 
70
85
  jsonNode.put(column.getName(), pageReader.getDouble(column));
71
- super.doubleColumn(column);
72
86
  }
73
87
 
74
88
  @Override
75
89
  public void stringColumn(Column column)
76
90
  {
91
+ super.stringColumn(column);
92
+
77
93
  if (isIgnoreColumn(column)) {
78
94
  return;
79
95
  }
@@ -84,7 +100,6 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
84
100
  }
85
101
 
86
102
  jsonNode.put(column.getName(), pageReader.getString(column));
87
- super.stringColumn(column);
88
103
  }
89
104
 
90
105
  @Override
@@ -6,32 +6,57 @@ import org.embulk.spi.PageReader;
6
6
 
7
7
  public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
8
8
  {
9
- KafkaOutputPlugin.PluginTask task;
9
+ private KafkaOutputPlugin.PluginTask task;
10
10
  PageReader pageReader;
11
+ private String partitionColumnName;
11
12
 
12
- public Object recordKey = null;
13
- public String topicName = null;
13
+ private Object recordKey = null;
14
+ private String topicName = null;
15
+ private Integer partition = null;
14
16
 
15
- public KafkaOutputColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader)
17
+ KafkaOutputColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader)
16
18
  {
17
19
  this.task = task;
18
20
  this.pageReader = pageReader;
21
+ this.partitionColumnName = task.getPartitionColumnName().orElse(null);
19
22
  }
20
23
 
21
- void setRecordKey(Column column, Object value)
24
+ Object getRecordKey()
25
+ {
26
+ return recordKey;
27
+ }
28
+
29
+ private void setRecordKey(Column column, Object value)
22
30
  {
23
31
  if (task.getKeyColumnName().isPresent() && task.getKeyColumnName().get().equals(column.getName())) {
24
32
  recordKey = value;
25
33
  }
26
34
  }
27
35
 
28
- void setTopicName(Column column, String value)
36
+ String getTopicName()
37
+ {
38
+ return topicName;
39
+ }
40
+
41
+ private void setTopicName(Column column, String value)
29
42
  {
30
43
  if (task.getTopicColumn().isPresent() && task.getTopicColumn().get().equals(column.getName())) {
31
44
  topicName = value;
32
45
  }
33
46
  }
34
47
 
48
+ Integer getPartition()
49
+ {
50
+ return partition;
51
+ }
52
+
53
+ void reset()
54
+ {
55
+ this.recordKey = null;
56
+ this.topicName = null;
57
+ this.partition = null;
58
+ }
59
+
35
60
  boolean isIgnoreColumn(Column column)
36
61
  {
37
62
  return task.getIgnoreColumns().stream().anyMatch(name -> name.equals(column.getName()));
@@ -41,7 +66,12 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
41
66
  public void longColumn(Column column)
42
67
  {
43
68
  if (!pageReader.isNull(column)) {
44
- setRecordKey(column, pageReader.getLong(column));
69
+ long value = pageReader.getLong(column);
70
+ setRecordKey(column, value);
71
+
72
+ if (partitionColumnName != null && partitionColumnName.equals(column.getName())) {
73
+ partition = Long.valueOf(value).intValue();
74
+ }
45
75
  }
46
76
  }
47
77
 
@@ -4,7 +4,10 @@ import com.fasterxml.jackson.annotation.JsonCreator;
4
4
  import com.fasterxml.jackson.annotation.JsonValue;
5
5
  import com.fasterxml.jackson.databind.ObjectMapper;
6
6
  import com.fasterxml.jackson.databind.node.ObjectNode;
7
- import org.apache.avro.generic.GenericData;
7
+ import com.google.common.collect.ImmutableList;
8
+ import org.apache.kafka.clients.admin.AdminClient;
9
+ import org.apache.kafka.clients.admin.AdminClientConfig;
10
+ import org.apache.kafka.clients.admin.DescribeTopicsResult;
8
11
  import org.apache.kafka.clients.producer.KafkaProducer;
9
12
  import org.apache.kafka.clients.producer.ProducerRecord;
10
13
  import org.embulk.config.Config;
@@ -15,7 +18,6 @@ import org.embulk.config.ConfigSource;
15
18
  import org.embulk.config.Task;
16
19
  import org.embulk.config.TaskReport;
17
20
  import org.embulk.config.TaskSource;
18
- import org.embulk.spi.ColumnConfig;
19
21
  import org.embulk.spi.Exec;
20
22
  import org.embulk.spi.OutputPlugin;
21
23
  import org.embulk.spi.Page;
@@ -32,7 +34,11 @@ import java.util.Locale;
32
34
  import java.util.Map;
33
35
  import java.util.Optional;
34
36
  import java.util.PrimitiveIterator;
37
+ import java.util.Properties;
35
38
  import java.util.Random;
39
+ import java.util.concurrent.ExecutionException;
40
+ import java.util.concurrent.TimeUnit;
41
+ import java.util.concurrent.TimeoutException;
36
42
  import java.util.concurrent.atomic.AtomicInteger;
37
43
 
38
44
  public class KafkaOutputPlugin
@@ -96,6 +102,10 @@ public class KafkaOutputPlugin
96
102
  @ConfigDefault("null")
97
103
  public Optional<String> getKeyColumnName();
98
104
 
105
+ @Config("partition_column_name")
106
+ @ConfigDefault("null")
107
+ public Optional<String> getPartitionColumnName();
108
+
99
109
  @Config("record_batch_size")
100
110
  @ConfigDefault("1000")
101
111
  public int getRecordBatchSize();
@@ -124,17 +134,31 @@ public class KafkaOutputPlugin
124
134
  private static ObjectMapper objectMapper = new ObjectMapper();
125
135
  private Logger logger = LoggerFactory.getLogger(getClass());
126
136
 
137
+ private AdminClient getKafkaAdminClient(PluginTask task)
138
+ {
139
+ Properties properties = new Properties();
140
+ properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
141
+ AdminClient adminClient = AdminClient.create(properties);
142
+ return adminClient;
143
+ }
144
+
127
145
  @Override
128
146
  public ConfigDiff transaction(ConfigSource config,
129
147
  Schema schema, int taskCount,
130
148
  Control control)
131
149
  {
132
150
  PluginTask task = config.loadConfig(PluginTask.class);
151
+ AdminClient adminClient = getKafkaAdminClient(task);
152
+ DescribeTopicsResult result = adminClient.describeTopics(ImmutableList.of(task.getTopic()));
153
+ try {
154
+ if (result.all().get(30, TimeUnit.SECONDS).size() == 0) {
155
+ throw new RuntimeException("target topic is not found");
156
+ }
157
+ }
158
+ catch (InterruptedException | ExecutionException | TimeoutException e) {
159
+ throw new RuntimeException("failed to connect kafka brokers");
160
+ }
133
161
 
134
- // retryable (idempotent) output:
135
- // return resume(task.dump(), schema, taskCount, control);
136
-
137
- // non-retryable (non-idempotent) output:
138
162
  control.run(task.dump());
139
163
  return Exec.newConfigDiff();
140
164
  }
@@ -179,22 +203,24 @@ public class KafkaOutputPlugin
179
203
  AtomicInteger recordLoggingCount = new AtomicInteger(1);
180
204
 
181
205
  return new TransactionalPageOutput() {
206
+ private JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
207
+
182
208
  @Override
183
209
  public void add(Page page)
184
210
  {
185
211
  pageReader.setPage(page);
186
212
  while (pageReader.nextRecord()) {
187
- JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
213
+ columnVisitor.reset();
188
214
 
189
215
  pageReader.getSchema().visitColumns(columnVisitor);
190
216
 
191
- Object recordKey = columnVisitor.recordKey;
217
+ Object recordKey = columnVisitor.getRecordKey();
192
218
  if (recordKey == null) {
193
219
  recordKey = randomLong.next();
194
220
  }
195
221
 
196
- String targetTopic = columnVisitor.topicName != null ? columnVisitor.topicName : task.getTopic();
197
- ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.jsonNode);
222
+ String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
223
+ ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.getJsonNode());
198
224
  producer.send(producerRecord, (metadata, exception) -> {
199
225
  if (exception != null) {
200
226
  logger.error("produce error", exception);
@@ -269,23 +295,25 @@ public class KafkaOutputPlugin
269
295
  final org.apache.avro.Schema finalAvroSchema = avroSchema;
270
296
  return new TransactionalPageOutput()
271
297
  {
298
+ private AvroFormatColumnVisitor columnVisitor = new AvroFormatColumnVisitor(task, pageReader, finalAvroSchema);
299
+
272
300
  @Override
273
301
  public void add(Page page)
274
302
  {
275
303
  pageReader.setPage(page);
276
304
  while (pageReader.nextRecord()) {
277
- AvroFormatColumnVisitor columnVisitor = new AvroFormatColumnVisitor(task, pageReader, finalAvroSchema, new GenericData.Record(finalAvroSchema));
305
+ columnVisitor.reset();
278
306
 
279
307
  pageReader.getSchema().visitColumns(columnVisitor);
280
308
 
281
- Object recordKey = columnVisitor.recordKey;
309
+ Object recordKey = columnVisitor.getRecordKey();
282
310
  if (recordKey == null) {
283
311
  recordKey = randomLong.next();
284
312
  }
285
313
 
286
- String targetTopic = columnVisitor.topicName != null ? columnVisitor.topicName : task.getTopic();
314
+ String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
287
315
 
288
- ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.genericRecord);
316
+ ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getGenericRecord());
289
317
  producer.send(producerRecord, (metadata, exception) -> {
290
318
  if (exception != null) {
291
319
  logger.error("produce error", exception);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-13 00:00:00.000000000 Z
11
+ date: 2019-10-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -54,7 +54,7 @@ files:
54
54
  - classpath/common-config-5.3.0.jar
55
55
  - classpath/common-utils-5.3.0.jar
56
56
  - classpath/commons-compress-1.18.jar
57
- - classpath/embulk-output-kafka-0.1.3.jar
57
+ - classpath/embulk-output-kafka-0.1.4.jar
58
58
  - classpath/jackson-annotations-2.9.0.jar
59
59
  - classpath/jackson-core-2.9.9.jar
60
60
  - classpath/jackson-databind-2.9.9.jar