embulk-output-kafka 0.1.3 → 0.1.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: fb1edf4ce79bd490f0f01662547d36c2e6d2dd6e
-  data.tar.gz: 44b9affb8e9d1385314274b5d1ac083fd5a4848e
+  metadata.gz: d02c758c81651e54d09421f19c1ba4970ddb04c4
+  data.tar.gz: 18c7cbb212fa721f11427250c6132d6cf937a292
 SHA512:
-  metadata.gz: 5c76d2cca3d141b7f44208449ba092ee732993183de6e6f19ec4e5280f302f0e3a4b5746414332103aa492b0cb7ea7df55159fa936248839bb68a8280ecfc060
-  data.tar.gz: 28dcd4f04aceec97e78c427b2b859266b9adb5147ae8cd28a63c5df86840d5c4051af049b225eefe01d410007afdd17222ec96fc3525cf0cd28e60c30414f6a2
+  metadata.gz: 4f6cd307587c3edc55b87404b708a9a21ad904e9685ca1e28eebb662a1cead956175173ce6d8d04821d1a5d3866c143beb10a67bb67ad8e71416b207cbdf31da
+  data.tar.gz: 13de4897add59955cab7a7adbdb252a1a843b8042f94c5f23faca63d4bda1e094c4003f0486244c3e50ec30f2ec5004728bce496713ab8051edf6fed4ff636f8
data/README.md CHANGED
@@ -18,6 +18,7 @@
 - **avsc**: inline avro schema config (json, default: `null`)
 - **ignore_columns**: remove columns from output (array(string), default: `[]`)
 - **key_column_name**: use column value as record key (string, default: `null`, if this parameter is null, set random number as record key, and it can use column in `ignore_columns`)
+- **partition_column_name**: use column value as partition id (string, default: `null`; this value takes precedence over `key_column_name`, and if the partition column value is null, the key column is used for partitioning)
 - **record_batch_size**: kafka producer record batch size (integer, default: `1000`)
 - **acks**: kafka producer require acks (string, default: `"1"`)
 - **retries**: kafka producer max retry count (integer, default: `1`)
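
For context, here is a minimal, hypothetical Embulk config sketch showing how the new option slots in next to the existing key option (broker address, topic, and column names are placeholders, and `shard_id` is assumed to be a long column holding the target partition id):

```yaml
out:
  type: kafka
  brokers:
    - "localhost:9092"
  topic: "my-topic"
  key_column_name: user_id          # still sets the record key
  partition_column_name: shard_id   # wins over key-based partitioning when set
```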
data/build.gradle CHANGED
@@ -17,7 +17,7 @@ configurations {
     provided
 }
 
-version = "0.1.3"
+version = "0.1.4"
 
 sourceCompatibility = 1.8
 targetCompatibility = 1.8
data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java CHANGED
@@ -17,13 +17,24 @@ import java.util.stream.Collectors;
 public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
 {
     private Schema avroSchema;
-    public GenericRecord genericRecord;
+    private GenericRecord genericRecord;
 
-    public AvroFormatColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader, Schema avroSchema, GenericRecord genericRecord)
+    AvroFormatColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader, Schema avroSchema)
     {
         super(task, pageReader);
         this.avroSchema = avroSchema;
-        this.genericRecord = genericRecord;
+    }
+
+    GenericRecord getGenericRecord()
+    {
+        return genericRecord;
+    }
+
+    @Override
+    void reset()
+    {
+        super.reset();
+        this.genericRecord = new GenericData.Record(avroSchema);
     }
 
     @Override
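
The net effect of this refactor: the visitor is constructed once (it no longer receives a `GenericRecord`), and `reset()` allocates a fresh `GenericData.Record` per row. A minimal sketch of the driving loop this enables, mirroring the `KafkaOutputPlugin` hunks further below (`task`, `pageReader`, and `avroSchema` are assumed to be in scope):

```java
AvroFormatColumnVisitor visitor = new AvroFormatColumnVisitor(task, pageReader, avroSchema);

while (pageReader.nextRecord()) {
    visitor.reset();                               // clears key/topic/partition and
                                                   // allocates a new GenericData.Record
    pageReader.getSchema().visitColumns(visitor);  // fills the record column by column
    GenericRecord record = visitor.getGenericRecord();
    // ... wrap in a ProducerRecord and send (see the plugin diff below) ...
}
```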
data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java CHANGED
@@ -13,14 +13,25 @@ import java.time.format.DateTimeFormatter;
 public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
 {
     private ObjectMapper objectMapper;
-    public ObjectNode jsonNode;
+    private ObjectNode jsonNode;
 
     private static DateTimeFormatter timestampFormatter = DateTimeFormatter.ISO_INSTANT;
 
-    public JsonFormatColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader, ObjectMapper objectMapper)
+    JsonFormatColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader, ObjectMapper objectMapper)
     {
         super(task, pageReader);
         this.objectMapper = objectMapper;
+    }
+
+    ObjectNode getJsonNode()
+    {
+        return jsonNode;
+    }
+
+    @Override
+    void reset()
+    {
+        super.reset();
         this.jsonNode = objectMapper.createObjectNode();
     }
 
@@ -42,6 +53,8 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void longColumn(Column column)
     {
+        super.longColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
@@ -58,6 +71,8 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void doubleColumn(Column column)
     {
+        super.doubleColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
@@ -68,12 +83,13 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
         }
 
         jsonNode.put(column.getName(), pageReader.getDouble(column));
-        super.doubleColumn(column);
     }
 
     @Override
     public void stringColumn(Column column)
     {
+        super.stringColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
@@ -84,7 +100,6 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
         }
 
         jsonNode.put(column.getName(), pageReader.getString(column));
-        super.stringColumn(column);
     }
 
     @Override
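
These hunks move the `super` calls to the first statement of each visitor method; previously (as the `doubleColumn` and `stringColumn` hunks show) they ran after the payload write, so the `ignore_columns` early return skipped them entirely. With the new ordering the base class always observes the value, which lets a column drive the record key or partition even when it is excluded from the JSON payload. The resulting pattern, in a sketch:

```java
@Override
public void longColumn(Column column)
{
    super.longColumn(column);        // key/partition capture happens first,

    if (isIgnoreColumn(column)) {    // so a column listed in ignore_columns can
        return;                      // still drive routing without appearing in
    }                                // the JSON payload
    // ... append the value to jsonNode ...
}
```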
data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java CHANGED
@@ -6,32 +6,57 @@ import org.embulk.spi.PageReader;
 
 public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
 {
-    KafkaOutputPlugin.PluginTask task;
+    private KafkaOutputPlugin.PluginTask task;
     PageReader pageReader;
+    private String partitionColumnName;
 
-    public Object recordKey = null;
-    public String topicName = null;
+    private Object recordKey = null;
+    private String topicName = null;
+    private Integer partition = null;
 
-    public KafkaOutputColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader)
+    KafkaOutputColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader)
     {
         this.task = task;
         this.pageReader = pageReader;
+        this.partitionColumnName = task.getPartitionColumnName().orElse(null);
     }
 
-    void setRecordKey(Column column, Object value)
+    Object getRecordKey()
+    {
+        return recordKey;
+    }
+
+    private void setRecordKey(Column column, Object value)
     {
         if (task.getKeyColumnName().isPresent() && task.getKeyColumnName().get().equals(column.getName())) {
             recordKey = value;
         }
     }
 
-    void setTopicName(Column column, String value)
+    String getTopicName()
+    {
+        return topicName;
+    }
+
+    private void setTopicName(Column column, String value)
     {
         if (task.getTopicColumn().isPresent() && task.getTopicColumn().get().equals(column.getName())) {
             topicName = value;
         }
     }
 
+    Integer getPartition()
+    {
+        return partition;
+    }
+
+    void reset()
+    {
+        this.recordKey = null;
+        this.topicName = null;
+        this.partition = null;
+    }
+
     boolean isIgnoreColumn(Column column)
     {
         return task.getIgnoreColumns().stream().anyMatch(name -> name.equals(column.getName()));
@@ -41,7 +66,12 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
     public void longColumn(Column column)
     {
         if (!pageReader.isNull(column)) {
-            setRecordKey(column, pageReader.getLong(column));
+            long value = pageReader.getLong(column);
+            setRecordKey(column, value);
+
+            if (partitionColumnName != null && partitionColumnName.equals(column.getName())) {
+                partition = Long.valueOf(value).intValue();
+            }
         }
     }
 
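One detail in the new `longColumn` branch: `Long.valueOf(value).intValue()` boxes only to unbox again and behaves exactly like a plain `(int)` cast, silently truncating longs outside the int range. A tiny sketch of the assumption this bakes in:

```java
long inRange = 42L;
long tooBig  = 4_294_967_338L;              // 2^32 + 42

int p1 = Long.valueOf(inRange).intValue();  // 42, same as (int) inRange
int p2 = Long.valueOf(tooBig).intValue();   // also 42: the high bits are dropped

// The partition column is therefore assumed to hold small non-negative ids
// that correspond to partitions that actually exist on the target topic.
```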
data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java CHANGED
@@ -4,7 +4,10 @@ import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonValue;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
-import org.apache.avro.generic.GenericData;
+import com.google.common.collect.ImmutableList;
+import org.apache.kafka.clients.admin.AdminClient;
+import org.apache.kafka.clients.admin.AdminClientConfig;
+import org.apache.kafka.clients.admin.DescribeTopicsResult;
 import org.apache.kafka.clients.producer.KafkaProducer;
 import org.apache.kafka.clients.producer.ProducerRecord;
 import org.embulk.config.Config;
@@ -15,7 +18,6 @@ import org.embulk.config.ConfigSource;
 import org.embulk.config.Task;
 import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
-import org.embulk.spi.ColumnConfig;
 import org.embulk.spi.Exec;
 import org.embulk.spi.OutputPlugin;
 import org.embulk.spi.Page;
@@ -32,7 +34,11 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
 import java.util.PrimitiveIterator;
+import java.util.Properties;
 import java.util.Random;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
 
 public class KafkaOutputPlugin
@@ -96,6 +102,10 @@ public class KafkaOutputPlugin
     @ConfigDefault("null")
     public Optional<String> getKeyColumnName();
 
+    @Config("partition_column_name")
+    @ConfigDefault("null")
+    public Optional<String> getPartitionColumnName();
+
     @Config("record_batch_size")
     @ConfigDefault("1000")
     public int getRecordBatchSize();
@@ -124,17 +134,31 @@ public class KafkaOutputPlugin
     private static ObjectMapper objectMapper = new ObjectMapper();
     private Logger logger = LoggerFactory.getLogger(getClass());
 
+    private AdminClient getKafkaAdminClient(PluginTask task)
+    {
+        Properties properties = new Properties();
+        properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
+        AdminClient adminClient = AdminClient.create(properties);
+        return adminClient;
+    }
+
     @Override
     public ConfigDiff transaction(ConfigSource config,
             Schema schema, int taskCount,
             Control control)
     {
         PluginTask task = config.loadConfig(PluginTask.class);
+        AdminClient adminClient = getKafkaAdminClient(task);
+        DescribeTopicsResult result = adminClient.describeTopics(ImmutableList.of(task.getTopic()));
+        try {
+            if (result.all().get(30, TimeUnit.SECONDS).size() == 0) {
+                throw new RuntimeException("target topic is not found");
+            }
+        }
+        catch (InterruptedException | ExecutionException | TimeoutException e) {
+            throw new RuntimeException("failed to connect kafka brokers");
+        }
 
-        // retryable (idempotent) output:
-        // return resume(task.dump(), schema, taskCount, control);
-
-        // non-retryable (non-idempotent) output:
         control.run(task.dump());
         return Exec.newConfigDiff();
     }
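
This pre-flight check makes a missing topic fail the whole transaction up front instead of surfacing as per-record produce errors, so the topic must exist before Embulk runs. A hedged sketch of one-off topic creation with the same `AdminClient` API (topic name, partition count, and replication factor are placeholders):

```java
import com.google.common.collect.ImmutableList;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.NewTopic;

import java.util.Properties;
import java.util.concurrent.TimeUnit;

public class CreateTopicOnce
{
    public static void main(String[] args) throws Exception
    {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");

        // try-with-resources closes the client; note the plugin's own AdminClient
        // above is created per transaction() call and is not explicitly closed.
        try (AdminClient admin = AdminClient.create(props)) {
            admin.createTopics(ImmutableList.of(new NewTopic("my-topic", 3, (short) 1)))
                 .all().get(30, TimeUnit.SECONDS);
        }
    }
}
```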
@@ -179,22 +203,24 @@ public class KafkaOutputPlugin
         AtomicInteger recordLoggingCount = new AtomicInteger(1);
 
         return new TransactionalPageOutput() {
+            private JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
+
             @Override
             public void add(Page page)
             {
                 pageReader.setPage(page);
                 while (pageReader.nextRecord()) {
-                    JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
+                    columnVisitor.reset();
 
                     pageReader.getSchema().visitColumns(columnVisitor);
 
-                    Object recordKey = columnVisitor.recordKey;
+                    Object recordKey = columnVisitor.getRecordKey();
                     if (recordKey == null) {
                         recordKey = randomLong.next();
                     }
 
-                    String targetTopic = columnVisitor.topicName != null ? columnVisitor.topicName : task.getTopic();
-                    ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.jsonNode);
+                    String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
+                    ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.getJsonNode());
                     producer.send(producerRecord, (metadata, exception) -> {
                         if (exception != null) {
                             logger.error("produce error", exception);
@@ -269,23 +295,25 @@ public class KafkaOutputPlugin
         final org.apache.avro.Schema finalAvroSchema = avroSchema;
         return new TransactionalPageOutput()
         {
+            private AvroFormatColumnVisitor columnVisitor = new AvroFormatColumnVisitor(task, pageReader, finalAvroSchema);
+
             @Override
             public void add(Page page)
             {
                 pageReader.setPage(page);
                 while (pageReader.nextRecord()) {
-                    AvroFormatColumnVisitor columnVisitor = new AvroFormatColumnVisitor(task, pageReader, finalAvroSchema, new GenericData.Record(finalAvroSchema));
+                    columnVisitor.reset();
 
                     pageReader.getSchema().visitColumns(columnVisitor);
 
-                    Object recordKey = columnVisitor.recordKey;
+                    Object recordKey = columnVisitor.getRecordKey();
                     if (recordKey == null) {
                         recordKey = randomLong.next();
                     }
 
-                    String targetTopic = columnVisitor.topicName != null ? columnVisitor.topicName : task.getTopic();
+                    String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
 
-                    ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.genericRecord);
+                    ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getGenericRecord());
                     producer.send(producerRecord, (metadata, exception) -> {
                         if (exception != null) {
                             logger.error("produce error", exception);
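
The Avro path now uses Kafka's four-argument constructor, `ProducerRecord(topic, partition, key, value)`, where the partition may be null. A null partition defers to the producer's partitioner (key hash by default), which is what makes `partition_column_name` optional per record; note that in this release only the Avro output passes the partition, while the JSON path above still uses the three-argument constructor. A sketch of the semantics (`columnVisitor`, `recordKey`, and `targetTopic` are assumed from the surrounding loop):

```java
// new ProducerRecord<>(topic, partition, key, value):
//   partition != null -> the record goes to exactly that partition
//   partition == null -> the producer's partitioner picks one from the key
Integer partition = columnVisitor.getPartition();   // null unless the partition
                                                    // column was set for this row
ProducerRecord<Object, Object> producerRecord =
        new ProducerRecord<>(targetTopic, partition, recordKey, columnVisitor.getGenericRecord());
```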
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - joker1007
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-09-13 00:00:00.000000000 Z
+date: 2019-10-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -54,7 +54,7 @@ files:
 - classpath/common-config-5.3.0.jar
 - classpath/common-utils-5.3.0.jar
 - classpath/commons-compress-1.18.jar
-- classpath/embulk-output-kafka-0.1.3.jar
+- classpath/embulk-output-kafka-0.1.4.jar
 - classpath/jackson-annotations-2.9.0.jar
 - classpath/jackson-core-2.9.9.jar
 - classpath/jackson-databind-2.9.9.jar