embulk-output-kafka 0.1.3 → 0.1.8

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +44 -0
  3. data/.github/dependabot.yml +11 -0
  4. data/README.md +6 -1
  5. data/build.gradle +29 -12
  6. data/docker-compose.yml +1 -1
  7. data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +26 -10
  8. data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java +13 -0
  9. data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +27 -5
  10. data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java +13 -0
  11. data/src/main/java/org/embulk/output/kafka/KafkaJsonSerializer.java +4 -0
  12. data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +62 -8
  13. data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +82 -145
  14. data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java +104 -0
  15. data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java +3 -3
  16. data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java +384 -0
  17. data/src/test/resources/config_complex.yml +9 -28
  18. data/src/test/resources/config_complex_avro.yml +23 -42
  19. data/src/test/resources/config_simple.yml +5 -22
  20. data/src/test/resources/config_simple_avro.yml +14 -32
  21. data/src/test/resources/config_simple_avro_avsc_file.yml +7 -25
  22. data/src/test/resources/config_with_column_for_deletion.yml +7 -0
  23. data/src/test/resources/config_with_column_for_deletion_avro.yml +18 -0
  24. data/src/test/resources/config_with_key_column.yml +6 -23
  25. data/src/test/resources/config_with_partition_column.yml +6 -0
  26. data/src/test/resources/in1.csv +4 -4
  27. data/src/test/resources/in_complex.csv +4 -4
  28. data/src/test/resources/in_with_deletion.csv +4 -0
  29. metadata +30 -24
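
Functionally, the hunks below add a `partition_column_name` option, a `column_for_deletion` option (rows flagged in that column are produced with a null value, i.e. as Kafka tombstones, per the new tests), a topic-existence check at the start of a transaction, schema-registry-backed Avro schema resolution, and a `record_batch_size` default raised from 1000 to 16384. A minimal output configuration exercising the new options might look like the sketch below; the column names are placeholders and the `serialize_format` key name is assumed rather than shown in this diff:

out:
  type: kafka
  brokers:
    - "localhost:9092"
  topic: json-topic
  serialize_format: json             # assumed key name; not shown in this diff
  key_column_name: id
  partition_column_name: partition   # new: each record goes to the partition given in this column
  column_for_deletion: deleted       # new: flagged rows are sent with a null value
  record_batch_size: 16384           # default changed from 1000 to 16384
  acks: all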
data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java

@@ -4,9 +4,17 @@ import com.fasterxml.jackson.annotation.JsonCreator;
  import com.fasterxml.jackson.annotation.JsonValue;
  import com.fasterxml.jackson.databind.ObjectMapper;
  import com.fasterxml.jackson.databind.node.ObjectNode;
- import org.apache.avro.generic.GenericData;
+ import com.google.common.collect.ImmutableList;
+ import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient;
+ import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
+ import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
+ import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry;
+ import io.confluent.kafka.serializers.subject.TopicNameStrategy;
+ import io.confluent.kafka.serializers.subject.strategy.SubjectNameStrategy;
+ import org.apache.kafka.clients.admin.AdminClient;
+ import org.apache.kafka.clients.admin.AdminClientConfig;
+ import org.apache.kafka.clients.admin.DescribeTopicsResult;
  import org.apache.kafka.clients.producer.KafkaProducer;
- import org.apache.kafka.clients.producer.ProducerRecord;
  import org.embulk.config.Config;
  import org.embulk.config.ConfigDefault;
  import org.embulk.config.ConfigDiff;
@@ -15,15 +23,11 @@ import org.embulk.config.ConfigSource;
  import org.embulk.config.Task;
  import org.embulk.config.TaskReport;
  import org.embulk.config.TaskSource;
- import org.embulk.spi.ColumnConfig;
  import org.embulk.spi.Exec;
  import org.embulk.spi.OutputPlugin;
- import org.embulk.spi.Page;
  import org.embulk.spi.PageReader;
  import org.embulk.spi.Schema;
  import org.embulk.spi.TransactionalPageOutput;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;

  import java.io.File;
  import java.io.IOException;
@@ -31,9 +35,10 @@ import java.util.List;
  import java.util.Locale;
  import java.util.Map;
  import java.util.Optional;
- import java.util.PrimitiveIterator;
- import java.util.Random;
- import java.util.concurrent.atomic.AtomicInteger;
+ import java.util.Properties;
+ import java.util.concurrent.ExecutionException;
+ import java.util.concurrent.TimeUnit;
+ import java.util.concurrent.TimeoutException;

  public class KafkaOutputPlugin
  implements OutputPlugin
@@ -96,8 +101,12 @@ public class KafkaOutputPlugin
  @ConfigDefault("null")
  public Optional<String> getKeyColumnName();

+ @Config("partition_column_name")
+ @ConfigDefault("null")
+ public Optional<String> getPartitionColumnName();
+
  @Config("record_batch_size")
- @ConfigDefault("1000")
+ @ConfigDefault("16384")
  public int getRecordBatchSize();

  @Config("acks")
@@ -119,10 +128,22 @@ public class KafkaOutputPlugin
  @Config("value_subject_name_strategy")
  @ConfigDefault("null")
  public Optional<String> getValueSubjectNameStrategy();
+
+ @Config("column_for_deletion")
+ @ConfigDefault("null")
+ public Optional<String> getColumnForDeletion();
  }

  private static ObjectMapper objectMapper = new ObjectMapper();
- private Logger logger = LoggerFactory.getLogger(getClass());
+
+ private static final int SCHEMA_REGISTRY_IDENTITY_MAP_CAPACITY = 1000;
+
+ private AdminClient getKafkaAdminClient(PluginTask task)
+ {
+ Properties properties = new Properties();
+ properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
+ return AdminClient.create(properties);
+ }

  @Override
  public ConfigDiff transaction(ConfigSource config,
@@ -130,11 +151,17 @@ public class KafkaOutputPlugin
  Control control)
  {
  PluginTask task = config.loadConfig(PluginTask.class);
+ AdminClient adminClient = getKafkaAdminClient(task);
+ DescribeTopicsResult result = adminClient.describeTopics(ImmutableList.of(task.getTopic()));
+ try {
+ if (result.all().get(30, TimeUnit.SECONDS).size() == 0) {
+ throw new RuntimeException("target topic is not found");
+ }
+ }
+ catch (InterruptedException | ExecutionException | TimeoutException e) {
+ throw new RuntimeException("failed to connect kafka brokers");
+ }

- // retryable (idempotent) output:
- // return resume(task.dump(), schema, taskCount, control);
-
- // non-retryable (non-idempotent) output:
  control.run(task.dump());
  return Exec.newConfigDiff();
  }
@@ -165,95 +192,48 @@ public class KafkaOutputPlugin
  case AVRO_WITH_SCHEMA_REGISTRY:
  return buildPageOutputForAvroWithSchemaRegistry(task, schema, taskIndex);
  default:
- throw new ConfigException("Unknow serialize format");
+ throw new ConfigException("Unknown serialize format");
  }
  }

  private TransactionalPageOutput buildPageOutputForJson(PluginTask task, Schema schema, int taskIndex)
  {
  KafkaProducer<Object, ObjectNode> producer = RecordProducerFactory.getForJson(task, schema, task.getOtherProducerConfigs());
-
  PageReader pageReader = new PageReader(schema);
- PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
- AtomicInteger counter = new AtomicInteger(0);
- AtomicInteger recordLoggingCount = new AtomicInteger(1);
-
- return new TransactionalPageOutput() {
- @Override
- public void add(Page page)
- {
- pageReader.setPage(page);
- while (pageReader.nextRecord()) {
- JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
-
- pageReader.getSchema().visitColumns(columnVisitor);
-
- Object recordKey = columnVisitor.recordKey;
- if (recordKey == null) {
- recordKey = randomLong.next();
- }
-
- String targetTopic = columnVisitor.topicName != null ? columnVisitor.topicName : task.getTopic();
- ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.jsonNode);
- producer.send(producerRecord, (metadata, exception) -> {
- if (exception != null) {
- logger.error("produce error", exception);
- }
-
- logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());
-
- int current = counter.incrementAndGet();
- if (current >= recordLoggingCount.get()) {
- logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
- recordLoggingCount.set(recordLoggingCount.get() * 2);
- }
- });
- }
- }
-
- @Override
- public void finish()
- {
- producer.flush();
- }
-
- @Override
- public void close()
- {
- producer.close();
- }
+ KafkaOutputColumnVisitor<ObjectNode> columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);

- @Override
- public void abort()
- {
- producer.flush();
- producer.close();
- }
-
- @Override
- public TaskReport commit()
- {
- return null;
- }
- };
+ return new JsonFormatTransactionalPageOutput(producer, pageReader, columnVisitor, task.getTopic(), taskIndex);
  }

  private TransactionalPageOutput buildPageOutputForAvroWithSchemaRegistry(PluginTask task, Schema schema, int taskIndex)
  {
  KafkaProducer<Object, Object> producer = RecordProducerFactory.getForAvroWithSchemaRegistry(task, schema, task.getOtherProducerConfigs());
-
  PageReader pageReader = new PageReader(schema);
+ org.apache.avro.Schema avroSchema = getAvroSchema(task);
+ AvroFormatColumnVisitor avroFormatColumnVisitor = new AvroFormatColumnVisitor(task, pageReader, avroSchema);

+ return new AvroFormatTransactionalPageOutput(producer, pageReader, avroFormatColumnVisitor, task.getTopic(), taskIndex);
+ }
+
+ private org.apache.avro.Schema getAvroSchema(PluginTask task)
+ {
  org.apache.avro.Schema avroSchema = null;
- if (!task.getAvsc().isPresent() && !task.getAvscFile().isPresent() || task.getAvsc().isPresent() == task.getAvscFile().isPresent()) {
+ if (!task.getSchemaRegistryUrl().isPresent()) {
+ throw new ConfigException("avro_with_schema_registry format needs schema_registry_url");
+ }
+
+ if (task.getAvsc().isPresent() && task.getAvscFile().isPresent()) {
  throw new ConfigException("avro_with_schema_registry format needs either one of avsc and avsc_file");
  }
+
  if (task.getAvsc().isPresent()) {
  avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvsc().get().toString());
+ return avroSchema;
  }
  if (task.getAvscFile().isPresent()) {
  try {
  avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvscFile().get());
+ return avroSchema;
  }
  catch (IOException e) {
  e.printStackTrace();
@@ -261,71 +241,28 @@ public class KafkaOutputPlugin
  }
  }

- PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
-
- AtomicInteger counter = new AtomicInteger(0);
- AtomicInteger recordLoggingCount = new AtomicInteger(1);
-
- final org.apache.avro.Schema finalAvroSchema = avroSchema;
- return new TransactionalPageOutput()
- {
- @Override
- public void add(Page page)
- {
- pageReader.setPage(page);
- while (pageReader.nextRecord()) {
- AvroFormatColumnVisitor columnVisitor = new AvroFormatColumnVisitor(task, pageReader, finalAvroSchema, new GenericData.Record(finalAvroSchema));
-
- pageReader.getSchema().visitColumns(columnVisitor);
-
- Object recordKey = columnVisitor.recordKey;
- if (recordKey == null) {
- recordKey = randomLong.next();
- }
-
- String targetTopic = columnVisitor.topicName != null ? columnVisitor.topicName : task.getTopic();
-
- ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.genericRecord);
- producer.send(producerRecord, (metadata, exception) -> {
- if (exception != null) {
- logger.error("produce error", exception);
- }
-
- logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());
-
- int current = counter.incrementAndGet();
- if (current >= recordLoggingCount.get()) {
- logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
- recordLoggingCount.set(recordLoggingCount.get() * 2);
- }
- });
- }
- }
-
- @Override
- public void finish()
- {
- producer.flush();
- }
-
- @Override
- public void close()
- {
- producer.close();
- }
-
- @Override
- public void abort()
- {
- producer.flush();
- producer.close();
- }
+ SchemaRegistryClient schemaRegistryClient = getSchemaRegistryClient(task.getSchemaRegistryUrl().get());
+ SubjectNameStrategy subjectNameStrategy = new TopicNameStrategy();
+ String subjectName = subjectNameStrategy.subjectName(task.getTopic(), false, null);
+ try {
+ String schema = schemaRegistryClient.getLatestSchemaMetadata(subjectName).getSchema();
+ avroSchema = new org.apache.avro.Schema.Parser().parse(schema);
+ return avroSchema;
+ }
+ catch (IOException | RestClientException e) {
+ throw new ConfigException("cannot fetch latest schema from schema registry.", e);
+ }
+ }

- @Override
- public TaskReport commit()
- {
- return null;
- }
- };
+ private static final String MOCK_SCHEMA_REGISTRY_PREFIX = "mock://";
+ private SchemaRegistryClient getSchemaRegistryClient(String url)
+ {
+ if (url.startsWith(MOCK_SCHEMA_REGISTRY_PREFIX)) {
+ String mockScope = url.substring(MOCK_SCHEMA_REGISTRY_PREFIX.length());
+ return MockSchemaRegistry.getClientForScope(mockScope);
+ }
+ else {
+ return new CachedSchemaRegistryClient(url, SCHEMA_REGISTRY_IDENTITY_MAP_CAPACITY);
+ }
  }
  }
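
For the avro_with_schema_registry format, the new getAvroSchema path above adds a fallback: when neither avsc nor avsc_file is configured, the latest schema registered under the topic's subject (TopicNameStrategy, i.e. "<topic>-value") is fetched from the registry, and a schema_registry_url beginning with mock:// resolves to Confluent's in-memory MockSchemaRegistry, which the new tests rely on. A hedged configuration sketch, with an assumed serialize_format key name and a placeholder registry URL:

out:
  type: kafka
  brokers:
    - "localhost:9092"
  topic: avro-simple-topic
  serialize_format: avro_with_schema_registry   # assumed key name; not shown in this diff
  schema_registry_url: http://localhost:8081    # placeholder; mock://<scope> targets MockSchemaRegistry
  # avsc / avsc_file omitted: the latest schema registered for "avro-simple-topic-value" is used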
data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java (new file)

@@ -0,0 +1,104 @@
+ package org.embulk.output.kafka;
+
+ import org.apache.kafka.clients.producer.KafkaProducer;
+ import org.apache.kafka.clients.producer.ProducerRecord;
+ import org.embulk.config.TaskReport;
+ import org.embulk.spi.Page;
+ import org.embulk.spi.PageReader;
+ import org.embulk.spi.TransactionalPageOutput;
+ import org.slf4j.Logger;
+ import org.slf4j.LoggerFactory;
+
+ import java.util.PrimitiveIterator;
+ import java.util.Random;
+ import java.util.concurrent.atomic.AtomicLong;
+
+ public abstract class KafkaTransactionalPageOutput<P, T extends P> implements TransactionalPageOutput
+ {
+ private static final Logger logger = LoggerFactory.getLogger(KafkaTransactionalPageOutput.class);
+
+ private final KafkaProducer<Object, P> producer;
+ private final PageReader pageReader;
+ private final KafkaOutputColumnVisitor<T> columnVisitor;
+ private final String topic;
+ private final int taskIndex;
+
+ private final PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
+ private final AtomicLong counter = new AtomicLong(0);
+ private final AtomicLong recordLoggingCount = new AtomicLong(1);
+
+ public KafkaTransactionalPageOutput(
+ KafkaProducer<Object, P> producer,
+ PageReader pageReader,
+ KafkaOutputColumnVisitor<T> columnVisitor,
+ String topic, int taskIndex)
+ {
+ this.producer = producer;
+ this.pageReader = pageReader;
+ this.columnVisitor = columnVisitor;
+ this.topic = topic;
+ this.taskIndex = taskIndex;
+ }
+
+ @Override
+ public void add(Page page)
+ {
+ pageReader.setPage(page);
+ while (pageReader.nextRecord()) {
+ columnVisitor.reset();
+
+ pageReader.getSchema().visitColumns(columnVisitor);
+
+ Object recordKey = columnVisitor.getRecordKey();
+ if (recordKey == null) {
+ recordKey = randomLong.next();
+ }
+
+ String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : topic;
+
+ ProducerRecord<Object, P> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getRecord());
+ producer.send(producerRecord, (metadata, exception) -> {
+ if (exception != null) {
+ logger.error("produce error", exception);
+ }
+
+ logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
+ producerRecord.topic(),
+ producerRecord.key(),
+ producerRecord.value(),
+ producerRecord.partition());
+
+ long current = counter.incrementAndGet();
+ if (current >= recordLoggingCount.get()) {
+ logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
+ recordLoggingCount.set(recordLoggingCount.get() * 2);
+ }
+ });
+ }
+ }
+
+ @Override
+ public void finish()
+ {
+ producer.flush();
+ }
+
+ @Override
+ public void close()
+ {
+ producer.close();
+ }
+
+ @Override
+ public void abort()
+ {
+ producer.flush();
+ producer.close();
+ }
+
+ @Override
+ public TaskReport commit()
+ {
+ return null;
+ }
+ };
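
The concrete subclasses referenced earlier, JsonFormatTransactionalPageOutput and AvroFormatTransactionalPageOutput (+13 lines each in the files-changed list), are not included in this excerpt. Judging from the call sites in KafkaOutputPlugin, each is presumably just a thin constructor that fixes the type parameters of KafkaTransactionalPageOutput, roughly along these lines (a reconstruction, not the published source):

package org.embulk.output.kafka;

import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.embulk.spi.PageReader;

// Hypothetical sketch of the JSON variant; the Avro variant is analogous.
public class JsonFormatTransactionalPageOutput extends KafkaTransactionalPageOutput<ObjectNode, ObjectNode>
{
    public JsonFormatTransactionalPageOutput(KafkaProducer<Object, ObjectNode> producer, PageReader pageReader,
            KafkaOutputColumnVisitor<ObjectNode> columnVisitor, String topic, int taskIndex)
    {
        super(producer, pageReader, columnVisitor, topic, taskIndex);
    }
}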
data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java

@@ -2,7 +2,7 @@ package org.embulk.output.kafka;

  import com.fasterxml.jackson.databind.node.ObjectNode;
  import com.google.common.collect.ImmutableMap;
- import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig;
+ import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
  import io.confluent.kafka.serializers.KafkaAvroSerializer;
  import org.apache.kafka.clients.producer.KafkaProducer;
  import org.apache.kafka.clients.producer.ProducerConfig;
@@ -96,10 +96,10 @@ class RecordProducerFactory
  String schemaRegistryUrl = task.getSchemaRegistryUrl().orElseThrow(() -> new ConfigException("avro_with_schema_registry format needs schema_registry_url"));

  ImmutableMap.Builder<String, String> builder = ImmutableMap.<String, String>builder()
- .put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
+ .put(AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);

  if (task.getValueSubjectNameStrategy().isPresent()) {
- builder.put(AbstractKafkaAvroSerDeConfig.VALUE_SUBJECT_NAME_STRATEGY, task.getValueSubjectNameStrategy().get());
+ builder.put(AbstractKafkaSchemaSerDeConfig.VALUE_SUBJECT_NAME_STRATEGY, task.getValueSubjectNameStrategy().get());
  }

  Map<String, String> avroSerializerConfigs = builder.build();
data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java

@@ -1,5 +1,389 @@
  package org.embulk.output.kafka;

+ import static org.hamcrest.MatcherAssert.assertThat;
+ import static org.hamcrest.Matchers.hasItem;
+ import static org.junit.Assert.assertEquals;
+ import static org.junit.Assert.assertNotNull;
+ import static org.junit.Assert.assertNull;
+
+ import com.fasterxml.jackson.databind.JsonNode;
+ import com.fasterxml.jackson.databind.ObjectMapper;
+ import com.google.common.collect.ImmutableList;
+ import com.google.common.io.Resources;
+ import com.salesforce.kafka.test.KafkaTestUtils;
+ import com.salesforce.kafka.test.junit4.SharedKafkaTestResource;
+ import io.confluent.kafka.schemaregistry.ParsedSchema;
+ import io.confluent.kafka.schemaregistry.avro.AvroSchema;
+ import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
+ import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
+ import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry;
+ import io.confluent.kafka.serializers.KafkaAvroDeserializer;
+ import java.io.IOException;
+ import java.nio.file.Paths;
+ import java.time.Instant;
+ import java.util.ArrayList;
+ import java.util.HashMap;
+ import java.util.List;
+ import java.util.stream.Collectors;
+ import org.apache.avro.generic.GenericRecord;
+ import org.apache.kafka.clients.consumer.ConsumerRecord;
+ import org.apache.kafka.common.serialization.StringDeserializer;
+ import org.embulk.config.ConfigSource;
+ import org.embulk.spi.OutputPlugin;
+ import org.embulk.test.TestingEmbulk;
+ import org.junit.After;
+ import org.junit.Before;
+ import org.junit.ClassRule;
+ import org.junit.Rule;
+ import org.junit.Test;
+
  public class TestKafkaOutputPlugin
  {
+ @ClassRule
+ public static final SharedKafkaTestResource sharedKafkaTestResource = new SharedKafkaTestResource()
+ .withBrokers(3);
+
+ @Rule
+ public TestingEmbulk embulk = TestingEmbulk.builder()
+ .registerPlugin(OutputPlugin.class, "kafka", KafkaOutputPlugin.class)
+ .build();
+
+ private KafkaTestUtils kafkaTestUtils;
+ private final static ObjectMapper objectMapper = new ObjectMapper();
+
+ @Before
+ public void setUp() {
+ kafkaTestUtils = sharedKafkaTestResource.getKafkaTestUtils();
+ kafkaTestUtils.createTopic("json-topic", 8, (short) 1);
+ kafkaTestUtils.createTopic("json-complex-topic", 8, (short) 1);
+ kafkaTestUtils.createTopic("avro-simple-topic", 8, (short) 1);
+ kafkaTestUtils.createTopic("avro-complex-topic", 8, (short) 1);
+ }
+
+ @After
+ public void tearDown() {
+ kafkaTestUtils.getAdminClient().deleteTopics(ImmutableList.of(
+ "json-topic", "json-complex-topic", "avro-simple-topic", "avro-complex-topic"
+ ));
+ }
+
+ @Test
+ public void testSimpleJson() throws IOException
+ {
+ ConfigSource configSource = embulk.loadYamlResource("config_simple.yml");
+ configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+ embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+ List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+ .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+ StringDeserializer.class);
+
+ assertEquals(3, consumerRecords.size());
+ List<JsonNode> deserializedRecords = new ArrayList<>();
+ for (ConsumerRecord<String, String> record : consumerRecords) {
+ deserializedRecords.add(objectMapper.readTree(record.value()));
+ }
+ List<String> ids = deserializedRecords.stream()
+ .map(r -> r.get("id").asText())
+ .collect(Collectors.toList());
+ List<Integer> intItems = deserializedRecords.stream()
+ .map(r -> r.get("int_item").asInt())
+ .collect(Collectors.toList());
+ List<String> varcharItems = deserializedRecords.stream()
+ .map(r -> r.get("varchar_item").asText())
+ .collect(Collectors.toList());
+
+ assertThat(ids, hasItem("A001"));
+ assertThat(ids, hasItem("A002"));
+ assertThat(ids, hasItem("A003"));
+ assertThat(intItems, hasItem(1));
+ assertThat(intItems, hasItem(2));
+ assertThat(intItems, hasItem(3));
+ assertThat(varcharItems, hasItem("a"));
+ assertThat(varcharItems, hasItem("b"));
+ assertThat(varcharItems, hasItem("c"));
+ }
+
+ @Test
+ public void testComplexJson() throws IOException
+ {
+ ConfigSource configSource = embulk.loadYamlResource("config_complex.yml");
+ configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+ embulk.runOutput(configSource, Paths.get(Resources.getResource("in_complex.csv").getPath()));
+ List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+ .consumeAllRecordsFromTopic("json-complex-topic", StringDeserializer.class,
+ StringDeserializer.class);
+
+ assertEquals(3, consumerRecords.size());
+ List<JsonNode> deserializedRecords = new ArrayList<>();
+ for (ConsumerRecord<String, String> record : consumerRecords) {
+ deserializedRecords.add(objectMapper.readTree(record.value()));
+ }
+ List<String> ids = deserializedRecords.stream()
+ .map(r -> r.get("id").asText())
+ .collect(Collectors.toList());
+ List<Integer> intItems = deserializedRecords.stream()
+ .map(r -> r.get("int_item").asInt())
+ .collect(Collectors.toList());
+ List<List<Integer>> arrayItems = deserializedRecords.stream()
+ .map(r -> ImmutableList.of(
+ r.get("array").get(0).asInt(),
+ r.get("array").get(1).asInt(),
+ r.get("array").get(2).asInt()
+ ))
+ .collect(Collectors.toList());
+
+ assertThat(ids, hasItem("A001"));
+ assertThat(ids, hasItem("A002"));
+ assertThat(ids, hasItem("A003"));
+ assertThat(intItems, hasItem(9));
+ assertThat(intItems, hasItem(0));
+ assertThat(arrayItems.get(0), hasItem(1));
+ assertThat(arrayItems.get(0), hasItem(2));
+ assertThat(arrayItems.get(0), hasItem(3));
+ }
+
+ @Test
+ public void testSimpleAvro() throws IOException {
+ ConfigSource configSource = embulk.loadYamlResource("config_simple_avro.yml");
+ configSource.set("brokers", ImmutableList
+ .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+ embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+
+ SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+ .getClientForScope("embulk-output-kafka");
+ KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+ List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+ .consumeAllRecordsFromTopic("avro-simple-topic");
+
+ assertEquals(3, consumerRecords.size());
+ List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+ .deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
+
+ List<String> ids = genericRecords.stream()
+ .map(r -> String.valueOf(r.get("id")))
+ .collect(Collectors.toList());
+ List<Long> intItems = genericRecords.stream()
+ .map(r -> (Long) r.get("int_item"))
+ .collect(Collectors.toList());
+ List<String> varcharItems = genericRecords.stream()
+ .map(r -> String.valueOf(r.get("varchar_item")))
+ .collect(Collectors.toList());
+
+ assertThat(ids, hasItem("A001"));
+ assertThat(ids, hasItem("A002"));
+ assertThat(ids, hasItem("A003"));
+ assertThat(intItems, hasItem(1L));
+ assertThat(intItems, hasItem(2L));
+ assertThat(intItems, hasItem(3L));
+ assertThat(varcharItems, hasItem("a"));
+ assertThat(varcharItems, hasItem("b"));
+ assertThat(varcharItems, hasItem("c"));
+ }
+
+ @Test
+ public void testSimpleAvroSchemaFromRegistry() throws IOException, RestClientException
+ {
+ ConfigSource configSource = embulk.loadYamlResource("config_simple_avro.yml");
+ Object avsc = configSource.get(Object.class, "avsc");
+ String avscString = objectMapper.writeValueAsString(avsc);
+ configSource.set("avsc", null);
+ ParsedSchema parsedSchema = new AvroSchema(avscString);
+ MockSchemaRegistry.getClientForScope("embulk-output-kafka")
+ .register("avro-simple-topic-value", parsedSchema);
+ configSource.set("brokers", ImmutableList
+ .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+ embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+
+ SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+ .getClientForScope("embulk-output-kafka");
+ KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+ List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+ .consumeAllRecordsFromTopic("avro-simple-topic");
+
+ assertEquals(3, consumerRecords.size());
+ List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+ .deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
+
+ List<String> ids = genericRecords.stream()
+ .map(r -> String.valueOf(r.get("id")))
+ .collect(Collectors.toList());
+ List<Long> intItems = genericRecords.stream()
+ .map(r -> (Long) r.get("int_item"))
+ .collect(Collectors.toList());
+ List<String> varcharItems = genericRecords.stream()
+ .map(r -> String.valueOf(r.get("varchar_item")))
+ .collect(Collectors.toList());
+
+ assertThat(ids, hasItem("A001"));
+ assertThat(ids, hasItem("A002"));
+ assertThat(ids, hasItem("A003"));
+ assertThat(intItems, hasItem(1L));
+ assertThat(intItems, hasItem(2L));
+ assertThat(intItems, hasItem(3L));
+ assertThat(varcharItems, hasItem("a"));
+ assertThat(varcharItems, hasItem("b"));
+ assertThat(varcharItems, hasItem("c"));
+ }
+
+ @Test
+ public void testSimpleAvroAvscFile() throws IOException {
+ ConfigSource configSource = embulk.loadYamlResource("config_simple_avro_avsc_file.yml");
+ configSource.set("brokers", ImmutableList
+ .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+ embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+
+ SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+ .getClientForScope("embulk-output-kafka");
+ KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+ List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+ .consumeAllRecordsFromTopic("avro-simple-topic");
+
+ assertEquals(3, consumerRecords.size());
+ List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+ .deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
+
+ List<String> ids = genericRecords.stream()
+ .map(r -> String.valueOf(r.get("id")))
+ .collect(Collectors.toList());
+ List<Long> intItems = genericRecords.stream()
+ .map(r -> (Long) r.get("int_item"))
+ .collect(Collectors.toList());
+ List<String> varcharItems = genericRecords.stream()
+ .map(r -> String.valueOf(r.get("varchar_item")))
+ .collect(Collectors.toList());
+
+ assertThat(ids, hasItem("A001"));
+ assertThat(ids, hasItem("A002"));
+ assertThat(ids, hasItem("A003"));
+ assertThat(intItems, hasItem(1L));
+ assertThat(intItems, hasItem(2L));
+ assertThat(intItems, hasItem(3L));
+ assertThat(varcharItems, hasItem("a"));
+ assertThat(varcharItems, hasItem("b"));
+ assertThat(varcharItems, hasItem("c"));
+ }
+
+ @Test
+ public void testSimpleAvroComplex() throws IOException {
+ ConfigSource configSource = embulk.loadYamlResource("config_complex_avro.yml");
+ configSource.set("brokers", ImmutableList
+ .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+ embulk.runOutput(configSource, Paths.get(Resources.getResource("in_complex.csv").getPath()));
+
+ SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+ .getClientForScope("embulk-output-kafka");
+ KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+ List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+ .consumeAllRecordsFromTopic("avro-complex-topic");
+
+ assertEquals(3, consumerRecords.size());
+ List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+ .deserialize("avro-complex-topic", r.value())).collect(Collectors.toList());
+
+ List<String> ids = genericRecords.stream()
+ .map(r -> String.valueOf(r.get("id")))
+ .collect(Collectors.toList());
+ List<Long> intItems = genericRecords.stream()
+ .map(r -> (Long) r.get("int_item"))
+ .collect(Collectors.toList());
+ List<Instant> timeItems = genericRecords.stream()
+ .map(r -> Instant.ofEpochMilli((long) r.get("time")))
+ .collect(Collectors.toList());
+
+ assertThat(ids, hasItem("A001"));
+ assertThat(ids, hasItem("A002"));
+ assertThat(ids, hasItem("A003"));
+ assertThat(intItems, hasItem(9L));
+ assertThat(intItems, hasItem(0L));
+ assertThat(timeItems, hasItem(Instant.parse("2018-02-01T12:15:18.000Z")));
+ assertThat(timeItems, hasItem(Instant.parse("2018-02-02T12:15:18.000Z")));
+ assertThat(timeItems, hasItem(Instant.parse("2018-02-03T12:15:18.000Z")));
+ }
+
+ @Test
+ public void testKeyColumnConfig() throws IOException
+ {
+ ConfigSource configSource = embulk.loadYamlResource("config_with_key_column.yml");
+ configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+ embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+ List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+ .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+ StringDeserializer.class);
+
+ assertEquals(3, consumerRecords.size());
+ List<String> keys = new ArrayList<>();
+ for (ConsumerRecord<String, String> record : consumerRecords) {
+ keys.add(record.key());
+ }
+
+ assertThat(keys, hasItem("A001"));
+ assertThat(keys, hasItem("A002"));
+ assertThat(keys, hasItem("A003"));
+ }
+
+ @Test
+ public void testPartitionColumnConfig() throws IOException
+ {
+ ConfigSource configSource = embulk.loadYamlResource("config_with_partition_column.yml");
+ configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+ embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+ List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+ .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+ StringDeserializer.class);
+
+ assertEquals(3, consumerRecords.size());
+ List<Integer> partitions = new ArrayList<>();
+ for (ConsumerRecord<String, String> record : consumerRecords) {
+ partitions.add(record.partition());
+ }
+
+ assertThat(partitions, hasItem(1));
+ assertThat(partitions, hasItem(2));
+ assertThat(partitions, hasItem(3));
+ }
+
+ @Test
+ public void testColumnForDeletion() throws IOException
+ {
+ ConfigSource configSource = embulk.loadYamlResource("config_with_column_for_deletion.yml");
+ configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+ embulk.runOutput(configSource, Paths.get(Resources.getResource("in_with_deletion.csv").getPath()));
+ List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+ .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+ StringDeserializer.class);
+
+ assertEquals(3, consumerRecords.size());
+ HashMap<String, String> recordMap = new HashMap<>();
+ consumerRecords.forEach(record -> recordMap.put(record.key(), record.value()));
+ assertNotNull(recordMap.get("A001"));
+ assertNotNull(recordMap.get("A003"));
+ assertNull(recordMap.get("A002"));
+ }
+
+ @Test
+ public void testColumnForDeletionAvro() throws IOException
+ {
+ ConfigSource configSource = embulk.loadYamlResource("config_with_column_for_deletion_avro.yml");
+ configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+ embulk.runOutput(configSource, Paths.get(Resources.getResource("in_with_deletion.csv").getPath()));
+ List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+ .consumeAllRecordsFromTopic("avro-simple-topic", StringDeserializer.class,
+ StringDeserializer.class);
+
+ assertEquals(3, consumerRecords.size());
+ HashMap<String, String> recordMap = new HashMap<>();
+ consumerRecords.forEach(record -> recordMap.put(record.key(), record.value()));
+ assertNotNull(recordMap.get("A001"));
+ assertNotNull(recordMap.get("A003"));
+ assertNull(recordMap.get("A002"));
+ }
  }