embulk-output-kafka 0.1.3 → 0.1.8

Files changed (29)
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +44 -0
  3. data/.github/dependabot.yml +11 -0
  4. data/README.md +6 -1
  5. data/build.gradle +29 -12
  6. data/docker-compose.yml +1 -1
  7. data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +26 -10
  8. data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java +13 -0
  9. data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +27 -5
  10. data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java +13 -0
  11. data/src/main/java/org/embulk/output/kafka/KafkaJsonSerializer.java +4 -0
  12. data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +62 -8
  13. data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +82 -145
  14. data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java +104 -0
  15. data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java +3 -3
  16. data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java +384 -0
  17. data/src/test/resources/config_complex.yml +9 -28
  18. data/src/test/resources/config_complex_avro.yml +23 -42
  19. data/src/test/resources/config_simple.yml +5 -22
  20. data/src/test/resources/config_simple_avro.yml +14 -32
  21. data/src/test/resources/config_simple_avro_avsc_file.yml +7 -25
  22. data/src/test/resources/config_with_column_for_deletion.yml +7 -0
  23. data/src/test/resources/config_with_column_for_deletion_avro.yml +18 -0
  24. data/src/test/resources/config_with_key_column.yml +6 -23
  25. data/src/test/resources/config_with_partition_column.yml +6 -0
  26. data/src/test/resources/in1.csv +4 -4
  27. data/src/test/resources/in_complex.csv +4 -4
  28. data/src/test/resources/in_with_deletion.csv +4 -0
  29. metadata +30 -24
data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java
@@ -4,9 +4,17 @@ import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonValue;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
-import org.apache.avro.generic.GenericData;
+import com.google.common.collect.ImmutableList;
+import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
+import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry;
+import io.confluent.kafka.serializers.subject.TopicNameStrategy;
+import io.confluent.kafka.serializers.subject.strategy.SubjectNameStrategy;
+import org.apache.kafka.clients.admin.AdminClient;
+import org.apache.kafka.clients.admin.AdminClientConfig;
+import org.apache.kafka.clients.admin.DescribeTopicsResult;
 import org.apache.kafka.clients.producer.KafkaProducer;
-import org.apache.kafka.clients.producer.ProducerRecord;
 import org.embulk.config.Config;
 import org.embulk.config.ConfigDefault;
 import org.embulk.config.ConfigDiff;
@@ -15,15 +23,11 @@ import org.embulk.config.ConfigSource;
 import org.embulk.config.Task;
 import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
-import org.embulk.spi.ColumnConfig;
 import org.embulk.spi.Exec;
 import org.embulk.spi.OutputPlugin;
-import org.embulk.spi.Page;
 import org.embulk.spi.PageReader;
 import org.embulk.spi.Schema;
 import org.embulk.spi.TransactionalPageOutput;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
@@ -31,9 +35,10 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
-import java.util.PrimitiveIterator;
-import java.util.Random;
-import java.util.concurrent.atomic.AtomicInteger;
+import java.util.Properties;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 
 public class KafkaOutputPlugin
         implements OutputPlugin
@@ -96,8 +101,12 @@ public class KafkaOutputPlugin
         @ConfigDefault("null")
         public Optional<String> getKeyColumnName();
 
+        @Config("partition_column_name")
+        @ConfigDefault("null")
+        public Optional<String> getPartitionColumnName();
+
         @Config("record_batch_size")
-        @ConfigDefault("1000")
+        @ConfigDefault("16384")
         public int getRecordBatchSize();
 
         @Config("acks")
@@ -119,10 +128,22 @@ public class KafkaOutputPlugin
         @Config("value_subject_name_strategy")
         @ConfigDefault("null")
         public Optional<String> getValueSubjectNameStrategy();
+
+        @Config("column_for_deletion")
+        @ConfigDefault("null")
+        public Optional<String> getColumnForDeletion();
     }
 
     private static ObjectMapper objectMapper = new ObjectMapper();
-    private Logger logger = LoggerFactory.getLogger(getClass());
+
+    private static final int SCHEMA_REGISTRY_IDENTITY_MAP_CAPACITY = 1000;
+
+    private AdminClient getKafkaAdminClient(PluginTask task)
+    {
+        Properties properties = new Properties();
+        properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
+        return AdminClient.create(properties);
+    }
 
     @Override
     public ConfigDiff transaction(ConfigSource config,
@@ -130,11 +151,17 @@ public class KafkaOutputPlugin
             Control control)
     {
         PluginTask task = config.loadConfig(PluginTask.class);
+        AdminClient adminClient = getKafkaAdminClient(task);
+        DescribeTopicsResult result = adminClient.describeTopics(ImmutableList.of(task.getTopic()));
+        try {
+            if (result.all().get(30, TimeUnit.SECONDS).size() == 0) {
+                throw new RuntimeException("target topic is not found");
+            }
+        }
+        catch (InterruptedException | ExecutionException | TimeoutException e) {
+            throw new RuntimeException("failed to connect kafka brokers");
+        }
 
-        // retryable (idempotent) output:
-        // return resume(task.dump(), schema, taskCount, control);
-
-        // non-retryable (non-idempotent) output:
         control.run(task.dump());
         return Exec.newConfigDiff();
     }
@@ -165,95 +192,48 @@ public class KafkaOutputPlugin
            case AVRO_WITH_SCHEMA_REGISTRY:
                return buildPageOutputForAvroWithSchemaRegistry(task, schema, taskIndex);
            default:
-               throw new ConfigException("Unknow serialize format");
+               throw new ConfigException("Unknown serialize format");
        }
    }
 
    private TransactionalPageOutput buildPageOutputForJson(PluginTask task, Schema schema, int taskIndex)
    {
        KafkaProducer<Object, ObjectNode> producer = RecordProducerFactory.getForJson(task, schema, task.getOtherProducerConfigs());
-
        PageReader pageReader = new PageReader(schema);
-       PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
-       AtomicInteger counter = new AtomicInteger(0);
-       AtomicInteger recordLoggingCount = new AtomicInteger(1);
-
-       return new TransactionalPageOutput() {
-           @Override
-           public void add(Page page)
-           {
-               pageReader.setPage(page);
-               while (pageReader.nextRecord()) {
-                   JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
-
-                   pageReader.getSchema().visitColumns(columnVisitor);
-
-                   Object recordKey = columnVisitor.recordKey;
-                   if (recordKey == null) {
-                       recordKey = randomLong.next();
-                   }
-
-                   String targetTopic = columnVisitor.topicName != null ? columnVisitor.topicName : task.getTopic();
-                   ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.jsonNode);
-                   producer.send(producerRecord, (metadata, exception) -> {
-                       if (exception != null) {
-                           logger.error("produce error", exception);
-                       }
-
-                       logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());
-
-                       int current = counter.incrementAndGet();
-                       if (current >= recordLoggingCount.get()) {
-                           logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                           recordLoggingCount.set(recordLoggingCount.get() * 2);
-                       }
-                   });
-               }
-           }
-
-           @Override
-           public void finish()
-           {
-               producer.flush();
-           }
-
-           @Override
-           public void close()
-           {
-               producer.close();
-           }
+       KafkaOutputColumnVisitor<ObjectNode> columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
 
-           @Override
-           public void abort()
-           {
-               producer.flush();
-               producer.close();
-           }
-
-           @Override
-           public TaskReport commit()
-           {
-               return null;
-           }
-       };
+       return new JsonFormatTransactionalPageOutput(producer, pageReader, columnVisitor, task.getTopic(), taskIndex);
    }
 
    private TransactionalPageOutput buildPageOutputForAvroWithSchemaRegistry(PluginTask task, Schema schema, int taskIndex)
    {
        KafkaProducer<Object, Object> producer = RecordProducerFactory.getForAvroWithSchemaRegistry(task, schema, task.getOtherProducerConfigs());
-
        PageReader pageReader = new PageReader(schema);
+       org.apache.avro.Schema avroSchema = getAvroSchema(task);
+       AvroFormatColumnVisitor avroFormatColumnVisitor = new AvroFormatColumnVisitor(task, pageReader, avroSchema);
 
+       return new AvroFormatTransactionalPageOutput(producer, pageReader, avroFormatColumnVisitor, task.getTopic(), taskIndex);
+   }
+
+   private org.apache.avro.Schema getAvroSchema(PluginTask task)
+   {
       org.apache.avro.Schema avroSchema = null;
-      if (!task.getAvsc().isPresent() && !task.getAvscFile().isPresent() || task.getAvsc().isPresent() == task.getAvscFile().isPresent()) {
+      if (!task.getSchemaRegistryUrl().isPresent()) {
+          throw new ConfigException("avro_with_schema_registry format needs schema_registry_url");
+      }
+
+      if (task.getAvsc().isPresent() && task.getAvscFile().isPresent()) {
          throw new ConfigException("avro_with_schema_registry format needs either one of avsc and avsc_file");
      }
+
      if (task.getAvsc().isPresent()) {
          avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvsc().get().toString());
+         return avroSchema;
      }
      if (task.getAvscFile().isPresent()) {
          try {
              avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvscFile().get());
+             return avroSchema;
          }
          catch (IOException e) {
              e.printStackTrace();
@@ -261,71 +241,28 @@ public class KafkaOutputPlugin
          }
      }
 
-     PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
-
-     AtomicInteger counter = new AtomicInteger(0);
-     AtomicInteger recordLoggingCount = new AtomicInteger(1);
-
-     final org.apache.avro.Schema finalAvroSchema = avroSchema;
-     return new TransactionalPageOutput()
-     {
-         @Override
-         public void add(Page page)
-         {
-             pageReader.setPage(page);
-             while (pageReader.nextRecord()) {
-                 AvroFormatColumnVisitor columnVisitor = new AvroFormatColumnVisitor(task, pageReader, finalAvroSchema, new GenericData.Record(finalAvroSchema));
-
-                 pageReader.getSchema().visitColumns(columnVisitor);
-
-                 Object recordKey = columnVisitor.recordKey;
-                 if (recordKey == null) {
-                     recordKey = randomLong.next();
-                 }
-
-                 String targetTopic = columnVisitor.topicName != null ? columnVisitor.topicName : task.getTopic();
-
-                 ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.genericRecord);
-                 producer.send(producerRecord, (metadata, exception) -> {
-                     if (exception != null) {
-                         logger.error("produce error", exception);
-                     }
-
-                     logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());
-
-                     int current = counter.incrementAndGet();
-                     if (current >= recordLoggingCount.get()) {
-                         logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                         recordLoggingCount.set(recordLoggingCount.get() * 2);
-                     }
-                 });
-             }
-         }
-
-         @Override
-         public void finish()
-         {
-             producer.flush();
-         }
-
-         @Override
-         public void close()
-         {
-             producer.close();
-         }
-
-         @Override
-         public void abort()
-         {
-             producer.flush();
-             producer.close();
-         }
+     SchemaRegistryClient schemaRegistryClient = getSchemaRegistryClient(task.getSchemaRegistryUrl().get());
+     SubjectNameStrategy subjectNameStrategy = new TopicNameStrategy();
+     String subjectName = subjectNameStrategy.subjectName(task.getTopic(), false, null);
+     try {
+         String schema = schemaRegistryClient.getLatestSchemaMetadata(subjectName).getSchema();
+         avroSchema = new org.apache.avro.Schema.Parser().parse(schema);
+         return avroSchema;
+     }
+     catch (IOException | RestClientException e) {
+         throw new ConfigException("cannot fetch latest schema from schema registry.", e);
+     }
+   }
 
-         @Override
-         public TaskReport commit()
-         {
-             return null;
-         }
-     };
+   private static final String MOCK_SCHEMA_REGISTRY_PREFIX = "mock://";
+   private SchemaRegistryClient getSchemaRegistryClient(String url)
+   {
+       if (url.startsWith(MOCK_SCHEMA_REGISTRY_PREFIX)) {
+           String mockScope = url.substring(MOCK_SCHEMA_REGISTRY_PREFIX.length());
+           return MockSchemaRegistry.getClientForScope(mockScope);
+       }
+       else {
+           return new CachedSchemaRegistryClient(url, SCHEMA_REGISTRY_IDENTITY_MAP_CAPACITY);
+       }
    }
 }
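
Note on the new task options above: this release adds partition_column_name and column_for_deletion, and raises the default record_batch_size from 1000 to 16384. A minimal sketch of exercising them through the TestingEmbulk harness used by the tests further below; the column names and values here are illustrative assumptions, not taken from this diff:

    // Sketch only (hypothetical column names), mirroring the ConfigSource pattern in TestKafkaOutputPlugin.
    ConfigSource configSource = embulk.loadYamlResource("config_simple.yml")
            .set("partition_column_name", "partition")   // route each row to the partition number in this column
            .set("column_for_deletion", "deleted")       // the deletion tests below assert a null value (tombstone) for flagged rows
            .set("record_batch_size", 16384);            // new default in 0.1.8; was 1000 in 0.1.3
    embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));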
data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java
@@ -0,0 +1,104 @@
+package org.embulk.output.kafka;
+
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.embulk.config.TaskReport;
+import org.embulk.spi.Page;
+import org.embulk.spi.PageReader;
+import org.embulk.spi.TransactionalPageOutput;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.PrimitiveIterator;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicLong;
+
+public abstract class KafkaTransactionalPageOutput<P, T extends P> implements TransactionalPageOutput
+{
+    private static final Logger logger = LoggerFactory.getLogger(KafkaTransactionalPageOutput.class);
+
+    private final KafkaProducer<Object, P> producer;
+    private final PageReader pageReader;
+    private final KafkaOutputColumnVisitor<T> columnVisitor;
+    private final String topic;
+    private final int taskIndex;
+
+    private final PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
+    private final AtomicLong counter = new AtomicLong(0);
+    private final AtomicLong recordLoggingCount = new AtomicLong(1);
+
+    public KafkaTransactionalPageOutput(
+            KafkaProducer<Object, P> producer,
+            PageReader pageReader,
+            KafkaOutputColumnVisitor<T> columnVisitor,
+            String topic, int taskIndex)
+    {
+        this.producer = producer;
+        this.pageReader = pageReader;
+        this.columnVisitor = columnVisitor;
+        this.topic = topic;
+        this.taskIndex = taskIndex;
+    }
+
+    @Override
+    public void add(Page page)
+    {
+        pageReader.setPage(page);
+        while (pageReader.nextRecord()) {
+            columnVisitor.reset();
+
+            pageReader.getSchema().visitColumns(columnVisitor);
+
+            Object recordKey = columnVisitor.getRecordKey();
+            if (recordKey == null) {
+                recordKey = randomLong.next();
+            }
+
+            String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : topic;
+
+            ProducerRecord<Object, P> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getRecord());
+            producer.send(producerRecord, (metadata, exception) -> {
+                if (exception != null) {
+                    logger.error("produce error", exception);
+                }
+
+                logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
+                        producerRecord.topic(),
+                        producerRecord.key(),
+                        producerRecord.value(),
+                        producerRecord.partition());
+
+                long current = counter.incrementAndGet();
+                if (current >= recordLoggingCount.get()) {
+                    logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
+                    recordLoggingCount.set(recordLoggingCount.get() * 2);
+                }
+            });
+        }
+    }
+
+    @Override
+    public void finish()
+    {
+        producer.flush();
+    }
+
+    @Override
+    public void close()
+    {
+        producer.close();
+    }
+
+    @Override
+    public void abort()
+    {
+        producer.flush();
+        producer.close();
+    }
+
+    @Override
+    public TaskReport commit()
+    {
+        return null;
+    }
+};
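
The concrete subclasses added in this release, JsonFormatTransactionalPageOutput and AvroFormatTransactionalPageOutput (+13 lines each), are not expanded above. Based on the constructor call in KafkaOutputPlugin#buildPageOutputForJson, the JSON variant plausibly looks like this minimal sketch (not the verbatim file contents):

    package org.embulk.output.kafka;

    import com.fasterxml.jackson.databind.node.ObjectNode;
    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.embulk.spi.PageReader;

    // Sketch: bind the generic base class to ObjectNode values and forward the constructor arguments.
    public class JsonFormatTransactionalPageOutput
            extends KafkaTransactionalPageOutput<ObjectNode, ObjectNode>
    {
        public JsonFormatTransactionalPageOutput(
                KafkaProducer<Object, ObjectNode> producer,
                PageReader pageReader,
                KafkaOutputColumnVisitor<ObjectNode> columnVisitor,
                String topic, int taskIndex)
        {
            super(producer, pageReader, columnVisitor, topic, taskIndex);
        }
    }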
data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java
@@ -2,7 +2,7 @@ package org.embulk.output.kafka;
 
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.collect.ImmutableMap;
-import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig;
+import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
 import io.confluent.kafka.serializers.KafkaAvroSerializer;
 import org.apache.kafka.clients.producer.KafkaProducer;
 import org.apache.kafka.clients.producer.ProducerConfig;
@@ -96,10 +96,10 @@ class RecordProducerFactory
        String schemaRegistryUrl = task.getSchemaRegistryUrl().orElseThrow(() -> new ConfigException("avro_with_schema_registry format needs schema_registry_url"));
 
        ImmutableMap.Builder<String, String> builder = ImmutableMap.<String, String>builder()
-               .put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
+               .put(AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
 
        if (task.getValueSubjectNameStrategy().isPresent()) {
-           builder.put(AbstractKafkaAvroSerDeConfig.VALUE_SUBJECT_NAME_STRATEGY, task.getValueSubjectNameStrategy().get());
+           builder.put(AbstractKafkaSchemaSerDeConfig.VALUE_SUBJECT_NAME_STRATEGY, task.getValueSubjectNameStrategy().get());
        }
 
        Map<String, String> avroSerializerConfigs = builder.build();
data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java
@@ -1,5 +1,389 @@
 package org.embulk.output.kafka;
 
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.hasItem;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableList;
+import com.google.common.io.Resources;
+import com.salesforce.kafka.test.KafkaTestUtils;
+import com.salesforce.kafka.test.junit4.SharedKafkaTestResource;
+import io.confluent.kafka.schemaregistry.ParsedSchema;
+import io.confluent.kafka.schemaregistry.avro.AvroSchema;
+import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
+import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry;
+import io.confluent.kafka.serializers.KafkaAvroDeserializer;
+import java.io.IOException;
+import java.nio.file.Paths;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.embulk.config.ConfigSource;
+import org.embulk.spi.OutputPlugin;
+import org.embulk.test.TestingEmbulk;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+
 public class TestKafkaOutputPlugin
 {
+    @ClassRule
+    public static final SharedKafkaTestResource sharedKafkaTestResource = new SharedKafkaTestResource()
+            .withBrokers(3);
+
+    @Rule
+    public TestingEmbulk embulk = TestingEmbulk.builder()
+            .registerPlugin(OutputPlugin.class, "kafka", KafkaOutputPlugin.class)
+            .build();
+
+    private KafkaTestUtils kafkaTestUtils;
+    private final static ObjectMapper objectMapper = new ObjectMapper();
+
+    @Before
+    public void setUp() {
+        kafkaTestUtils = sharedKafkaTestResource.getKafkaTestUtils();
+        kafkaTestUtils.createTopic("json-topic", 8, (short) 1);
+        kafkaTestUtils.createTopic("json-complex-topic", 8, (short) 1);
+        kafkaTestUtils.createTopic("avro-simple-topic", 8, (short) 1);
+        kafkaTestUtils.createTopic("avro-complex-topic", 8, (short) 1);
+    }
+
+    @After
+    public void tearDown() {
+        kafkaTestUtils.getAdminClient().deleteTopics(ImmutableList.of(
+                "json-topic", "json-complex-topic", "avro-simple-topic", "avro-complex-topic"
+        ));
+    }
+
+    @Test
+    public void testSimpleJson() throws IOException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_simple.yml");
+        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+        List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+                        StringDeserializer.class);
+
+        assertEquals(3, consumerRecords.size());
+        List<JsonNode> deserializedRecords = new ArrayList<>();
+        for (ConsumerRecord<String, String> record : consumerRecords) {
+            deserializedRecords.add(objectMapper.readTree(record.value()));
+        }
+        List<String> ids = deserializedRecords.stream()
+                .map(r -> r.get("id").asText())
+                .collect(Collectors.toList());
+        List<Integer> intItems = deserializedRecords.stream()
+                .map(r -> r.get("int_item").asInt())
+                .collect(Collectors.toList());
+        List<String> varcharItems = deserializedRecords.stream()
+                .map(r -> r.get("varchar_item").asText())
+                .collect(Collectors.toList());
+
+        assertThat(ids, hasItem("A001"));
+        assertThat(ids, hasItem("A002"));
+        assertThat(ids, hasItem("A003"));
+        assertThat(intItems, hasItem(1));
+        assertThat(intItems, hasItem(2));
+        assertThat(intItems, hasItem(3));
+        assertThat(varcharItems, hasItem("a"));
+        assertThat(varcharItems, hasItem("b"));
+        assertThat(varcharItems, hasItem("c"));
+    }
+
+    @Test
+    public void testComplexJson() throws IOException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_complex.yml");
+        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in_complex.csv").getPath()));
+        List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("json-complex-topic", StringDeserializer.class,
+                        StringDeserializer.class);
+
+        assertEquals(3, consumerRecords.size());
+        List<JsonNode> deserializedRecords = new ArrayList<>();
+        for (ConsumerRecord<String, String> record : consumerRecords) {
+            deserializedRecords.add(objectMapper.readTree(record.value()));
+        }
+        List<String> ids = deserializedRecords.stream()
+                .map(r -> r.get("id").asText())
+                .collect(Collectors.toList());
+        List<Integer> intItems = deserializedRecords.stream()
+                .map(r -> r.get("int_item").asInt())
+                .collect(Collectors.toList());
+        List<List<Integer>> arrayItems = deserializedRecords.stream()
+                .map(r -> ImmutableList.of(
+                        r.get("array").get(0).asInt(),
+                        r.get("array").get(1).asInt(),
+                        r.get("array").get(2).asInt()
+                ))
+                .collect(Collectors.toList());
+
+        assertThat(ids, hasItem("A001"));
+        assertThat(ids, hasItem("A002"));
+        assertThat(ids, hasItem("A003"));
+        assertThat(intItems, hasItem(9));
+        assertThat(intItems, hasItem(0));
+        assertThat(arrayItems.get(0), hasItem(1));
+        assertThat(arrayItems.get(0), hasItem(2));
+        assertThat(arrayItems.get(0), hasItem(3));
+    }
+
+    @Test
+    public void testSimpleAvro() throws IOException {
+        ConfigSource configSource = embulk.loadYamlResource("config_simple_avro.yml");
+        configSource.set("brokers", ImmutableList
+                .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+
+        SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+                .getClientForScope("embulk-output-kafka");
+        KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+        List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("avro-simple-topic");
+
+        assertEquals(3, consumerRecords.size());
+        List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+                .deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
+
+        List<String> ids = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("id")))
+                .collect(Collectors.toList());
+        List<Long> intItems = genericRecords.stream()
+                .map(r -> (Long) r.get("int_item"))
+                .collect(Collectors.toList());
+        List<String> varcharItems = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("varchar_item")))
+                .collect(Collectors.toList());
+
+        assertThat(ids, hasItem("A001"));
+        assertThat(ids, hasItem("A002"));
+        assertThat(ids, hasItem("A003"));
+        assertThat(intItems, hasItem(1L));
+        assertThat(intItems, hasItem(2L));
+        assertThat(intItems, hasItem(3L));
+        assertThat(varcharItems, hasItem("a"));
+        assertThat(varcharItems, hasItem("b"));
+        assertThat(varcharItems, hasItem("c"));
+    }
+
+    @Test
+    public void testSimpleAvroSchemaFromRegistry() throws IOException, RestClientException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_simple_avro.yml");
+        Object avsc = configSource.get(Object.class, "avsc");
+        String avscString = objectMapper.writeValueAsString(avsc);
+        configSource.set("avsc", null);
+        ParsedSchema parsedSchema = new AvroSchema(avscString);
+        MockSchemaRegistry.getClientForScope("embulk-output-kafka")
+                .register("avro-simple-topic-value", parsedSchema);
+        configSource.set("brokers", ImmutableList
+                .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+
+        SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+                .getClientForScope("embulk-output-kafka");
+        KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+        List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("avro-simple-topic");
+
+        assertEquals(3, consumerRecords.size());
+        List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+                .deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
+
+        List<String> ids = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("id")))
+                .collect(Collectors.toList());
+        List<Long> intItems = genericRecords.stream()
+                .map(r -> (Long) r.get("int_item"))
+                .collect(Collectors.toList());
+        List<String> varcharItems = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("varchar_item")))
+                .collect(Collectors.toList());
+
+        assertThat(ids, hasItem("A001"));
+        assertThat(ids, hasItem("A002"));
+        assertThat(ids, hasItem("A003"));
+        assertThat(intItems, hasItem(1L));
+        assertThat(intItems, hasItem(2L));
+        assertThat(intItems, hasItem(3L));
+        assertThat(varcharItems, hasItem("a"));
+        assertThat(varcharItems, hasItem("b"));
+        assertThat(varcharItems, hasItem("c"));
+    }
+
+    @Test
+    public void testSimpleAvroAvscFile() throws IOException {
+        ConfigSource configSource = embulk.loadYamlResource("config_simple_avro_avsc_file.yml");
+        configSource.set("brokers", ImmutableList
+                .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+
+        SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+                .getClientForScope("embulk-output-kafka");
+        KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+        List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("avro-simple-topic");
+
+        assertEquals(3, consumerRecords.size());
+        List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+                .deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
+
+        List<String> ids = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("id")))
+                .collect(Collectors.toList());
+        List<Long> intItems = genericRecords.stream()
+                .map(r -> (Long) r.get("int_item"))
+                .collect(Collectors.toList());
+        List<String> varcharItems = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("varchar_item")))
+                .collect(Collectors.toList());
+
+        assertThat(ids, hasItem("A001"));
+        assertThat(ids, hasItem("A002"));
+        assertThat(ids, hasItem("A003"));
+        assertThat(intItems, hasItem(1L));
+        assertThat(intItems, hasItem(2L));
+        assertThat(intItems, hasItem(3L));
+        assertThat(varcharItems, hasItem("a"));
+        assertThat(varcharItems, hasItem("b"));
+        assertThat(varcharItems, hasItem("c"));
+    }
+
+    @Test
+    public void testSimpleAvroComplex() throws IOException {
+        ConfigSource configSource = embulk.loadYamlResource("config_complex_avro.yml");
+        configSource.set("brokers", ImmutableList
+                .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in_complex.csv").getPath()));
+
+        SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+                .getClientForScope("embulk-output-kafka");
+        KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+        List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("avro-complex-topic");
+
+        assertEquals(3, consumerRecords.size());
+        List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+                .deserialize("avro-complex-topic", r.value())).collect(Collectors.toList());
+
+        List<String> ids = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("id")))
+                .collect(Collectors.toList());
+        List<Long> intItems = genericRecords.stream()
+                .map(r -> (Long) r.get("int_item"))
+                .collect(Collectors.toList());
+        List<Instant> timeItems = genericRecords.stream()
+                .map(r -> Instant.ofEpochMilli((long) r.get("time")))
+                .collect(Collectors.toList());
+
+        assertThat(ids, hasItem("A001"));
+        assertThat(ids, hasItem("A002"));
+        assertThat(ids, hasItem("A003"));
+        assertThat(intItems, hasItem(9L));
+        assertThat(intItems, hasItem(0L));
+        assertThat(timeItems, hasItem(Instant.parse("2018-02-01T12:15:18.000Z")));
+        assertThat(timeItems, hasItem(Instant.parse("2018-02-02T12:15:18.000Z")));
+        assertThat(timeItems, hasItem(Instant.parse("2018-02-03T12:15:18.000Z")));
+    }
+
+    @Test
+    public void testKeyColumnConfig() throws IOException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_with_key_column.yml");
+        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+        List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+                        StringDeserializer.class);
+
+        assertEquals(3, consumerRecords.size());
+        List<String> keys = new ArrayList<>();
+        for (ConsumerRecord<String, String> record : consumerRecords) {
+            keys.add(record.key());
+        }
+
+        assertThat(keys, hasItem("A001"));
+        assertThat(keys, hasItem("A002"));
+        assertThat(keys, hasItem("A003"));
+    }
+
+    @Test
+    public void testPartitionColumnConfig() throws IOException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_with_partition_column.yml");
+        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+        List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+                        StringDeserializer.class);
+
+        assertEquals(3, consumerRecords.size());
+        List<Integer> partitions = new ArrayList<>();
+        for (ConsumerRecord<String, String> record : consumerRecords) {
+            partitions.add(record.partition());
+        }
+
+        assertThat(partitions, hasItem(1));
+        assertThat(partitions, hasItem(2));
+        assertThat(partitions, hasItem(3));
+    }
+
+    @Test
+    public void testColumnForDeletion() throws IOException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_with_column_for_deletion.yml");
+        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in_with_deletion.csv").getPath()));
+        List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+                        StringDeserializer.class);
+
+        assertEquals(3, consumerRecords.size());
+        HashMap<String, String> recordMap = new HashMap<>();
+        consumerRecords.forEach(record -> recordMap.put(record.key(), record.value()));
+        assertNotNull(recordMap.get("A001"));
+        assertNotNull(recordMap.get("A003"));
+        assertNull(recordMap.get("A002"));
+    }
+
+    @Test
+    public void testColumnForDeletionAvro() throws IOException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_with_column_for_deletion_avro.yml");
+        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in_with_deletion.csv").getPath()));
+        List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("avro-simple-topic", StringDeserializer.class,
+                        StringDeserializer.class);
+
+        assertEquals(3, consumerRecords.size());
+        HashMap<String, String> recordMap = new HashMap<>();
+        consumerRecords.forEach(record -> recordMap.put(record.key(), record.value()));
+        assertNotNull(recordMap.get("A001"));
+        assertNotNull(recordMap.get("A003"));
+        assertNull(recordMap.get("A002"));
+    }
 }