embulk-output-kafka 0.1.7 → 0.1.8

Files changed (29)
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +44 -0
  3. data/.github/dependabot.yml +11 -0
  4. data/README.md +5 -1
  5. data/build.gradle +29 -12
  6. data/docker-compose.yml +1 -1
  7. data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +13 -8
  8. data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java +13 -0
  9. data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +9 -2
  10. data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java +13 -0
  11. data/src/main/java/org/embulk/output/kafka/KafkaJsonSerializer.java +4 -0
  12. data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +25 -1
  13. data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +54 -153
  14. data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java +104 -0
  15. data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java +3 -3
  16. data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java +384 -0
  17. data/src/test/resources/config_complex.yml +9 -28
  18. data/src/test/resources/config_complex_avro.yml +23 -42
  19. data/src/test/resources/config_simple.yml +5 -22
  20. data/src/test/resources/config_simple_avro.yml +14 -32
  21. data/src/test/resources/config_simple_avro_avsc_file.yml +7 -25
  22. data/src/test/resources/config_with_column_for_deletion.yml +7 -0
  23. data/src/test/resources/config_with_column_for_deletion_avro.yml +18 -0
  24. data/src/test/resources/config_with_key_column.yml +6 -23
  25. data/src/test/resources/config_with_partition_column.yml +6 -0
  26. data/src/test/resources/in1.csv +4 -4
  27. data/src/test/resources/in_complex.csv +4 -4
  28. data/src/test/resources/in_with_deletion.csv +4 -0
  29. metadata +30 -24

data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java
@@ -0,0 +1,104 @@
+ package org.embulk.output.kafka;
+
+ import org.apache.kafka.clients.producer.KafkaProducer;
+ import org.apache.kafka.clients.producer.ProducerRecord;
+ import org.embulk.config.TaskReport;
+ import org.embulk.spi.Page;
+ import org.embulk.spi.PageReader;
+ import org.embulk.spi.TransactionalPageOutput;
+ import org.slf4j.Logger;
+ import org.slf4j.LoggerFactory;
+
+ import java.util.PrimitiveIterator;
+ import java.util.Random;
+ import java.util.concurrent.atomic.AtomicLong;
+
+ public abstract class KafkaTransactionalPageOutput<P, T extends P> implements TransactionalPageOutput
+ {
+     private static final Logger logger = LoggerFactory.getLogger(KafkaTransactionalPageOutput.class);
+
+     private final KafkaProducer<Object, P> producer;
+     private final PageReader pageReader;
+     private final KafkaOutputColumnVisitor<T> columnVisitor;
+     private final String topic;
+     private final int taskIndex;
+
+     private final PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
+     private final AtomicLong counter = new AtomicLong(0);
+     private final AtomicLong recordLoggingCount = new AtomicLong(1);
+
+     public KafkaTransactionalPageOutput(
+             KafkaProducer<Object, P> producer,
+             PageReader pageReader,
+             KafkaOutputColumnVisitor<T> columnVisitor,
+             String topic, int taskIndex)
+     {
+         this.producer = producer;
+         this.pageReader = pageReader;
+         this.columnVisitor = columnVisitor;
+         this.topic = topic;
+         this.taskIndex = taskIndex;
+     }
+
+     @Override
+     public void add(Page page)
+     {
+         pageReader.setPage(page);
+         while (pageReader.nextRecord()) {
+             columnVisitor.reset();
+
+             pageReader.getSchema().visitColumns(columnVisitor);
+
+             Object recordKey = columnVisitor.getRecordKey();
+             if (recordKey == null) {
+                 recordKey = randomLong.next();
+             }
+
+             String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : topic;
+
+             ProducerRecord<Object, P> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getRecord());
+             producer.send(producerRecord, (metadata, exception) -> {
+                 if (exception != null) {
+                     logger.error("produce error", exception);
+                 }
+
+                 logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
+                         producerRecord.topic(),
+                         producerRecord.key(),
+                         producerRecord.value(),
+                         producerRecord.partition());
+
+                 long current = counter.incrementAndGet();
+                 if (current >= recordLoggingCount.get()) {
+                     logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
+                     recordLoggingCount.set(recordLoggingCount.get() * 2);
+                 }
+             });
+         }
+     }
+
+     @Override
+     public void finish()
+     {
+         producer.flush();
+     }
+
+     @Override
+     public void close()
+     {
+         producer.close();
+     }
+
+     @Override
+     public void abort()
+     {
+         producer.flush();
+         producer.close();
+     }
+
+     @Override
+     public TaskReport commit()
+     {
+         return null;
+     }
+ };
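
The format-specific page outputs in the file list above (JsonFormatTransactionalPageOutput.java and AvroFormatTransactionalPageOutput.java, +13 lines each) build on this new base class, but their diffs are not reproduced on this page. The following is only a sketch, assuming the JSON variant does nothing more than bind the type parameters and delegate to the constructor:

package org.embulk.output.kafka;

import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.embulk.spi.PageReader;

// Sketch only: the actual JsonFormatTransactionalPageOutput added in 0.1.8 is
// not shown above; this illustrates how a subclass binds the <P, T extends P>
// parameters of KafkaTransactionalPageOutput and reuses its add()/finish() logic.
public class JsonFormatTransactionalPageOutput
        extends KafkaTransactionalPageOutput<ObjectNode, ObjectNode>
{
    public JsonFormatTransactionalPageOutput(
            KafkaProducer<Object, ObjectNode> producer,
            PageReader pageReader,
            KafkaOutputColumnVisitor<ObjectNode> columnVisitor,
            String topic, int taskIndex)
    {
        super(producer, pageReader, columnVisitor, topic, taskIndex);
    }
}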

data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java
@@ -2,7 +2,7 @@ package org.embulk.output.kafka;

  import com.fasterxml.jackson.databind.node.ObjectNode;
  import com.google.common.collect.ImmutableMap;
- import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig;
+ import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
  import io.confluent.kafka.serializers.KafkaAvroSerializer;
  import org.apache.kafka.clients.producer.KafkaProducer;
  import org.apache.kafka.clients.producer.ProducerConfig;
@@ -96,10 +96,10 @@ class RecordProducerFactory
          String schemaRegistryUrl = task.getSchemaRegistryUrl().orElseThrow(() -> new ConfigException("avro_with_schema_registry format needs schema_registry_url"));

          ImmutableMap.Builder<String, String> builder = ImmutableMap.<String, String>builder()
-                 .put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
+                 .put(AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);

          if (task.getValueSubjectNameStrategy().isPresent()) {
-             builder.put(AbstractKafkaAvroSerDeConfig.VALUE_SUBJECT_NAME_STRATEGY, task.getValueSubjectNameStrategy().get());
+             builder.put(AbstractKafkaSchemaSerDeConfig.VALUE_SUBJECT_NAME_STRATEGY, task.getValueSubjectNameStrategy().get());
          }

          Map<String, String> avroSerializerConfigs = builder.build();
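
This rename tracks newer Confluent schema-registry client releases, in which AbstractKafkaAvroSerDeConfig was superseded by AbstractKafkaSchemaSerDeConfig; the constant names used here (SCHEMA_REGISTRY_URL_CONFIG, VALUE_SUBJECT_NAME_STRATEGY) are unchanged. A minimal, self-contained sketch of the renamed class in use, with placeholder broker and registry addresses rather than values taken from this plugin:

import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
import io.confluent.kafka.serializers.KafkaAvroSerializer;
import java.util.HashMap;
import java.util.Map;
import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.StringSerializer;

// Sketch only: a plain Avro producer configured through the renamed
// AbstractKafkaSchemaSerDeConfig. "localhost:9092" and "http://localhost:8081"
// are placeholders, not configuration taken from embulk-output-kafka.
public class AvroProducerSketch
{
    public static KafkaProducer<String, GenericRecord> build()
    {
        Map<String, Object> props = new HashMap<>();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);
        props.put(AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, "http://localhost:8081");
        return new KafkaProducer<>(props);
    }
}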

data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java
@@ -1,5 +1,389 @@
  package org.embulk.output.kafka;

+ import static org.hamcrest.MatcherAssert.assertThat;
+ import static org.hamcrest.Matchers.hasItem;
+ import static org.junit.Assert.assertEquals;
+ import static org.junit.Assert.assertNotNull;
+ import static org.junit.Assert.assertNull;
+
+ import com.fasterxml.jackson.databind.JsonNode;
+ import com.fasterxml.jackson.databind.ObjectMapper;
+ import com.google.common.collect.ImmutableList;
+ import com.google.common.io.Resources;
+ import com.salesforce.kafka.test.KafkaTestUtils;
+ import com.salesforce.kafka.test.junit4.SharedKafkaTestResource;
+ import io.confluent.kafka.schemaregistry.ParsedSchema;
+ import io.confluent.kafka.schemaregistry.avro.AvroSchema;
+ import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
+ import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
+ import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry;
+ import io.confluent.kafka.serializers.KafkaAvroDeserializer;
+ import java.io.IOException;
+ import java.nio.file.Paths;
+ import java.time.Instant;
+ import java.util.ArrayList;
+ import java.util.HashMap;
+ import java.util.List;
+ import java.util.stream.Collectors;
+ import org.apache.avro.generic.GenericRecord;
+ import org.apache.kafka.clients.consumer.ConsumerRecord;
+ import org.apache.kafka.common.serialization.StringDeserializer;
+ import org.embulk.config.ConfigSource;
+ import org.embulk.spi.OutputPlugin;
+ import org.embulk.test.TestingEmbulk;
+ import org.junit.After;
+ import org.junit.Before;
+ import org.junit.ClassRule;
+ import org.junit.Rule;
+ import org.junit.Test;
+
  public class TestKafkaOutputPlugin
  {
+     @ClassRule
+     public static final SharedKafkaTestResource sharedKafkaTestResource = new SharedKafkaTestResource()
+             .withBrokers(3);
+
+     @Rule
+     public TestingEmbulk embulk = TestingEmbulk.builder()
+             .registerPlugin(OutputPlugin.class, "kafka", KafkaOutputPlugin.class)
+             .build();
+
+     private KafkaTestUtils kafkaTestUtils;
+     private final static ObjectMapper objectMapper = new ObjectMapper();
+
+     @Before
+     public void setUp() {
+         kafkaTestUtils = sharedKafkaTestResource.getKafkaTestUtils();
+         kafkaTestUtils.createTopic("json-topic", 8, (short) 1);
+         kafkaTestUtils.createTopic("json-complex-topic", 8, (short) 1);
+         kafkaTestUtils.createTopic("avro-simple-topic", 8, (short) 1);
+         kafkaTestUtils.createTopic("avro-complex-topic", 8, (short) 1);
+     }
+
+     @After
+     public void tearDown() {
+         kafkaTestUtils.getAdminClient().deleteTopics(ImmutableList.of(
+                 "json-topic", "json-complex-topic", "avro-simple-topic", "avro-complex-topic"
+         ));
+     }
+
+     @Test
+     public void testSimpleJson() throws IOException
+     {
+         ConfigSource configSource = embulk.loadYamlResource("config_simple.yml");
+         configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+         embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+         List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                 .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+                         StringDeserializer.class);
+
+         assertEquals(3, consumerRecords.size());
+         List<JsonNode> deserializedRecords = new ArrayList<>();
+         for (ConsumerRecord<String, String> record : consumerRecords) {
+             deserializedRecords.add(objectMapper.readTree(record.value()));
+         }
+         List<String> ids = deserializedRecords.stream()
+                 .map(r -> r.get("id").asText())
+                 .collect(Collectors.toList());
+         List<Integer> intItems = deserializedRecords.stream()
+                 .map(r -> r.get("int_item").asInt())
+                 .collect(Collectors.toList());
+         List<String> varcharItems = deserializedRecords.stream()
+                 .map(r -> r.get("varchar_item").asText())
+                 .collect(Collectors.toList());
+
+         assertThat(ids, hasItem("A001"));
+         assertThat(ids, hasItem("A002"));
+         assertThat(ids, hasItem("A003"));
+         assertThat(intItems, hasItem(1));
+         assertThat(intItems, hasItem(2));
+         assertThat(intItems, hasItem(3));
+         assertThat(varcharItems, hasItem("a"));
+         assertThat(varcharItems, hasItem("b"));
+         assertThat(varcharItems, hasItem("c"));
+     }
+
+     @Test
+     public void testComplexJson() throws IOException
+     {
+         ConfigSource configSource = embulk.loadYamlResource("config_complex.yml");
+         configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+         embulk.runOutput(configSource, Paths.get(Resources.getResource("in_complex.csv").getPath()));
+         List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                 .consumeAllRecordsFromTopic("json-complex-topic", StringDeserializer.class,
+                         StringDeserializer.class);
+
+         assertEquals(3, consumerRecords.size());
+         List<JsonNode> deserializedRecords = new ArrayList<>();
+         for (ConsumerRecord<String, String> record : consumerRecords) {
+             deserializedRecords.add(objectMapper.readTree(record.value()));
+         }
+         List<String> ids = deserializedRecords.stream()
+                 .map(r -> r.get("id").asText())
+                 .collect(Collectors.toList());
+         List<Integer> intItems = deserializedRecords.stream()
+                 .map(r -> r.get("int_item").asInt())
+                 .collect(Collectors.toList());
+         List<List<Integer>> arrayItems = deserializedRecords.stream()
+                 .map(r -> ImmutableList.of(
+                         r.get("array").get(0).asInt(),
+                         r.get("array").get(1).asInt(),
+                         r.get("array").get(2).asInt()
+                 ))
+                 .collect(Collectors.toList());
+
+         assertThat(ids, hasItem("A001"));
+         assertThat(ids, hasItem("A002"));
+         assertThat(ids, hasItem("A003"));
+         assertThat(intItems, hasItem(9));
+         assertThat(intItems, hasItem(0));
+         assertThat(arrayItems.get(0), hasItem(1));
+         assertThat(arrayItems.get(0), hasItem(2));
+         assertThat(arrayItems.get(0), hasItem(3));
+     }
+
+     @Test
+     public void testSimpleAvro() throws IOException {
+         ConfigSource configSource = embulk.loadYamlResource("config_simple_avro.yml");
+         configSource.set("brokers", ImmutableList
+                 .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+         embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+
+         SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+                 .getClientForScope("embulk-output-kafka");
+         KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+         List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+                 .consumeAllRecordsFromTopic("avro-simple-topic");
+
+         assertEquals(3, consumerRecords.size());
+         List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+                 .deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
+
+         List<String> ids = genericRecords.stream()
+                 .map(r -> String.valueOf(r.get("id")))
+                 .collect(Collectors.toList());
+         List<Long> intItems = genericRecords.stream()
+                 .map(r -> (Long) r.get("int_item"))
+                 .collect(Collectors.toList());
+         List<String> varcharItems = genericRecords.stream()
+                 .map(r -> String.valueOf(r.get("varchar_item")))
+                 .collect(Collectors.toList());
+
+         assertThat(ids, hasItem("A001"));
+         assertThat(ids, hasItem("A002"));
+         assertThat(ids, hasItem("A003"));
+         assertThat(intItems, hasItem(1L));
+         assertThat(intItems, hasItem(2L));
+         assertThat(intItems, hasItem(3L));
+         assertThat(varcharItems, hasItem("a"));
+         assertThat(varcharItems, hasItem("b"));
+         assertThat(varcharItems, hasItem("c"));
+     }
+
+     @Test
+     public void testSimpleAvroSchemaFromRegistry() throws IOException, RestClientException
+     {
+         ConfigSource configSource = embulk.loadYamlResource("config_simple_avro.yml");
+         Object avsc = configSource.get(Object.class, "avsc");
+         String avscString = objectMapper.writeValueAsString(avsc);
+         configSource.set("avsc", null);
+         ParsedSchema parsedSchema = new AvroSchema(avscString);
+         MockSchemaRegistry.getClientForScope("embulk-output-kafka")
+                 .register("avro-simple-topic-value", parsedSchema);
+         configSource.set("brokers", ImmutableList
+                 .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+         embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+
+         SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+                 .getClientForScope("embulk-output-kafka");
+         KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+         List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+                 .consumeAllRecordsFromTopic("avro-simple-topic");
+
+         assertEquals(3, consumerRecords.size());
+         List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+                 .deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
+
+         List<String> ids = genericRecords.stream()
+                 .map(r -> String.valueOf(r.get("id")))
+                 .collect(Collectors.toList());
+         List<Long> intItems = genericRecords.stream()
+                 .map(r -> (Long) r.get("int_item"))
+                 .collect(Collectors.toList());
+         List<String> varcharItems = genericRecords.stream()
+                 .map(r -> String.valueOf(r.get("varchar_item")))
+                 .collect(Collectors.toList());
+
+         assertThat(ids, hasItem("A001"));
+         assertThat(ids, hasItem("A002"));
+         assertThat(ids, hasItem("A003"));
+         assertThat(intItems, hasItem(1L));
+         assertThat(intItems, hasItem(2L));
+         assertThat(intItems, hasItem(3L));
+         assertThat(varcharItems, hasItem("a"));
+         assertThat(varcharItems, hasItem("b"));
+         assertThat(varcharItems, hasItem("c"));
+     }
+
+     @Test
+     public void testSimpleAvroAvscFile() throws IOException {
+         ConfigSource configSource = embulk.loadYamlResource("config_simple_avro_avsc_file.yml");
+         configSource.set("brokers", ImmutableList
+                 .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+         embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+
+         SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+                 .getClientForScope("embulk-output-kafka");
+         KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+         List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+                 .consumeAllRecordsFromTopic("avro-simple-topic");
+
+         assertEquals(3, consumerRecords.size());
+         List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+                 .deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
+
+         List<String> ids = genericRecords.stream()
+                 .map(r -> String.valueOf(r.get("id")))
+                 .collect(Collectors.toList());
+         List<Long> intItems = genericRecords.stream()
+                 .map(r -> (Long) r.get("int_item"))
+                 .collect(Collectors.toList());
+         List<String> varcharItems = genericRecords.stream()
+                 .map(r -> String.valueOf(r.get("varchar_item")))
+                 .collect(Collectors.toList());
+
+         assertThat(ids, hasItem("A001"));
+         assertThat(ids, hasItem("A002"));
+         assertThat(ids, hasItem("A003"));
+         assertThat(intItems, hasItem(1L));
+         assertThat(intItems, hasItem(2L));
+         assertThat(intItems, hasItem(3L));
+         assertThat(varcharItems, hasItem("a"));
+         assertThat(varcharItems, hasItem("b"));
+         assertThat(varcharItems, hasItem("c"));
+     }
+
+     @Test
+     public void testSimpleAvroComplex() throws IOException {
+         ConfigSource configSource = embulk.loadYamlResource("config_complex_avro.yml");
+         configSource.set("brokers", ImmutableList
+                 .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+         embulk.runOutput(configSource, Paths.get(Resources.getResource("in_complex.csv").getPath()));
+
+         SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+                 .getClientForScope("embulk-output-kafka");
+         KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+         List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+                 .consumeAllRecordsFromTopic("avro-complex-topic");
+
+         assertEquals(3, consumerRecords.size());
+         List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+                 .deserialize("avro-complex-topic", r.value())).collect(Collectors.toList());
+
+         List<String> ids = genericRecords.stream()
+                 .map(r -> String.valueOf(r.get("id")))
+                 .collect(Collectors.toList());
+         List<Long> intItems = genericRecords.stream()
+                 .map(r -> (Long) r.get("int_item"))
+                 .collect(Collectors.toList());
+         List<Instant> timeItems = genericRecords.stream()
+                 .map(r -> Instant.ofEpochMilli((long) r.get("time")))
+                 .collect(Collectors.toList());
+
+         assertThat(ids, hasItem("A001"));
+         assertThat(ids, hasItem("A002"));
+         assertThat(ids, hasItem("A003"));
+         assertThat(intItems, hasItem(9L));
+         assertThat(intItems, hasItem(0L));
+         assertThat(timeItems, hasItem(Instant.parse("2018-02-01T12:15:18.000Z")));
+         assertThat(timeItems, hasItem(Instant.parse("2018-02-02T12:15:18.000Z")));
+         assertThat(timeItems, hasItem(Instant.parse("2018-02-03T12:15:18.000Z")));
+     }
+
+     @Test
+     public void testKeyColumnConfig() throws IOException
+     {
+         ConfigSource configSource = embulk.loadYamlResource("config_with_key_column.yml");
+         configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+         embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+         List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                 .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+                         StringDeserializer.class);
+
+         assertEquals(3, consumerRecords.size());
+         List<String> keys = new ArrayList<>();
+         for (ConsumerRecord<String, String> record : consumerRecords) {
+             keys.add(record.key());
+         }
+
+         assertThat(keys, hasItem("A001"));
+         assertThat(keys, hasItem("A002"));
+         assertThat(keys, hasItem("A003"));
+     }
+
+     @Test
+     public void testPartitionColumnConfig() throws IOException
+     {
+         ConfigSource configSource = embulk.loadYamlResource("config_with_partition_column.yml");
+         configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+         embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+         List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                 .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+                         StringDeserializer.class);
+
+         assertEquals(3, consumerRecords.size());
+         List<Integer> partitions = new ArrayList<>();
+         for (ConsumerRecord<String, String> record : consumerRecords) {
+             partitions.add(record.partition());
+         }
+
+         assertThat(partitions, hasItem(1));
+         assertThat(partitions, hasItem(2));
+         assertThat(partitions, hasItem(3));
+     }
+
+     @Test
+     public void testColumnForDeletion() throws IOException
+     {
+         ConfigSource configSource = embulk.loadYamlResource("config_with_column_for_deletion.yml");
+         configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+         embulk.runOutput(configSource, Paths.get(Resources.getResource("in_with_deletion.csv").getPath()));
+         List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                 .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+                         StringDeserializer.class);
+
+         assertEquals(3, consumerRecords.size());
+         HashMap<String, String> recordMap = new HashMap<>();
+         consumerRecords.forEach(record -> recordMap.put(record.key(), record.value()));
+         assertNotNull(recordMap.get("A001"));
+         assertNotNull(recordMap.get("A003"));
+         assertNull(recordMap.get("A002"));
+     }
+
+     @Test
+     public void testColumnForDeletionAvro() throws IOException
+     {
+         ConfigSource configSource = embulk.loadYamlResource("config_with_column_for_deletion_avro.yml");
+         configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+         embulk.runOutput(configSource, Paths.get(Resources.getResource("in_with_deletion.csv").getPath()));
+         List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                 .consumeAllRecordsFromTopic("avro-simple-topic", StringDeserializer.class,
+                         StringDeserializer.class);
+
+         assertEquals(3, consumerRecords.size());
+         HashMap<String, String> recordMap = new HashMap<>();
+         consumerRecords.forEach(record -> recordMap.put(record.key(), record.value()));
+         assertNotNull(recordMap.get("A001"));
+         assertNotNull(recordMap.get("A003"));
+         assertNull(recordMap.get("A002"));
+     }
  }
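
testColumnForDeletion and testColumnForDeletionAvro assert that the row flagged via column_for_deletion arrives with its key but a null value. A null-value record is the conventional Kafka tombstone; the short sketch below (illustrative only, not part of the plugin or its tests) shows how a consumer-side view would apply such tombstones, mirroring the assertions above:

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.kafka.clients.consumer.ConsumerRecord;

// Sketch only: fold a batch of consumed records into a key/value view,
// treating null values (the tombstones produced under column_for_deletion)
// as deletions of that key.
public class TombstoneApplier
{
    public static Map<String, String> apply(List<ConsumerRecord<String, String>> records)
    {
        Map<String, String> view = new HashMap<>();
        for (ConsumerRecord<String, String> record : records) {
            if (record.value() == null) {
                view.remove(record.key());  // deletion marker
            }
            else {
                view.put(record.key(), record.value());
            }
        }
        return view;
    }
}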