embulk-output-kafka 0.1.7 → 0.1.8
This diff compares the contents of two publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
- checksums.yaml +5 -5
- data/.circleci/config.yml +44 -0
- data/.github/dependabot.yml +11 -0
- data/README.md +5 -1
- data/build.gradle +29 -12
- data/docker-compose.yml +1 -1
- data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +13 -8
- data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java +13 -0
- data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +9 -2
- data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java +13 -0
- data/src/main/java/org/embulk/output/kafka/KafkaJsonSerializer.java +4 -0
- data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +25 -1
- data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +54 -153
- data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java +104 -0
- data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java +3 -3
- data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java +384 -0
- data/src/test/resources/config_complex.yml +9 -28
- data/src/test/resources/config_complex_avro.yml +23 -42
- data/src/test/resources/config_simple.yml +5 -22
- data/src/test/resources/config_simple_avro.yml +14 -32
- data/src/test/resources/config_simple_avro_avsc_file.yml +7 -25
- data/src/test/resources/config_with_column_for_deletion.yml +7 -0
- data/src/test/resources/config_with_column_for_deletion_avro.yml +18 -0
- data/src/test/resources/config_with_key_column.yml +6 -23
- data/src/test/resources/config_with_partition_column.yml +6 -0
- data/src/test/resources/in1.csv +4 -4
- data/src/test/resources/in_complex.csv +4 -4
- data/src/test/resources/in_with_deletion.csv +4 -0
- metadata +30 -24

data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java (new file):

```diff
@@ -0,0 +1,104 @@
+package org.embulk.output.kafka;
+
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.embulk.config.TaskReport;
+import org.embulk.spi.Page;
+import org.embulk.spi.PageReader;
+import org.embulk.spi.TransactionalPageOutput;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.PrimitiveIterator;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicLong;
+
+public abstract class KafkaTransactionalPageOutput<P, T extends P> implements TransactionalPageOutput
+{
+    private static final Logger logger = LoggerFactory.getLogger(KafkaTransactionalPageOutput.class);
+
+    private final KafkaProducer<Object, P> producer;
+    private final PageReader pageReader;
+    private final KafkaOutputColumnVisitor<T> columnVisitor;
+    private final String topic;
+    private final int taskIndex;
+
+    private final PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
+    private final AtomicLong counter = new AtomicLong(0);
+    private final AtomicLong recordLoggingCount = new AtomicLong(1);
+
+    public KafkaTransactionalPageOutput(
+            KafkaProducer<Object, P> producer,
+            PageReader pageReader,
+            KafkaOutputColumnVisitor<T> columnVisitor,
+            String topic, int taskIndex)
+    {
+        this.producer = producer;
+        this.pageReader = pageReader;
+        this.columnVisitor = columnVisitor;
+        this.topic = topic;
+        this.taskIndex = taskIndex;
+    }
+
+    @Override
+    public void add(Page page)
+    {
+        pageReader.setPage(page);
+        while (pageReader.nextRecord()) {
+            columnVisitor.reset();
+
+            pageReader.getSchema().visitColumns(columnVisitor);
+
+            Object recordKey = columnVisitor.getRecordKey();
+            if (recordKey == null) {
+                recordKey = randomLong.next();
+            }
+
+            String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : topic;
+
+            ProducerRecord<Object, P> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getRecord());
+            producer.send(producerRecord, (metadata, exception) -> {
+                if (exception != null) {
+                    logger.error("produce error", exception);
+                }
+
+                logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
+                        producerRecord.topic(),
+                        producerRecord.key(),
+                        producerRecord.value(),
+                        producerRecord.partition());
+
+                long current = counter.incrementAndGet();
+                if (current >= recordLoggingCount.get()) {
+                    logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
+                    recordLoggingCount.set(recordLoggingCount.get() * 2);
+                }
+            });
+        }
+    }
+
+    @Override
+    public void finish()
+    {
+        producer.flush();
+    }
+
+    @Override
+    public void close()
+    {
+        producer.close();
+    }
+
+    @Override
+    public void abort()
+    {
+        producer.flush();
+        producer.close();
+    }
+
+    @Override
+    public TaskReport commit()
+    {
+        return null;
+    }
+};
```
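The per-format page outputs listed above (JsonFormatTransactionalPageOutput.java and AvroFormatTransactionalPageOutput.java, +13 lines each) can now be thin subclasses of this new base class. As a rough sketch only, not the plugin's actual code, and assuming JsonFormatColumnVisitor extends KafkaOutputColumnVisitor<ObjectNode>, such a subclass could look like this:

```java
package org.embulk.output.kafka;

import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.embulk.spi.PageReader;

// Hypothetical sketch: binds the generic parameters for the JSON format and
// delegates add/finish/close/abort/commit to KafkaTransactionalPageOutput.
public class JsonFormatTransactionalPageOutput
        extends KafkaTransactionalPageOutput<ObjectNode, ObjectNode>
{
    public JsonFormatTransactionalPageOutput(
            KafkaProducer<Object, ObjectNode> producer,
            PageReader pageReader,
            JsonFormatColumnVisitor columnVisitor,
            String topic, int taskIndex)
    {
        super(producer, pageReader, columnVisitor, topic, taskIndex);
    }
}
```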

data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java:

```diff
@@ -2,7 +2,7 @@ package org.embulk.output.kafka;
 
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.collect.ImmutableMap;
-import io.confluent.kafka.serializers.
+import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
 import io.confluent.kafka.serializers.KafkaAvroSerializer;
 import org.apache.kafka.clients.producer.KafkaProducer;
 import org.apache.kafka.clients.producer.ProducerConfig;
@@ -96,10 +96,10 @@ class RecordProducerFactory
         String schemaRegistryUrl = task.getSchemaRegistryUrl().orElseThrow(() -> new ConfigException("avro_with_schema_registry format needs schema_registry_url"));
 
         ImmutableMap.Builder<String, String> builder = ImmutableMap.<String, String>builder()
-                .put(
+                .put(AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
 
         if (task.getValueSubjectNameStrategy().isPresent()) {
-            builder.put(
+            builder.put(AbstractKafkaSchemaSerDeConfig.VALUE_SUBJECT_NAME_STRATEGY, task.getValueSubjectNameStrategy().get());
         }
 
         Map<String, String> avroSerializerConfigs = builder.build();
```
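For context, this change tracks Confluent's serializer API, where the schema-registry-related constants now live on AbstractKafkaSchemaSerDeConfig (the class that superseded the older Avro-specific config class in newer Confluent releases). A minimal, self-contained sketch of where such constants typically end up; this is generic Confluent producer usage, not the plugin's own code, and the broker and registry URLs are placeholders:

```java
import java.util.HashMap;
import java.util.Map;
import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
import io.confluent.kafka.serializers.KafkaAvroSerializer;
import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.StringSerializer;

public class AvroProducerExample
{
    public static void main(String[] args)
    {
        Map<String, Object> configs = new HashMap<>();
        // Placeholder endpoints; in the plugin these come from the task config.
        configs.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);
        // The constant renamed in this diff: schema_registry_url is passed through here.
        configs.put(AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, "http://localhost:8081");

        try (KafkaProducer<String, GenericRecord> producer = new KafkaProducer<>(configs)) {
            producer.flush();
        }
    }
}
```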

data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java:

```diff
@@ -1,5 +1,389 @@
 package org.embulk.output.kafka;
 
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.hasItem;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableList;
+import com.google.common.io.Resources;
+import com.salesforce.kafka.test.KafkaTestUtils;
+import com.salesforce.kafka.test.junit4.SharedKafkaTestResource;
+import io.confluent.kafka.schemaregistry.ParsedSchema;
+import io.confluent.kafka.schemaregistry.avro.AvroSchema;
+import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
+import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry;
+import io.confluent.kafka.serializers.KafkaAvroDeserializer;
+import java.io.IOException;
+import java.nio.file.Paths;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.embulk.config.ConfigSource;
+import org.embulk.spi.OutputPlugin;
+import org.embulk.test.TestingEmbulk;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+
 public class TestKafkaOutputPlugin
 {
+    @ClassRule
+    public static final SharedKafkaTestResource sharedKafkaTestResource = new SharedKafkaTestResource()
+            .withBrokers(3);
+
+    @Rule
+    public TestingEmbulk embulk = TestingEmbulk.builder()
+            .registerPlugin(OutputPlugin.class, "kafka", KafkaOutputPlugin.class)
+            .build();
+
+    private KafkaTestUtils kafkaTestUtils;
+    private final static ObjectMapper objectMapper = new ObjectMapper();
+
+    @Before
+    public void setUp() {
+        kafkaTestUtils = sharedKafkaTestResource.getKafkaTestUtils();
+        kafkaTestUtils.createTopic("json-topic", 8, (short) 1);
+        kafkaTestUtils.createTopic("json-complex-topic", 8, (short) 1);
+        kafkaTestUtils.createTopic("avro-simple-topic", 8, (short) 1);
+        kafkaTestUtils.createTopic("avro-complex-topic", 8, (short) 1);
+    }
+
+    @After
+    public void tearDown() {
+        kafkaTestUtils.getAdminClient().deleteTopics(ImmutableList.of(
+                "json-topic", "json-complex-topic", "avro-simple-topic", "avro-complex-topic"
+        ));
+    }
+
+    @Test
+    public void testSimpleJson() throws IOException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_simple.yml");
+        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+        List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+                        StringDeserializer.class);
+
+        assertEquals(3, consumerRecords.size());
+        List<JsonNode> deserializedRecords = new ArrayList<>();
+        for (ConsumerRecord<String, String> record : consumerRecords) {
+            deserializedRecords.add(objectMapper.readTree(record.value()));
+        }
+        List<String> ids = deserializedRecords.stream()
+                .map(r -> r.get("id").asText())
+                .collect(Collectors.toList());
+        List<Integer> intItems = deserializedRecords.stream()
+                .map(r -> r.get("int_item").asInt())
+                .collect(Collectors.toList());
+        List<String> varcharItems = deserializedRecords.stream()
+                .map(r -> r.get("varchar_item").asText())
+                .collect(Collectors.toList());
+
+        assertThat(ids, hasItem("A001"));
+        assertThat(ids, hasItem("A002"));
+        assertThat(ids, hasItem("A003"));
+        assertThat(intItems, hasItem(1));
+        assertThat(intItems, hasItem(2));
+        assertThat(intItems, hasItem(3));
+        assertThat(varcharItems, hasItem("a"));
+        assertThat(varcharItems, hasItem("b"));
+        assertThat(varcharItems, hasItem("c"));
+    }
+
+    @Test
+    public void testComplexJson() throws IOException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_complex.yml");
+        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in_complex.csv").getPath()));
+        List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("json-complex-topic", StringDeserializer.class,
+                        StringDeserializer.class);
+
+        assertEquals(3, consumerRecords.size());
+        List<JsonNode> deserializedRecords = new ArrayList<>();
+        for (ConsumerRecord<String, String> record : consumerRecords) {
+            deserializedRecords.add(objectMapper.readTree(record.value()));
+        }
+        List<String> ids = deserializedRecords.stream()
+                .map(r -> r.get("id").asText())
+                .collect(Collectors.toList());
+        List<Integer> intItems = deserializedRecords.stream()
+                .map(r -> r.get("int_item").asInt())
+                .collect(Collectors.toList());
+        List<List<Integer>> arrayItems = deserializedRecords.stream()
+                .map(r -> ImmutableList.of(
+                        r.get("array").get(0).asInt(),
+                        r.get("array").get(1).asInt(),
+                        r.get("array").get(2).asInt()
+                ))
+                .collect(Collectors.toList());
+
+        assertThat(ids, hasItem("A001"));
+        assertThat(ids, hasItem("A002"));
+        assertThat(ids, hasItem("A003"));
+        assertThat(intItems, hasItem(9));
+        assertThat(intItems, hasItem(0));
+        assertThat(arrayItems.get(0), hasItem(1));
+        assertThat(arrayItems.get(0), hasItem(2));
+        assertThat(arrayItems.get(0), hasItem(3));
+    }
+
+    @Test
+    public void testSimpleAvro() throws IOException {
+        ConfigSource configSource = embulk.loadYamlResource("config_simple_avro.yml");
+        configSource.set("brokers", ImmutableList
+                .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+
+        SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+                .getClientForScope("embulk-output-kafka");
+        KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+        List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("avro-simple-topic");
+
+        assertEquals(3, consumerRecords.size());
+        List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+                .deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
+
+        List<String> ids = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("id")))
+                .collect(Collectors.toList());
+        List<Long> intItems = genericRecords.stream()
+                .map(r -> (Long) r.get("int_item"))
+                .collect(Collectors.toList());
+        List<String> varcharItems = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("varchar_item")))
+                .collect(Collectors.toList());
+
+        assertThat(ids, hasItem("A001"));
+        assertThat(ids, hasItem("A002"));
+        assertThat(ids, hasItem("A003"));
+        assertThat(intItems, hasItem(1L));
+        assertThat(intItems, hasItem(2L));
+        assertThat(intItems, hasItem(3L));
+        assertThat(varcharItems, hasItem("a"));
+        assertThat(varcharItems, hasItem("b"));
+        assertThat(varcharItems, hasItem("c"));
+    }
+
+    @Test
+    public void testSimpleAvroSchemaFromRegistry() throws IOException, RestClientException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_simple_avro.yml");
+        Object avsc = configSource.get(Object.class, "avsc");
+        String avscString = objectMapper.writeValueAsString(avsc);
+        configSource.set("avsc", null);
+        ParsedSchema parsedSchema = new AvroSchema(avscString);
+        MockSchemaRegistry.getClientForScope("embulk-output-kafka")
+                .register("avro-simple-topic-value", parsedSchema);
+        configSource.set("brokers", ImmutableList
+                .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+
+        SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+                .getClientForScope("embulk-output-kafka");
+        KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+        List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("avro-simple-topic");
+
+        assertEquals(3, consumerRecords.size());
+        List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+                .deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
+
+        List<String> ids = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("id")))
+                .collect(Collectors.toList());
+        List<Long> intItems = genericRecords.stream()
+                .map(r -> (Long) r.get("int_item"))
+                .collect(Collectors.toList());
+        List<String> varcharItems = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("varchar_item")))
+                .collect(Collectors.toList());
+
+        assertThat(ids, hasItem("A001"));
+        assertThat(ids, hasItem("A002"));
+        assertThat(ids, hasItem("A003"));
+        assertThat(intItems, hasItem(1L));
+        assertThat(intItems, hasItem(2L));
+        assertThat(intItems, hasItem(3L));
+        assertThat(varcharItems, hasItem("a"));
+        assertThat(varcharItems, hasItem("b"));
+        assertThat(varcharItems, hasItem("c"));
+    }
+
+    @Test
+    public void testSimpleAvroAvscFile() throws IOException {
+        ConfigSource configSource = embulk.loadYamlResource("config_simple_avro_avsc_file.yml");
+        configSource.set("brokers", ImmutableList
+                .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+
+        SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+                .getClientForScope("embulk-output-kafka");
+        KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+        List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("avro-simple-topic");
+
+        assertEquals(3, consumerRecords.size());
+        List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+                .deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
+
+        List<String> ids = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("id")))
+                .collect(Collectors.toList());
+        List<Long> intItems = genericRecords.stream()
+                .map(r -> (Long) r.get("int_item"))
+                .collect(Collectors.toList());
+        List<String> varcharItems = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("varchar_item")))
+                .collect(Collectors.toList());
+
+        assertThat(ids, hasItem("A001"));
+        assertThat(ids, hasItem("A002"));
+        assertThat(ids, hasItem("A003"));
+        assertThat(intItems, hasItem(1L));
+        assertThat(intItems, hasItem(2L));
+        assertThat(intItems, hasItem(3L));
+        assertThat(varcharItems, hasItem("a"));
+        assertThat(varcharItems, hasItem("b"));
+        assertThat(varcharItems, hasItem("c"));
+    }
+
+    @Test
+    public void testSimpleAvroComplex() throws IOException {
+        ConfigSource configSource = embulk.loadYamlResource("config_complex_avro.yml");
+        configSource.set("brokers", ImmutableList
+                .of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in_complex.csv").getPath()));
+
+        SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
+                .getClientForScope("embulk-output-kafka");
+        KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
+
+        List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("avro-complex-topic");
+
+        assertEquals(3, consumerRecords.size());
+        List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
+                .deserialize("avro-complex-topic", r.value())).collect(Collectors.toList());
+
+        List<String> ids = genericRecords.stream()
+                .map(r -> String.valueOf(r.get("id")))
+                .collect(Collectors.toList());
+        List<Long> intItems = genericRecords.stream()
+                .map(r -> (Long) r.get("int_item"))
+                .collect(Collectors.toList());
+        List<Instant> timeItems = genericRecords.stream()
+                .map(r -> Instant.ofEpochMilli((long) r.get("time")))
+                .collect(Collectors.toList());
+
+        assertThat(ids, hasItem("A001"));
+        assertThat(ids, hasItem("A002"));
+        assertThat(ids, hasItem("A003"));
+        assertThat(intItems, hasItem(9L));
+        assertThat(intItems, hasItem(0L));
+        assertThat(timeItems, hasItem(Instant.parse("2018-02-01T12:15:18.000Z")));
+        assertThat(timeItems, hasItem(Instant.parse("2018-02-02T12:15:18.000Z")));
+        assertThat(timeItems, hasItem(Instant.parse("2018-02-03T12:15:18.000Z")));
+    }
+
+    @Test
+    public void testKeyColumnConfig() throws IOException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_with_key_column.yml");
+        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+        List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+                        StringDeserializer.class);
+
+        assertEquals(3, consumerRecords.size());
+        List<String> keys = new ArrayList<>();
+        for (ConsumerRecord<String, String> record : consumerRecords) {
+            keys.add(record.key());
+        }
+
+        assertThat(keys, hasItem("A001"));
+        assertThat(keys, hasItem("A002"));
+        assertThat(keys, hasItem("A003"));
+    }
+
+    @Test
+    public void testPartitionColumnConfig() throws IOException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_with_partition_column.yml");
+        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
+        List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+                        StringDeserializer.class);
+
+        assertEquals(3, consumerRecords.size());
+        List<Integer> partitions = new ArrayList<>();
+        for (ConsumerRecord<String, String> record : consumerRecords) {
+            partitions.add(record.partition());
+        }
+
+        assertThat(partitions, hasItem(1));
+        assertThat(partitions, hasItem(2));
+        assertThat(partitions, hasItem(3));
+    }
+
+    @Test
+    public void testColumnForDeletion() throws IOException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_with_column_for_deletion.yml");
+        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in_with_deletion.csv").getPath()));
+        List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
+                        StringDeserializer.class);
+
+        assertEquals(3, consumerRecords.size());
+        HashMap<String, String> recordMap = new HashMap<>();
+        consumerRecords.forEach(record -> recordMap.put(record.key(), record.value()));
+        assertNotNull(recordMap.get("A001"));
+        assertNotNull(recordMap.get("A003"));
+        assertNull(recordMap.get("A002"));
+    }
+
+    @Test
+    public void testColumnForDeletionAvro() throws IOException
+    {
+        ConfigSource configSource = embulk.loadYamlResource("config_with_column_for_deletion_avro.yml");
+        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
+        embulk.runOutput(configSource, Paths.get(Resources.getResource("in_with_deletion.csv").getPath()));
+        List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
+                .consumeAllRecordsFromTopic("avro-simple-topic", StringDeserializer.class,
+                        StringDeserializer.class);
+
+        assertEquals(3, consumerRecords.size());
+        HashMap<String, String> recordMap = new HashMap<>();
+        consumerRecords.forEach(record -> recordMap.put(record.key(), record.value()));
+        assertNotNull(recordMap.get("A001"));
+        assertNotNull(recordMap.get("A003"));
+        assertNull(recordMap.get("A002"));
+    }
 }
```