embulk-output-kafka 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.circleci/config.yml +44 -0
- data/.github/dependabot.yml +11 -0
- data/README.md +5 -1
- data/build.gradle +29 -12
- data/docker-compose.yml +1 -1
- data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +13 -8
- data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java +13 -0
- data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +9 -2
- data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java +13 -0
- data/src/main/java/org/embulk/output/kafka/KafkaJsonSerializer.java +4 -0
- data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +25 -1
- data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +54 -153
- data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java +104 -0
- data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java +3 -3
- data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java +384 -0
- data/src/test/resources/config_complex.yml +9 -28
- data/src/test/resources/config_complex_avro.yml +23 -42
- data/src/test/resources/config_simple.yml +5 -22
- data/src/test/resources/config_simple_avro.yml +14 -32
- data/src/test/resources/config_simple_avro_avsc_file.yml +7 -25
- data/src/test/resources/config_with_column_for_deletion.yml +7 -0
- data/src/test/resources/config_with_column_for_deletion_avro.yml +18 -0
- data/src/test/resources/config_with_key_column.yml +6 -23
- data/src/test/resources/config_with_partition_column.yml +6 -0
- data/src/test/resources/in1.csv +4 -4
- data/src/test/resources/in_complex.csv +4 -4
- data/src/test/resources/in_with_deletion.csv +4 -0
- metadata +30 -24
@@ -0,0 +1,104 @@
|
|
1
|
+
package org.embulk.output.kafka;
|
2
|
+
|
3
|
+
import org.apache.kafka.clients.producer.KafkaProducer;
|
4
|
+
import org.apache.kafka.clients.producer.ProducerRecord;
|
5
|
+
import org.embulk.config.TaskReport;
|
6
|
+
import org.embulk.spi.Page;
|
7
|
+
import org.embulk.spi.PageReader;
|
8
|
+
import org.embulk.spi.TransactionalPageOutput;
|
9
|
+
import org.slf4j.Logger;
|
10
|
+
import org.slf4j.LoggerFactory;
|
11
|
+
|
12
|
+
import java.util.PrimitiveIterator;
|
13
|
+
import java.util.Random;
|
14
|
+
import java.util.concurrent.atomic.AtomicLong;
|
15
|
+
|
16
|
+
public abstract class KafkaTransactionalPageOutput<P, T extends P> implements TransactionalPageOutput
|
17
|
+
{
|
18
|
+
private static final Logger logger = LoggerFactory.getLogger(KafkaTransactionalPageOutput.class);
|
19
|
+
|
20
|
+
private final KafkaProducer<Object, P> producer;
|
21
|
+
private final PageReader pageReader;
|
22
|
+
private final KafkaOutputColumnVisitor<T> columnVisitor;
|
23
|
+
private final String topic;
|
24
|
+
private final int taskIndex;
|
25
|
+
|
26
|
+
private final PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
|
27
|
+
private final AtomicLong counter = new AtomicLong(0);
|
28
|
+
private final AtomicLong recordLoggingCount = new AtomicLong(1);
|
29
|
+
|
30
|
+
public KafkaTransactionalPageOutput(
|
31
|
+
KafkaProducer<Object, P> producer,
|
32
|
+
PageReader pageReader,
|
33
|
+
KafkaOutputColumnVisitor<T> columnVisitor,
|
34
|
+
String topic, int taskIndex)
|
35
|
+
{
|
36
|
+
this.producer = producer;
|
37
|
+
this.pageReader = pageReader;
|
38
|
+
this.columnVisitor = columnVisitor;
|
39
|
+
this.topic = topic;
|
40
|
+
this.taskIndex = taskIndex;
|
41
|
+
}
|
42
|
+
|
43
|
+
@Override
|
44
|
+
public void add(Page page)
|
45
|
+
{
|
46
|
+
pageReader.setPage(page);
|
47
|
+
while (pageReader.nextRecord()) {
|
48
|
+
columnVisitor.reset();
|
49
|
+
|
50
|
+
pageReader.getSchema().visitColumns(columnVisitor);
|
51
|
+
|
52
|
+
Object recordKey = columnVisitor.getRecordKey();
|
53
|
+
if (recordKey == null) {
|
54
|
+
recordKey = randomLong.next();
|
55
|
+
}
|
56
|
+
|
57
|
+
String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : topic;
|
58
|
+
|
59
|
+
ProducerRecord<Object, P> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getRecord());
|
60
|
+
producer.send(producerRecord, (metadata, exception) -> {
|
61
|
+
if (exception != null) {
|
62
|
+
logger.error("produce error", exception);
|
63
|
+
}
|
64
|
+
|
65
|
+
logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
|
66
|
+
producerRecord.topic(),
|
67
|
+
producerRecord.key(),
|
68
|
+
producerRecord.value(),
|
69
|
+
producerRecord.partition());
|
70
|
+
|
71
|
+
long current = counter.incrementAndGet();
|
72
|
+
if (current >= recordLoggingCount.get()) {
|
73
|
+
logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
|
74
|
+
recordLoggingCount.set(recordLoggingCount.get() * 2);
|
75
|
+
}
|
76
|
+
});
|
77
|
+
}
|
78
|
+
}
|
79
|
+
|
80
|
+
@Override
|
81
|
+
public void finish()
|
82
|
+
{
|
83
|
+
producer.flush();
|
84
|
+
}
|
85
|
+
|
86
|
+
@Override
|
87
|
+
public void close()
|
88
|
+
{
|
89
|
+
producer.close();
|
90
|
+
}
|
91
|
+
|
92
|
+
@Override
|
93
|
+
public void abort()
|
94
|
+
{
|
95
|
+
producer.flush();
|
96
|
+
producer.close();
|
97
|
+
}
|
98
|
+
|
99
|
+
@Override
|
100
|
+
public TaskReport commit()
|
101
|
+
{
|
102
|
+
return null;
|
103
|
+
}
|
104
|
+
};
|
@@ -2,7 +2,7 @@ package org.embulk.output.kafka;
|
|
2
2
|
|
3
3
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
4
4
|
import com.google.common.collect.ImmutableMap;
|
5
|
-
import io.confluent.kafka.serializers.
|
5
|
+
import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
|
6
6
|
import io.confluent.kafka.serializers.KafkaAvroSerializer;
|
7
7
|
import org.apache.kafka.clients.producer.KafkaProducer;
|
8
8
|
import org.apache.kafka.clients.producer.ProducerConfig;
|
@@ -96,10 +96,10 @@ class RecordProducerFactory
|
|
96
96
|
String schemaRegistryUrl = task.getSchemaRegistryUrl().orElseThrow(() -> new ConfigException("avro_with_schema_registry format needs schema_registry_url"));
|
97
97
|
|
98
98
|
ImmutableMap.Builder<String, String> builder = ImmutableMap.<String, String>builder()
|
99
|
-
.put(
|
99
|
+
.put(AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
|
100
100
|
|
101
101
|
if (task.getValueSubjectNameStrategy().isPresent()) {
|
102
|
-
builder.put(
|
102
|
+
builder.put(AbstractKafkaSchemaSerDeConfig.VALUE_SUBJECT_NAME_STRATEGY, task.getValueSubjectNameStrategy().get());
|
103
103
|
}
|
104
104
|
|
105
105
|
Map<String, String> avroSerializerConfigs = builder.build();
|
@@ -1,5 +1,389 @@
|
|
1
1
|
package org.embulk.output.kafka;
|
2
2
|
|
3
|
+
import static org.hamcrest.MatcherAssert.assertThat;
|
4
|
+
import static org.hamcrest.Matchers.hasItem;
|
5
|
+
import static org.junit.Assert.assertEquals;
|
6
|
+
import static org.junit.Assert.assertNotNull;
|
7
|
+
import static org.junit.Assert.assertNull;
|
8
|
+
|
9
|
+
import com.fasterxml.jackson.databind.JsonNode;
|
10
|
+
import com.fasterxml.jackson.databind.ObjectMapper;
|
11
|
+
import com.google.common.collect.ImmutableList;
|
12
|
+
import com.google.common.io.Resources;
|
13
|
+
import com.salesforce.kafka.test.KafkaTestUtils;
|
14
|
+
import com.salesforce.kafka.test.junit4.SharedKafkaTestResource;
|
15
|
+
import io.confluent.kafka.schemaregistry.ParsedSchema;
|
16
|
+
import io.confluent.kafka.schemaregistry.avro.AvroSchema;
|
17
|
+
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
|
18
|
+
import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
|
19
|
+
import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry;
|
20
|
+
import io.confluent.kafka.serializers.KafkaAvroDeserializer;
|
21
|
+
import java.io.IOException;
|
22
|
+
import java.nio.file.Paths;
|
23
|
+
import java.time.Instant;
|
24
|
+
import java.util.ArrayList;
|
25
|
+
import java.util.HashMap;
|
26
|
+
import java.util.List;
|
27
|
+
import java.util.stream.Collectors;
|
28
|
+
import org.apache.avro.generic.GenericRecord;
|
29
|
+
import org.apache.kafka.clients.consumer.ConsumerRecord;
|
30
|
+
import org.apache.kafka.common.serialization.StringDeserializer;
|
31
|
+
import org.embulk.config.ConfigSource;
|
32
|
+
import org.embulk.spi.OutputPlugin;
|
33
|
+
import org.embulk.test.TestingEmbulk;
|
34
|
+
import org.junit.After;
|
35
|
+
import org.junit.Before;
|
36
|
+
import org.junit.ClassRule;
|
37
|
+
import org.junit.Rule;
|
38
|
+
import org.junit.Test;
|
39
|
+
|
3
40
|
public class TestKafkaOutputPlugin
|
4
41
|
{
|
42
|
+
@ClassRule
|
43
|
+
public static final SharedKafkaTestResource sharedKafkaTestResource = new SharedKafkaTestResource()
|
44
|
+
.withBrokers(3);
|
45
|
+
|
46
|
+
@Rule
|
47
|
+
public TestingEmbulk embulk = TestingEmbulk.builder()
|
48
|
+
.registerPlugin(OutputPlugin.class, "kafka", KafkaOutputPlugin.class)
|
49
|
+
.build();
|
50
|
+
|
51
|
+
private KafkaTestUtils kafkaTestUtils;
|
52
|
+
private final static ObjectMapper objectMapper = new ObjectMapper();
|
53
|
+
|
54
|
+
@Before
|
55
|
+
public void setUp() {
|
56
|
+
kafkaTestUtils = sharedKafkaTestResource.getKafkaTestUtils();
|
57
|
+
kafkaTestUtils.createTopic("json-topic", 8, (short) 1);
|
58
|
+
kafkaTestUtils.createTopic("json-complex-topic", 8, (short) 1);
|
59
|
+
kafkaTestUtils.createTopic("avro-simple-topic", 8, (short) 1);
|
60
|
+
kafkaTestUtils.createTopic("avro-complex-topic", 8, (short) 1);
|
61
|
+
}
|
62
|
+
|
63
|
+
@After
|
64
|
+
public void tearDown() {
|
65
|
+
kafkaTestUtils.getAdminClient().deleteTopics(ImmutableList.of(
|
66
|
+
"json-topic", "json-complex-topic", "avro-simple-topic", "avro-complex-topic"
|
67
|
+
));
|
68
|
+
}
|
69
|
+
|
70
|
+
@Test
|
71
|
+
public void testSimpleJson() throws IOException
|
72
|
+
{
|
73
|
+
ConfigSource configSource = embulk.loadYamlResource("config_simple.yml");
|
74
|
+
configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
|
75
|
+
embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
|
76
|
+
List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
|
77
|
+
.consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
|
78
|
+
StringDeserializer.class);
|
79
|
+
|
80
|
+
assertEquals(3, consumerRecords.size());
|
81
|
+
List<JsonNode> deserializedRecords = new ArrayList<>();
|
82
|
+
for (ConsumerRecord<String, String> record : consumerRecords) {
|
83
|
+
deserializedRecords.add(objectMapper.readTree(record.value()));
|
84
|
+
}
|
85
|
+
List<String> ids = deserializedRecords.stream()
|
86
|
+
.map(r -> r.get("id").asText())
|
87
|
+
.collect(Collectors.toList());
|
88
|
+
List<Integer> intItems = deserializedRecords.stream()
|
89
|
+
.map(r -> r.get("int_item").asInt())
|
90
|
+
.collect(Collectors.toList());
|
91
|
+
List<String> varcharItems = deserializedRecords.stream()
|
92
|
+
.map(r -> r.get("varchar_item").asText())
|
93
|
+
.collect(Collectors.toList());
|
94
|
+
|
95
|
+
assertThat(ids, hasItem("A001"));
|
96
|
+
assertThat(ids, hasItem("A002"));
|
97
|
+
assertThat(ids, hasItem("A003"));
|
98
|
+
assertThat(intItems, hasItem(1));
|
99
|
+
assertThat(intItems, hasItem(2));
|
100
|
+
assertThat(intItems, hasItem(3));
|
101
|
+
assertThat(varcharItems, hasItem("a"));
|
102
|
+
assertThat(varcharItems, hasItem("b"));
|
103
|
+
assertThat(varcharItems, hasItem("c"));
|
104
|
+
}
|
105
|
+
|
106
|
+
@Test
|
107
|
+
public void testComplexJson() throws IOException
|
108
|
+
{
|
109
|
+
ConfigSource configSource = embulk.loadYamlResource("config_complex.yml");
|
110
|
+
configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
|
111
|
+
|
112
|
+
embulk.runOutput(configSource, Paths.get(Resources.getResource("in_complex.csv").getPath()));
|
113
|
+
List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
|
114
|
+
.consumeAllRecordsFromTopic("json-complex-topic", StringDeserializer.class,
|
115
|
+
StringDeserializer.class);
|
116
|
+
|
117
|
+
assertEquals(3, consumerRecords.size());
|
118
|
+
List<JsonNode> deserializedRecords = new ArrayList<>();
|
119
|
+
for (ConsumerRecord<String, String> record : consumerRecords) {
|
120
|
+
deserializedRecords.add(objectMapper.readTree(record.value()));
|
121
|
+
}
|
122
|
+
List<String> ids = deserializedRecords.stream()
|
123
|
+
.map(r -> r.get("id").asText())
|
124
|
+
.collect(Collectors.toList());
|
125
|
+
List<Integer> intItems = deserializedRecords.stream()
|
126
|
+
.map(r -> r.get("int_item").asInt())
|
127
|
+
.collect(Collectors.toList());
|
128
|
+
List<List<Integer>> arrayItems = deserializedRecords.stream()
|
129
|
+
.map(r -> ImmutableList.of(
|
130
|
+
r.get("array").get(0).asInt(),
|
131
|
+
r.get("array").get(1).asInt(),
|
132
|
+
r.get("array").get(2).asInt()
|
133
|
+
))
|
134
|
+
.collect(Collectors.toList());
|
135
|
+
|
136
|
+
assertThat(ids, hasItem("A001"));
|
137
|
+
assertThat(ids, hasItem("A002"));
|
138
|
+
assertThat(ids, hasItem("A003"));
|
139
|
+
assertThat(intItems, hasItem(9));
|
140
|
+
assertThat(intItems, hasItem(0));
|
141
|
+
assertThat(arrayItems.get(0), hasItem(1));
|
142
|
+
assertThat(arrayItems.get(0), hasItem(2));
|
143
|
+
assertThat(arrayItems.get(0), hasItem(3));
|
144
|
+
}
|
145
|
+
|
146
|
+
@Test
|
147
|
+
public void testSimpleAvro() throws IOException {
|
148
|
+
ConfigSource configSource = embulk.loadYamlResource("config_simple_avro.yml");
|
149
|
+
configSource.set("brokers", ImmutableList
|
150
|
+
.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
|
151
|
+
|
152
|
+
embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
|
153
|
+
|
154
|
+
SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
|
155
|
+
.getClientForScope("embulk-output-kafka");
|
156
|
+
KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
|
157
|
+
|
158
|
+
List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
|
159
|
+
.consumeAllRecordsFromTopic("avro-simple-topic");
|
160
|
+
|
161
|
+
assertEquals(3, consumerRecords.size());
|
162
|
+
List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
|
163
|
+
.deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
|
164
|
+
|
165
|
+
List<String> ids = genericRecords.stream()
|
166
|
+
.map(r -> String.valueOf(r.get("id")))
|
167
|
+
.collect(Collectors.toList());
|
168
|
+
List<Long> intItems = genericRecords.stream()
|
169
|
+
.map(r -> (Long) r.get("int_item"))
|
170
|
+
.collect(Collectors.toList());
|
171
|
+
List<String> varcharItems = genericRecords.stream()
|
172
|
+
.map(r -> String.valueOf(r.get("varchar_item")))
|
173
|
+
.collect(Collectors.toList());
|
174
|
+
|
175
|
+
assertThat(ids, hasItem("A001"));
|
176
|
+
assertThat(ids, hasItem("A002"));
|
177
|
+
assertThat(ids, hasItem("A003"));
|
178
|
+
assertThat(intItems, hasItem(1L));
|
179
|
+
assertThat(intItems, hasItem(2L));
|
180
|
+
assertThat(intItems, hasItem(3L));
|
181
|
+
assertThat(varcharItems, hasItem("a"));
|
182
|
+
assertThat(varcharItems, hasItem("b"));
|
183
|
+
assertThat(varcharItems, hasItem("c"));
|
184
|
+
}
|
185
|
+
|
186
|
+
@Test
|
187
|
+
public void testSimpleAvroSchemaFromRegistry() throws IOException, RestClientException
|
188
|
+
{
|
189
|
+
ConfigSource configSource = embulk.loadYamlResource("config_simple_avro.yml");
|
190
|
+
Object avsc = configSource.get(Object.class, "avsc");
|
191
|
+
String avscString = objectMapper.writeValueAsString(avsc);
|
192
|
+
configSource.set("avsc", null);
|
193
|
+
ParsedSchema parsedSchema = new AvroSchema(avscString);
|
194
|
+
MockSchemaRegistry.getClientForScope("embulk-output-kafka")
|
195
|
+
.register("avro-simple-topic-value", parsedSchema);
|
196
|
+
configSource.set("brokers", ImmutableList
|
197
|
+
.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
|
198
|
+
|
199
|
+
embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
|
200
|
+
|
201
|
+
SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
|
202
|
+
.getClientForScope("embulk-output-kafka");
|
203
|
+
KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
|
204
|
+
|
205
|
+
List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
|
206
|
+
.consumeAllRecordsFromTopic("avro-simple-topic");
|
207
|
+
|
208
|
+
assertEquals(3, consumerRecords.size());
|
209
|
+
List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
|
210
|
+
.deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
|
211
|
+
|
212
|
+
List<String> ids = genericRecords.stream()
|
213
|
+
.map(r -> String.valueOf(r.get("id")))
|
214
|
+
.collect(Collectors.toList());
|
215
|
+
List<Long> intItems = genericRecords.stream()
|
216
|
+
.map(r -> (Long) r.get("int_item"))
|
217
|
+
.collect(Collectors.toList());
|
218
|
+
List<String> varcharItems = genericRecords.stream()
|
219
|
+
.map(r -> String.valueOf(r.get("varchar_item")))
|
220
|
+
.collect(Collectors.toList());
|
221
|
+
|
222
|
+
assertThat(ids, hasItem("A001"));
|
223
|
+
assertThat(ids, hasItem("A002"));
|
224
|
+
assertThat(ids, hasItem("A003"));
|
225
|
+
assertThat(intItems, hasItem(1L));
|
226
|
+
assertThat(intItems, hasItem(2L));
|
227
|
+
assertThat(intItems, hasItem(3L));
|
228
|
+
assertThat(varcharItems, hasItem("a"));
|
229
|
+
assertThat(varcharItems, hasItem("b"));
|
230
|
+
assertThat(varcharItems, hasItem("c"));
|
231
|
+
}
|
232
|
+
|
233
|
+
@Test
|
234
|
+
public void testSimpleAvroAvscFile() throws IOException {
|
235
|
+
ConfigSource configSource = embulk.loadYamlResource("config_simple_avro_avsc_file.yml");
|
236
|
+
configSource.set("brokers", ImmutableList
|
237
|
+
.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
|
238
|
+
|
239
|
+
embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
|
240
|
+
|
241
|
+
SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
|
242
|
+
.getClientForScope("embulk-output-kafka");
|
243
|
+
KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
|
244
|
+
|
245
|
+
List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
|
246
|
+
.consumeAllRecordsFromTopic("avro-simple-topic");
|
247
|
+
|
248
|
+
assertEquals(3, consumerRecords.size());
|
249
|
+
List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
|
250
|
+
.deserialize("avro-simple-topic", r.value())).collect(Collectors.toList());
|
251
|
+
|
252
|
+
List<String> ids = genericRecords.stream()
|
253
|
+
.map(r -> String.valueOf(r.get("id")))
|
254
|
+
.collect(Collectors.toList());
|
255
|
+
List<Long> intItems = genericRecords.stream()
|
256
|
+
.map(r -> (Long) r.get("int_item"))
|
257
|
+
.collect(Collectors.toList());
|
258
|
+
List<String> varcharItems = genericRecords.stream()
|
259
|
+
.map(r -> String.valueOf(r.get("varchar_item")))
|
260
|
+
.collect(Collectors.toList());
|
261
|
+
|
262
|
+
assertThat(ids, hasItem("A001"));
|
263
|
+
assertThat(ids, hasItem("A002"));
|
264
|
+
assertThat(ids, hasItem("A003"));
|
265
|
+
assertThat(intItems, hasItem(1L));
|
266
|
+
assertThat(intItems, hasItem(2L));
|
267
|
+
assertThat(intItems, hasItem(3L));
|
268
|
+
assertThat(varcharItems, hasItem("a"));
|
269
|
+
assertThat(varcharItems, hasItem("b"));
|
270
|
+
assertThat(varcharItems, hasItem("c"));
|
271
|
+
}
|
272
|
+
|
273
|
+
@Test
|
274
|
+
public void testSimpleAvroComplex() throws IOException {
|
275
|
+
ConfigSource configSource = embulk.loadYamlResource("config_complex_avro.yml");
|
276
|
+
configSource.set("brokers", ImmutableList
|
277
|
+
.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
|
278
|
+
|
279
|
+
embulk.runOutput(configSource, Paths.get(Resources.getResource("in_complex.csv").getPath()));
|
280
|
+
|
281
|
+
SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry
|
282
|
+
.getClientForScope("embulk-output-kafka");
|
283
|
+
KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
|
284
|
+
|
285
|
+
List<ConsumerRecord<byte[], byte[]>> consumerRecords = kafkaTestUtils
|
286
|
+
.consumeAllRecordsFromTopic("avro-complex-topic");
|
287
|
+
|
288
|
+
assertEquals(3, consumerRecords.size());
|
289
|
+
List<GenericRecord> genericRecords = consumerRecords.stream().map(r -> (GenericRecord) kafkaAvroDeserializer
|
290
|
+
.deserialize("avro-complex-topic", r.value())).collect(Collectors.toList());
|
291
|
+
|
292
|
+
List<String> ids = genericRecords.stream()
|
293
|
+
.map(r -> String.valueOf(r.get("id")))
|
294
|
+
.collect(Collectors.toList());
|
295
|
+
List<Long> intItems = genericRecords.stream()
|
296
|
+
.map(r -> (Long) r.get("int_item"))
|
297
|
+
.collect(Collectors.toList());
|
298
|
+
List<Instant> timeItems = genericRecords.stream()
|
299
|
+
.map(r -> Instant.ofEpochMilli((long) r.get("time")))
|
300
|
+
.collect(Collectors.toList());
|
301
|
+
|
302
|
+
assertThat(ids, hasItem("A001"));
|
303
|
+
assertThat(ids, hasItem("A002"));
|
304
|
+
assertThat(ids, hasItem("A003"));
|
305
|
+
assertThat(intItems, hasItem(9L));
|
306
|
+
assertThat(intItems, hasItem(0L));
|
307
|
+
assertThat(timeItems, hasItem(Instant.parse("2018-02-01T12:15:18.000Z")));
|
308
|
+
assertThat(timeItems, hasItem(Instant.parse("2018-02-02T12:15:18.000Z")));
|
309
|
+
assertThat(timeItems, hasItem(Instant.parse("2018-02-03T12:15:18.000Z")));
|
310
|
+
}
|
311
|
+
|
312
|
+
@Test
|
313
|
+
public void testKeyColumnConfig() throws IOException
|
314
|
+
{
|
315
|
+
ConfigSource configSource = embulk.loadYamlResource("config_with_key_column.yml");
|
316
|
+
configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
|
317
|
+
embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
|
318
|
+
List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
|
319
|
+
.consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
|
320
|
+
StringDeserializer.class);
|
321
|
+
|
322
|
+
assertEquals(3, consumerRecords.size());
|
323
|
+
List<String> keys = new ArrayList<>();
|
324
|
+
for (ConsumerRecord<String, String> record : consumerRecords) {
|
325
|
+
keys.add(record.key());
|
326
|
+
}
|
327
|
+
|
328
|
+
assertThat(keys, hasItem("A001"));
|
329
|
+
assertThat(keys, hasItem("A002"));
|
330
|
+
assertThat(keys, hasItem("A003"));
|
331
|
+
}
|
332
|
+
|
333
|
+
@Test
|
334
|
+
public void testPartitionColumnConfig() throws IOException
|
335
|
+
{
|
336
|
+
ConfigSource configSource = embulk.loadYamlResource("config_with_partition_column.yml");
|
337
|
+
configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
|
338
|
+
embulk.runOutput(configSource, Paths.get(Resources.getResource("in1.csv").getPath()));
|
339
|
+
List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
|
340
|
+
.consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
|
341
|
+
StringDeserializer.class);
|
342
|
+
|
343
|
+
assertEquals(3, consumerRecords.size());
|
344
|
+
List<Integer> partitions = new ArrayList<>();
|
345
|
+
for (ConsumerRecord<String, String> record : consumerRecords) {
|
346
|
+
partitions.add(record.partition());
|
347
|
+
}
|
348
|
+
|
349
|
+
assertThat(partitions, hasItem(1));
|
350
|
+
assertThat(partitions, hasItem(2));
|
351
|
+
assertThat(partitions, hasItem(3));
|
352
|
+
}
|
353
|
+
|
354
|
+
@Test
|
355
|
+
public void testColumnForDeletion() throws IOException
|
356
|
+
{
|
357
|
+
ConfigSource configSource = embulk.loadYamlResource("config_with_column_for_deletion.yml");
|
358
|
+
configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
|
359
|
+
embulk.runOutput(configSource, Paths.get(Resources.getResource("in_with_deletion.csv").getPath()));
|
360
|
+
List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
|
361
|
+
.consumeAllRecordsFromTopic("json-topic", StringDeserializer.class,
|
362
|
+
StringDeserializer.class);
|
363
|
+
|
364
|
+
assertEquals(3, consumerRecords.size());
|
365
|
+
HashMap<String, String> recordMap = new HashMap<>();
|
366
|
+
consumerRecords.forEach(record -> recordMap.put(record.key(), record.value()));
|
367
|
+
assertNotNull(recordMap.get("A001"));
|
368
|
+
assertNotNull(recordMap.get("A003"));
|
369
|
+
assertNull(recordMap.get("A002"));
|
370
|
+
}
|
371
|
+
|
372
|
+
@Test
|
373
|
+
public void testColumnForDeletionAvro() throws IOException
|
374
|
+
{
|
375
|
+
ConfigSource configSource = embulk.loadYamlResource("config_with_column_for_deletion_avro.yml");
|
376
|
+
configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
|
377
|
+
embulk.runOutput(configSource, Paths.get(Resources.getResource("in_with_deletion.csv").getPath()));
|
378
|
+
List<ConsumerRecord<String, String>> consumerRecords = kafkaTestUtils
|
379
|
+
.consumeAllRecordsFromTopic("avro-simple-topic", StringDeserializer.class,
|
380
|
+
StringDeserializer.class);
|
381
|
+
|
382
|
+
assertEquals(3, consumerRecords.size());
|
383
|
+
HashMap<String, String> recordMap = new HashMap<>();
|
384
|
+
consumerRecords.forEach(record -> recordMap.put(record.key(), record.value()));
|
385
|
+
assertNotNull(recordMap.get("A001"));
|
386
|
+
assertNotNull(recordMap.get("A003"));
|
387
|
+
assertNull(recordMap.get("A002"));
|
388
|
+
}
|
5
389
|
}
|