embulk-input-kafka 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.circleci/config.yml +44 -0
- data/.gitignore +12 -0
- data/LICENSE.txt +21 -0
- data/README.md +53 -0
- data/build.gradle +149 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +5 -0
- data/gradlew +172 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/input/kafka.rb +3 -0
- data/src/main/java/org/embulk/input/kafka/AbstractKafkaInputColumnVisitor.java +135 -0
- data/src/main/java/org/embulk/input/kafka/AvroFormatColumnVisitor.java +207 -0
- data/src/main/java/org/embulk/input/kafka/JsonFormatColumnVisitor.java +168 -0
- data/src/main/java/org/embulk/input/kafka/KafkaInputPlugin.java +513 -0
- data/src/main/java/org/embulk/input/kafka/KafkaJsonDeserializer.java +38 -0
- data/src/test/avro/ComplexRecordAvro.avsc +18 -0
- data/src/test/avro/SimpleRecordAvro.avsc +11 -0
- data/src/test/java/org/embulk/input/kafka/ComplexRecord.java +75 -0
- data/src/test/java/org/embulk/input/kafka/SimpleRecord.java +39 -0
- data/src/test/java/org/embulk/input/kafka/TestKafkaInputPlugin.java +353 -0
- data/src/test/resources/config_complex.yml +16 -0
- data/src/test/resources/config_complex_avro.yml +17 -0
- data/src/test/resources/config_simple.yml +11 -0
- data/src/test/resources/config_simple_avro.yml +12 -0
- metadata +118 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
package org.embulk.input.kafka;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import org.apache.kafka.common.serialization.Deserializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
public class KafkaJsonDeserializer implements Deserializer<ObjectNode>
|
15
|
+
{
|
16
|
+
private static Logger logger = LoggerFactory.getLogger(KafkaJsonDeserializer.class);
|
17
|
+
private static ObjectMapper mapper = new ObjectMapper()
|
18
|
+
.registerModules(new Jdk8Module(), new JavaTimeModule());
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public ObjectNode deserialize(String topic, byte[] data)
|
22
|
+
{
|
23
|
+
try {
|
24
|
+
JsonNode jsonNode = mapper.readTree(data);
|
25
|
+
if (jsonNode.isObject()) {
|
26
|
+
return (ObjectNode) jsonNode;
|
27
|
+
}
|
28
|
+
else {
|
29
|
+
logger.warn("Ignore current record that is not an object: {}", data);
|
30
|
+
return null;
|
31
|
+
}
|
32
|
+
}
|
33
|
+
catch (IOException e) {
|
34
|
+
e.printStackTrace();
|
35
|
+
return null;
|
36
|
+
}
|
37
|
+
}
|
38
|
+
}
|
@@ -0,0 +1,18 @@
|
|
1
|
+
{
|
2
|
+
"type": "record",
|
3
|
+
"namespace": "org.embulk.input.kafka",
|
4
|
+
"name": "ComplexRecordAvro",
|
5
|
+
"fields": [
|
6
|
+
{"name": "id", "type": "string"},
|
7
|
+
{"name": "int_item", "type": "long"},
|
8
|
+
{"name": "varchar_item", "type": ["null", "string"]},
|
9
|
+
{"name": "time", "type": "long", "logicalType": "timestamp-millis"},
|
10
|
+
{"name": "array", "type": ["null", {"type": "array", "items": "long"}]},
|
11
|
+
{"name": "data", "type": {"type": "record", "name": "InnerData", "fields": [
|
12
|
+
{"name": "hoge", "type": "string"},
|
13
|
+
{"name": "aaa", "type": ["null", "string"]},
|
14
|
+
{"name": "innerArray", "type": {"type": "array", "items": "long"}},
|
15
|
+
{"name": "innerMap", "type": {"type": "map", "values": "long"}}
|
16
|
+
]}}
|
17
|
+
]
|
18
|
+
}
|
@@ -0,0 +1,75 @@
|
|
1
|
+
package org.embulk.input.kafka;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonFormat;
|
5
|
+
import com.fasterxml.jackson.annotation.JsonProperty;
|
6
|
+
|
7
|
+
import java.time.Instant;
|
8
|
+
import java.util.List;
|
9
|
+
import java.util.Map;
|
10
|
+
|
11
|
+
public class ComplexRecord
|
12
|
+
{
|
13
|
+
@JsonProperty
|
14
|
+
private String id;
|
15
|
+
|
16
|
+
@JsonProperty("int_item")
|
17
|
+
private Integer intItem;
|
18
|
+
|
19
|
+
@JsonProperty("varchar_item")
|
20
|
+
private String varcharItem;
|
21
|
+
|
22
|
+
@JsonProperty("time")
|
23
|
+
private Instant time;
|
24
|
+
|
25
|
+
@JsonProperty("array")
|
26
|
+
private List<String> array;
|
27
|
+
|
28
|
+
@JsonProperty("data")
|
29
|
+
private Map<String, Map<String, String>> data;
|
30
|
+
|
31
|
+
@JsonCreator
|
32
|
+
public ComplexRecord(@JsonProperty("id") String id, @JsonProperty("int_item") Integer intItem,
|
33
|
+
@JsonProperty("varchar_item") String varcharItem, @JsonProperty("time") Instant time,
|
34
|
+
@JsonProperty("array") List<String> array,
|
35
|
+
@JsonProperty("data") Map<String, Map<String, String>> data)
|
36
|
+
{
|
37
|
+
this.id = id;
|
38
|
+
this.intItem = intItem;
|
39
|
+
this.varcharItem = varcharItem;
|
40
|
+
this.time = time;
|
41
|
+
this.array = array;
|
42
|
+
this.data = data;
|
43
|
+
}
|
44
|
+
|
45
|
+
public String getId()
|
46
|
+
{
|
47
|
+
return id;
|
48
|
+
}
|
49
|
+
|
50
|
+
public Integer getIntItem()
|
51
|
+
{
|
52
|
+
return intItem;
|
53
|
+
}
|
54
|
+
|
55
|
+
public String getVarcharItem()
|
56
|
+
{
|
57
|
+
return varcharItem;
|
58
|
+
}
|
59
|
+
|
60
|
+
public Instant getTime()
|
61
|
+
{
|
62
|
+
return time;
|
63
|
+
}
|
64
|
+
|
65
|
+
public List<String> getArray()
|
66
|
+
{
|
67
|
+
return array;
|
68
|
+
}
|
69
|
+
|
70
|
+
public Map<String, Map<String, String>> getData()
|
71
|
+
{
|
72
|
+
return data;
|
73
|
+
}
|
74
|
+
|
75
|
+
}
|
@@ -0,0 +1,39 @@
|
|
1
|
+
package org.embulk.input.kafka;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonProperty;
|
5
|
+
|
6
|
+
public class SimpleRecord
|
7
|
+
{
|
8
|
+
@JsonProperty
|
9
|
+
private String id;
|
10
|
+
|
11
|
+
@JsonProperty("int_item")
|
12
|
+
private Integer intItem;
|
13
|
+
|
14
|
+
@JsonProperty("varchar_item")
|
15
|
+
private String varcharItem;
|
16
|
+
|
17
|
+
@JsonCreator
|
18
|
+
public SimpleRecord(@JsonProperty("id") String id, @JsonProperty("int_item") Integer intItem, @JsonProperty("varchar_item") String varcharItem)
|
19
|
+
{
|
20
|
+
this.id = id;
|
21
|
+
this.intItem = intItem;
|
22
|
+
this.varcharItem = varcharItem;
|
23
|
+
}
|
24
|
+
|
25
|
+
public String getId()
|
26
|
+
{
|
27
|
+
return id;
|
28
|
+
}
|
29
|
+
|
30
|
+
public Integer getIntItem()
|
31
|
+
{
|
32
|
+
return intItem;
|
33
|
+
}
|
34
|
+
|
35
|
+
public String getVarcharItem()
|
36
|
+
{
|
37
|
+
return varcharItem;
|
38
|
+
}
|
39
|
+
}
|
@@ -0,0 +1,353 @@
|
|
1
|
+
package org.embulk.input.kafka;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.core.JsonProcessingException;
|
4
|
+
import com.fasterxml.jackson.databind.JsonNode;
|
5
|
+
import com.fasterxml.jackson.databind.MappingIterator;
|
6
|
+
import com.fasterxml.jackson.databind.ObjectMapper;
|
7
|
+
import com.fasterxml.jackson.databind.ObjectReader;
|
8
|
+
import com.fasterxml.jackson.databind.SerializationFeature;
|
9
|
+
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
|
10
|
+
import com.fasterxml.jackson.dataformat.csv.CsvParser.Feature;
|
11
|
+
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
|
12
|
+
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
13
|
+
import com.google.common.collect.ImmutableList;
|
14
|
+
import com.google.common.collect.ImmutableMap;
|
15
|
+
import com.salesforce.kafka.test.KafkaTestUtils;
|
16
|
+
import com.salesforce.kafka.test.junit4.SharedKafkaTestResource;
|
17
|
+
import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
|
18
|
+
import io.confluent.kafka.serializers.KafkaAvroSerializer;
|
19
|
+
import org.apache.kafka.clients.producer.KafkaProducer;
|
20
|
+
import org.apache.kafka.clients.producer.ProducerRecord;
|
21
|
+
import org.apache.kafka.common.serialization.BytesSerializer;
|
22
|
+
import org.apache.kafka.common.utils.Bytes;
|
23
|
+
import org.embulk.config.ConfigSource;
|
24
|
+
import org.embulk.spi.InputPlugin;
|
25
|
+
import org.embulk.test.TestingEmbulk;
|
26
|
+
import org.junit.After;
|
27
|
+
import org.junit.Before;
|
28
|
+
import org.junit.ClassRule;
|
29
|
+
import org.junit.Rule;
|
30
|
+
import org.junit.Test;
|
31
|
+
|
32
|
+
import java.io.IOException;
|
33
|
+
import java.nio.file.Files;
|
34
|
+
import java.nio.file.Path;
|
35
|
+
import java.time.Instant;
|
36
|
+
import java.util.ArrayList;
|
37
|
+
import java.util.HashMap;
|
38
|
+
import java.util.List;
|
39
|
+
import java.util.Map;
|
40
|
+
import java.util.Properties;
|
41
|
+
import java.util.stream.IntStream;
|
42
|
+
|
43
|
+
import static org.junit.Assert.assertEquals;
|
44
|
+
import static org.junit.Assert.assertTrue;
|
45
|
+
|
46
|
+
/**
 * End-to-end tests for {@code KafkaInputPlugin}: records are produced into an
 * embedded Kafka cluster, the plugin is run through Embulk's test harness, and
 * the resulting CSV output is read back and checked.
 *
 * Common layout: each test writes 3 records ("ID-&lt;partition&gt;-&lt;n&gt;") into each of
 * partitions 0..7, so a full read of a topic yields 24 rows.
 */
public class TestKafkaInputPlugin
{
    // Embedded 3-broker Kafka cluster shared by every test in this class.
    @ClassRule
    public static final SharedKafkaTestResource sharedKafkaTestResource = new SharedKafkaTestResource()
        .withBrokers(3);

    // Fresh Embulk runtime per test, with the plugin under test registered as "kafka".
    @Rule
    public TestingEmbulk embulk = TestingEmbulk.builder()
        .registerPlugin(InputPlugin.class, "kafka", KafkaInputPlugin.class)
        .build();

    private KafkaTestUtils kafkaTestUtils;

    // Serializes the JSON test fixtures. WRITE_DATES_AS_TIMESTAMPS=false makes
    // Instant fields render as ISO-8601 strings rather than numeric epoch values.
    private final static ObjectMapper objectMapper = new ObjectMapper()
        .registerModules(new Jdk8Module(), new JavaTimeModule())
        .configure(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS, false)
        .configure(SerializationFeature.WRITE_DATE_TIMESTAMPS_AS_NANOSECONDS, false);

    private List<String> topicNames = ImmutableList.of("json-simple-topic", "json-complex-topic", "avro-simple-topic", "avro-complex-topic");

    // (Re)create every topic before each test: 48 partitions, replication factor 1.
    @Before
    public void setUp()
    {
        kafkaTestUtils = sharedKafkaTestResource.getKafkaTestUtils();
        topicNames.forEach(topic -> {
            kafkaTestUtils.createTopic(topic, 48, (short) 1);
        });
    }

    // Delete the topics so each test starts from an empty cluster state.
    @After
    public void tearDown()
    {
        kafkaTestUtils.getAdminClient().deleteTopics(topicNames);
    }

    /** JSON round-trip: 24 simple records in, 24 CSV rows out; spot-checks one row. */
    @Test
    public void testSimpleJson() throws IOException
    {
        // Produce 3 JSON-serialized SimpleRecords into each of partitions 0..7.
        IntStream.rangeClosed(0, 7).forEach(i -> {
            Map<byte[], byte[]> records = new HashMap<>();
            IntStream.rangeClosed(0, 2).forEach(j -> {
                String recordId = "ID-" + i + "-" + j;
                SimpleRecord simpleRecord = new SimpleRecord(recordId, j, "varchar_" + j);
                try {
                    String value = objectMapper.writeValueAsString(simpleRecord);
                    records.put(recordId.getBytes(), value.getBytes());
                } catch (JsonProcessingException e) {
                    // Lambdas cannot throw checked exceptions; fail the test instead.
                    throw new RuntimeException(e);
                }
            });
            // Third argument is the target partition.
            kafkaTestUtils.produceRecords(records, "json-simple-topic", i);
        });
        ConfigSource configSource = embulk.loadYamlResource("config_simple.yml");
        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
        Path outputDir = Files.createTempDirectory("embulk-input-kafka-test-simple-json");
        Path outputPath = outputDir.resolve("out.csv");
        embulk.runInput(configSource, outputPath);
        // Map the plugin's CSV output back onto SimpleRecord for easy assertions.
        CsvMapper csvMapper = new CsvMapper();
        ObjectReader objectReader = csvMapper.readerWithTypedSchemaFor(SimpleRecord.class);
        MappingIterator<SimpleRecord> it = objectReader
            .readValues(outputPath.toFile());

        List<SimpleRecord> outputs = new ArrayList<>();
        it.forEachRemaining(outputs::add);

        // 8 partitions x 3 records each.
        assertEquals(24, outputs.size());
        SimpleRecord simpleRecord = outputs.stream().filter(r -> r.getId().equals("ID-0-1"))
            .findFirst().get();
        assertEquals(1, simpleRecord.getIntItem().intValue());
    }

    /**
     * JSON round-trip for the complex schema (timestamp, array, nested map).
     * Record j==2 in each partition exercises null columns and a null map value;
     * the assertions below check a fully-populated row ("ID-0-1").
     */
    @Test
    public void testComplexJson() throws IOException
    {
        IntStream.rangeClosed(0, 7).forEach(i -> {
            Map<byte[], byte[]> records = new HashMap<>();
            IntStream.rangeClosed(0, 2).forEach(j -> {
                String recordId = "ID-" + i + "-" + j;
                ComplexRecord complexRecord;
                if (j == 2) {
                    // Edge-case record: all nullable columns null, plus a null inner-map value.
                    Map<String, String> innerMap = new HashMap<>();
                    innerMap.put("inner-1", null);
                    complexRecord = new ComplexRecord(
                        recordId,
                        null,
                        null,
                        null,
                        null,
                        ImmutableMap
                            .of("key", innerMap));
                }
                else {
                    complexRecord = new ComplexRecord(
                        recordId,
                        j,
                        "varchar_" + j,
                        Instant.ofEpochMilli(1597510800000L), // 2020-08-15 17:00:00 +00:00
                        ImmutableList.of("hoge" + j, "fuga" + j),
                        ImmutableMap
                            .of("key", ImmutableMap.of("inner-1", "value" + j, "inner-2", "value" + j)));
                }
                try {
                    String value = objectMapper.writeValueAsString(complexRecord);
                    records.put(recordId.getBytes(), value.getBytes());
                } catch (JsonProcessingException e) {
                    throw new RuntimeException(e);
                }
            });
            kafkaTestUtils.produceRecords(records, "json-complex-topic", i);
        });
        ConfigSource configSource = embulk.loadYamlResource("config_complex.yml");
        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
        Path outputDir = Files.createTempDirectory("embulk-input-kafka-test-complex-json");
        Path outputPath = outputDir.resolve("out.csv");
        embulk.runInput(configSource, outputPath);

        // Read the CSV rows as raw string arrays, since columns here span several types.
        CsvMapper csvMapper = new CsvMapper();
        csvMapper.enable(Feature.WRAP_AS_ARRAY);
        MappingIterator<String[]> it = csvMapper.readerFor(String[].class)
            .readValues(outputPath.toFile());

        List<String[]> outputs = new ArrayList<>();
        it.forEachRemaining(outputs::add);

        assertEquals(24, outputs.size());

        // Column order (by index): 0=id, 1=int_item, 2=varchar_item, 3=time,
        // 4=array (JSON), 5=data (JSON), 6/7 = extra columns emitted by the plugin
        // per config_complex.yml — presumably record key and partition; verify against config.
        String[] row = outputs.stream().filter(r -> r[0].equals("ID-0-1")).findFirst().get();
        assertEquals("1", row[1]);
        assertEquals("varchar_1", row[2]);
        assertEquals("2020-08-15 17:00:00.000000 +0000", row[3]);

        List<String> arrayData = objectMapper.readValue(row[4],
            objectMapper.getTypeFactory().constructCollectionType(List.class, String.class));
        assertEquals("hoge1", arrayData.get(0));
        assertEquals("fuga1", arrayData.get(1));

        JsonNode objectData = objectMapper.readTree(row[5]);
        assertTrue(objectData.has("key"));
        assertTrue(objectData.get("key").has("inner-1"));
        assertEquals("value1", objectData.get("key").get("inner-1").asText());
        assertTrue(objectData.get("key").has("inner-2"));
        assertEquals("value1", objectData.get("key").get("inner-2").asText());

        assertEquals("ID-0-1", row[6]);
        assertEquals("0", row[7]);
    }

    /**
     * Verifies seek_mode=timestamp: records produced before the captured
     * timestamp are skipped, so only the 8 "ID-AFTER-*" records are read.
     */
    @Test
    public void testSimpleJsonWithTimestampSeek() throws IOException
    {
        // First batch: 24 records, written BEFORE the seek timestamp.
        IntStream.rangeClosed(0, 7).forEach(i -> {
            Map<byte[], byte[]> records = new HashMap<>();
            IntStream.rangeClosed(0, 2).forEach(j -> {
                String recordId = "ID-" + i + "-" + j;
                SimpleRecord simpleRecord = new SimpleRecord(recordId, j, "varchar_" + j);
                try {
                    String value = objectMapper.writeValueAsString(simpleRecord);
                    records.put(recordId.getBytes(), value.getBytes());
                } catch (JsonProcessingException e) {
                    throw new RuntimeException(e);
                }
            });
            kafkaTestUtils.produceRecords(records, "json-simple-topic", i);
        });
        ConfigSource configSource = embulk.loadYamlResource("config_simple.yml");
        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
        configSource.set("seek_mode", "timestamp");
        // Capture "now" between the two batches; the plugin should start from here.
        long now = Instant.now().toEpochMilli();
        configSource.set("timestamp_for_seeking", now);

        // Second batch: 1 record per partition (8 total), written AFTER the timestamp.
        IntStream.rangeClosed(0, 7).forEach(i -> {
            Map<byte[], byte[]> records = new HashMap<>();
            IntStream.rangeClosed(0, 0).forEach(j -> {
                String recordId = "ID-AFTER-" + i + "-" + j;
                SimpleRecord simpleRecord = new SimpleRecord(recordId, j, "varchar_" + j);
                try {
                    String value = objectMapper.writeValueAsString(simpleRecord);
                    records.put(recordId.getBytes(), value.getBytes());
                } catch (JsonProcessingException e) {
                    throw new RuntimeException(e);
                }
            });
            kafkaTestUtils.produceRecords(records, "json-simple-topic", i);
        });

        Path outputDir = Files.createTempDirectory("embulk-input-kafka-test-simple-json");
        Path outputPath = outputDir.resolve("out.csv");
        embulk.runInput(configSource, outputPath);
        CsvMapper csvMapper = new CsvMapper();
        ObjectReader objectReader = csvMapper.readerWithTypedSchemaFor(SimpleRecord.class);
        MappingIterator<SimpleRecord> it = objectReader
            .readValues(outputPath.toFile());

        List<SimpleRecord> outputs = new ArrayList<>();
        it.forEachRemaining(outputs::add);

        // Only the post-timestamp batch should come back.
        assertEquals(8, outputs.size());
        SimpleRecord simpleRecord = outputs.stream().filter(r -> r.getId().equals("ID-AFTER-0-0"))
            .findFirst().get();
        assertEquals(0, simpleRecord.getIntItem().intValue());
    }

    /**
     * Avro round-trip using generated SimpleRecordAvro fixtures and a mock
     * (in-memory) Confluent schema registry; output is checked like testSimpleJson.
     */
    @Test
    public void testSimpleAvro() throws IOException
    {
        IntStream.rangeClosed(0, 7).forEach(i -> {
            List<ProducerRecord<Bytes, Object>> records = new ArrayList<>();
            IntStream.rangeClosed(0, 2).forEach(j -> {
                String recordId = "ID-" + i + "-" + j;
                SimpleRecordAvro simpleRecord = SimpleRecordAvro.newBuilder()
                    .setId(recordId)
                    .setIntItem(j)
                    .setVarcharItem("varchar_" + j)
                    .build();
                Bytes bytes = Bytes.wrap(recordId.getBytes());
                records.add(new ProducerRecord<>("avro-simple-topic", bytes, simpleRecord));
            });
            Properties producerConfigs = new Properties();
            // mock:// URL with a shared scope name so the producer's serializer and the
            // plugin under test talk to the same in-memory schema registry.
            producerConfigs.put(AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, "mock://" + KafkaInputPlugin.MOCK_SCHEMA_REGISTRY_SCOPE);
            KafkaProducer<Bytes, Object> kafkaProducer = kafkaTestUtils
                .getKafkaProducer(BytesSerializer.class, KafkaAvroSerializer.class, producerConfigs);
            records.forEach(kafkaProducer::send);
            // close() flushes pending sends before the test proceeds.
            kafkaProducer.close();
        });
        ConfigSource configSource = embulk.loadYamlResource("config_simple_avro.yml");
        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
        Path outputDir = Files.createTempDirectory("embulk-input-kafka-test-simple-avro");
        Path outputPath = outputDir.resolve("out.csv");
        embulk.runInput(configSource, outputPath);
        CsvMapper csvMapper = new CsvMapper();
        ObjectReader objectReader = csvMapper.readerWithTypedSchemaFor(SimpleRecord.class);
        MappingIterator<SimpleRecord> it = objectReader
            .readValues(outputPath.toFile());

        List<SimpleRecord> outputs = new ArrayList<>();
        it.forEachRemaining(outputs::add);

        assertEquals(24, outputs.size());
        SimpleRecord simpleRecord = outputs.stream().filter(r -> r.getId().equals("ID-0-1"))
            .findFirst().get();
        assertEquals(1, simpleRecord.getIntItem().intValue());
    }

    /**
     * Avro round-trip for the complex schema (nested InnerData record, arrays, maps).
     * Only the row count is asserted — a smoke test that the plugin can flatten
     * nested Avro structures without error.
     */
    @Test
    public void testComplexAvro() throws IOException
    {
        IntStream.rangeClosed(0, 7).forEach(i -> {
            List<ProducerRecord<Bytes, Object>> records = new ArrayList<>();
            IntStream.rangeClosed(0, 2).forEach(j -> {
                String recordId = "ID-" + i + "-" + j;
                ComplexRecordAvro complexRecord;
                if (j == 2) {
                    // Edge-case record: nullable fields (varchar_item, array, inner aaa) set to null.
                    complexRecord = ComplexRecordAvro.newBuilder()
                        .setId(recordId)
                        .setIntItem(j)
                        .setVarcharItem(null)
                        .setTime(1597510800000L) // 2020-08-15 17:00:00 +00:00
                        .setArray(null)
                        .setData(InnerData.newBuilder()
                            .setAaa(null)
                            .setHoge("hogehoge" + j)
                            .setInnerArray(ImmutableList.of(4L, 5L))
                            .setInnerMap(ImmutableMap.of("key1", 1L, "key2", 2L))
                            .build())
                        .build();
                }
                else {
                    complexRecord = ComplexRecordAvro.newBuilder()
                        .setId(recordId)
                        .setIntItem(j)
                        .setVarcharItem("varchar_" + j)
                        .setTime(1597510800000L) // 2020-08-15 17:00:00 +00:00
                        .setArray(ImmutableList.of(1L, 2L, 3L))
                        .setData(InnerData.newBuilder()
                            .setAaa("aaa" + j)
                            .setHoge("hogehoge" + j)
                            .setInnerArray(ImmutableList.of(4L, 5L))
                            .setInnerMap(ImmutableMap.of("key1", 1L, "key2", 2L))
                            .build())
                        .build();
                }
                Bytes bytes = Bytes.wrap(recordId.getBytes());
                records.add(new ProducerRecord<>("avro-complex-topic", bytes, complexRecord));
            });
            Properties producerConfigs = new Properties();
            producerConfigs.put(AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, "mock://" + KafkaInputPlugin.MOCK_SCHEMA_REGISTRY_SCOPE);
            KafkaProducer<Bytes, Object> kafkaProducer = kafkaTestUtils
                .getKafkaProducer(BytesSerializer.class, KafkaAvroSerializer.class, producerConfigs);
            records.forEach(kafkaProducer::send);
            kafkaProducer.close();
        });
        ConfigSource configSource = embulk.loadYamlResource("config_complex_avro.yml");
        configSource.set("brokers", ImmutableList.of(sharedKafkaTestResource.getKafkaBrokers().getBrokerById(1).getConnectString()));
        Path outputDir = Files.createTempDirectory("embulk-input-kafka-test-complex-avro");
        Path outputPath = outputDir.resolve("out.csv");
        embulk.runInput(configSource, outputPath);

        CsvMapper csvMapper = new CsvMapper();
        csvMapper.enable(Feature.WRAP_AS_ARRAY);
        MappingIterator<String[]> it = csvMapper.readerFor(String[].class)
            .readValues(outputPath.toFile());

        List<String[]> outputs = new ArrayList<>();
        it.forEachRemaining(outputs::add);

        assertEquals(24, outputs.size());
    }
}
|