embulk-output-kafka 0.1.0
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/LICENSE.txt +21 -0
- data/README.md +110 -0
- data/build.gradle +113 -0
- data/classpath/audience-annotations-0.5.0.jar +0 -0
- data/classpath/avro-1.9.0.jar +0 -0
- data/classpath/common-config-5.3.0.jar +0 -0
- data/classpath/common-utils-5.3.0.jar +0 -0
- data/classpath/commons-compress-1.18.jar +0 -0
- data/classpath/embulk-output-kafka-0.1.0.jar +0 -0
- data/classpath/jackson-annotations-2.9.0.jar +0 -0
- data/classpath/jackson-core-2.9.9.jar +0 -0
- data/classpath/jackson-databind-2.9.9.jar +0 -0
- data/classpath/jline-0.9.94.jar +0 -0
- data/classpath/jsr305-3.0.2.jar +0 -0
- data/classpath/kafka-avro-serializer-5.3.0.jar +0 -0
- data/classpath/kafka-clients-5.3.0-ccs.jar +0 -0
- data/classpath/kafka-schema-registry-client-5.3.0.jar +0 -0
- data/classpath/lz4-java-1.6.0.jar +0 -0
- data/classpath/netty-3.10.6.Final.jar +0 -0
- data/classpath/slf4j-api-1.7.26.jar +0 -0
- data/classpath/snappy-java-1.1.7.3.jar +0 -0
- data/classpath/spotbugs-annotations-3.1.9.jar +0 -0
- data/classpath/zkclient-0.10.jar +0 -0
- data/classpath/zookeeper-3.4.14.jar +0 -0
- data/classpath/zstd-jni-1.4.0-1.jar +0 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +5 -0
- data/gradlew +172 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/output/kafka.rb +3 -0
- data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +189 -0
- data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +103 -0
- data/src/main/java/org/embulk/output/kafka/KafkaJsonSerializer.java +23 -0
- data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +53 -0
- data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +323 -0
- data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java +105 -0
- data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java +5 -0
- data/src/test/resources/SimpleRecord.avsc +9 -0
- data/src/test/resources/config_complex.yml +26 -0
- data/src/test/resources/config_complex_avro.yml +43 -0
- data/src/test/resources/config_simple.yml +22 -0
- data/src/test/resources/config_simple_avro.yml +32 -0
- data/src/test/resources/config_simple_avro_avsc_file.yml +25 -0
- data/src/test/resources/config_with_key_column.yml +23 -0
- data/src/test/resources/in1.csv +4 -0
- data/src/test/resources/in_complex.csv +5 -0
- metadata +121 -0

data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java
@@ -0,0 +1,323 @@
package org.embulk.output.kafka;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.avro.generic.GenericData;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.config.ConfigDiff;
import org.embulk.config.ConfigException;
import org.embulk.config.ConfigSource;
import org.embulk.config.Task;
import org.embulk.config.TaskReport;
import org.embulk.config.TaskSource;
import org.embulk.spi.Exec;
import org.embulk.spi.OutputPlugin;
import org.embulk.spi.Page;
import org.embulk.spi.PageReader;
import org.embulk.spi.Schema;
import org.embulk.spi.TransactionalPageOutput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.PrimitiveIterator;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;

public class KafkaOutputPlugin
        implements OutputPlugin
{
    public enum RecordSerializeFormat
    {
        JSON,
        AVRO_WITH_SCHEMA_REGISTRY;

        @JsonValue
        public String toString()
        {
            return name().toLowerCase(Locale.ENGLISH);
        }

        @JsonCreator
        public static RecordSerializeFormat ofString(String name)
        {
            switch (name.toLowerCase(Locale.ENGLISH)) {
                case "json":
                    return JSON;
                case "avro_with_schema_registry":
                    return AVRO_WITH_SCHEMA_REGISTRY;
                default:
            }

            throw new ConfigException(String.format("Unknown serialize format '%s'. Supported modes are json, avro_with_schema_registry", name));
        }
    }

    public interface PluginTask
            extends Task
    {
        @Config("brokers")
        public List<String> getBrokers();

        @Config("topic")
        public String getTopic();

        @Config("topic_column")
        @ConfigDefault("null")
        public Optional<String> getTopicColumn();

        @Config("schema_registry_url")
        @ConfigDefault("null")
        public Optional<String> getSchemaRegistryUrl();

        @Config("serialize_format")
        public RecordSerializeFormat getRecordSerializeFormat();

        @Config("avsc_file")
        @ConfigDefault("null")
        public Optional<File> getAvscFile();

        @Config("avsc")
        @ConfigDefault("null")
        public Optional<ObjectNode> getAvsc();

        @Config("key_column_name")
        @ConfigDefault("null")
        public Optional<String> getKeyColumnName();

        @Config("record_batch_size")
        @ConfigDefault("1000")
        public int getRecordBatchSize();

        @Config("acks")
        @ConfigDefault("\"1\"")
        public String getAcks();

        @Config("retries")
        @ConfigDefault("1")
        public int getRetries();

        @Config("other_producer_configs")
        @ConfigDefault("{}")
        public Map<String, String> getOtherProducerConfigs();
    }

    private static ObjectMapper objectMapper = new ObjectMapper();
    private Logger logger = LoggerFactory.getLogger(getClass());
    private int recordLoggingCount = 1;

    @Override
    public ConfigDiff transaction(ConfigSource config,
            Schema schema, int taskCount,
            Control control)
    {
        PluginTask task = config.loadConfig(PluginTask.class);

        // retryable (idempotent) output:
        // return resume(task.dump(), schema, taskCount, control);

        // non-retryable (non-idempotent) output:
        control.run(task.dump());
        return Exec.newConfigDiff();
    }

    @Override
    public ConfigDiff resume(TaskSource taskSource,
            Schema schema, int taskCount,
            Control control)
    {
        throw new UnsupportedOperationException("kafka output plugin does not support resuming");
    }

    @Override
    public void cleanup(TaskSource taskSource,
            Schema schema, int taskCount,
            List<TaskReport> successTaskReports)
    {
    }

    @Override
    public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
    {
        PluginTask task = taskSource.loadTask(PluginTask.class);

        switch (task.getRecordSerializeFormat()) {
            case JSON:
                return buildPageOutputForJson(task, schema, taskIndex);
            case AVRO_WITH_SCHEMA_REGISTRY:
                return buildPageOutputForAvroWithSchemaRegistry(task, schema, taskIndex);
            default:
                throw new ConfigException("Unknow serialize format");
        }
    }

    private TransactionalPageOutput buildPageOutputForJson(PluginTask task, Schema schema, int taskIndex)
    {
        KafkaProducer<Object, ObjectNode> producer = RecordProducerFactory.getForJson(task, schema, task.getOtherProducerConfigs());

        PageReader pageReader = new PageReader(schema);
        PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
        AtomicInteger counter = new AtomicInteger(0);

        return new TransactionalPageOutput() {
            @Override
            public void add(Page page)
            {
                pageReader.setPage(page);
                while (pageReader.nextRecord()) {
                    JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);

                    pageReader.getSchema().visitColumns(columnVisitor);

                    Object recordKey = columnVisitor.recordKey;
                    if (recordKey == null) {
                        recordKey = randomLong.next();
                    }

                    String targetTopic = columnVisitor.topicName != null ? columnVisitor.topicName : task.getTopic();
                    ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.jsonNode);
                    producer.send(producerRecord, (metadata, exception) -> {
                        if (exception != null) {
                            logger.error("produce error", exception);
                        }

                        logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());

                        int current = counter.incrementAndGet();
                        if (current >= recordLoggingCount) {
                            logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
                            recordLoggingCount = recordLoggingCount * 2;
                        }
                    });
                }
            }

            @Override
            public void finish()
            {
                producer.flush();
            }

            @Override
            public void close()
            {
                producer.close();
            }

            @Override
            public void abort()
            {
                producer.flush();
                producer.close();
            }

            @Override
            public TaskReport commit()
            {
                return null;
            }
        };
    }

    private TransactionalPageOutput buildPageOutputForAvroWithSchemaRegistry(PluginTask task, Schema schema, int taskIndex)
    {
        KafkaProducer<Object, Object> producer = RecordProducerFactory.getForAvroWithSchemaRegistry(task, schema, task.getOtherProducerConfigs());

        PageReader pageReader = new PageReader(schema);

        org.apache.avro.Schema avroSchema = null;
        if (!task.getAvsc().isPresent() && !task.getAvscFile().isPresent() || task.getAvsc().isPresent() == task.getAvscFile().isPresent()) {
            throw new ConfigException("avro_with_schema_registry format needs either one of avsc and avsc_file");
        }
        if (task.getAvsc().isPresent()) {
            avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvsc().get().toString());
        }
        if (task.getAvscFile().isPresent()) {
            try {
                avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvscFile().get());
            }
            catch (IOException e) {
                e.printStackTrace();
                throw new ConfigException("avsc_file cannot read");
            }
        }

        final Object[] key = new Object[1];
        final String[] topicName = new String[1];
        PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();

        AtomicInteger counter = new AtomicInteger(0);

        final org.apache.avro.Schema finalAvroSchema = avroSchema;
        return new TransactionalPageOutput()
        {
            @Override
            public void add(Page page)
            {
                pageReader.setPage(page);
                while (pageReader.nextRecord()) {
                    AvroFormatColumnVisitor columnVisitor = new AvroFormatColumnVisitor(task, pageReader, finalAvroSchema, new GenericData.Record(finalAvroSchema));

                    pageReader.getSchema().visitColumns(columnVisitor);

                    Object recordKey = columnVisitor.recordKey;
                    if (recordKey == null) {
                        recordKey = randomLong.next();
                    }

                    String targetTopic = columnVisitor.topicName != null ? columnVisitor.topicName : task.getTopic();

                    ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, recordKey, columnVisitor.genericRecord);
                    producer.send(producerRecord, (metadata, exception) -> {
                        if (exception != null) {
                            logger.error("produce error", exception);
                        }

                        logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());

                        int current = counter.incrementAndGet();
                        if (current >= recordLoggingCount) {
                            logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
                            recordLoggingCount = recordLoggingCount * 2;
                        }
                    });
                }
            }

            @Override
            public void finish()
            {
                producer.flush();
            }

            @Override
            public void close()
            {
                producer.close();
            }

            @Override
            public void abort()
            {
                producer.flush();
                producer.close();
            }

            @Override
            public TaskReport commit()
            {
                return null;
            }
        };
    }
}
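In the avro_with_schema_registry path above, the plugin parses the schema given by `avsc` or `avsc_file` and lets AvroFormatColumnVisitor fill a GenericData.Record for every input row before sending it. The following is an illustrative, standalone sketch of that parse-and-populate step only; the schema mirrors the bundled SimpleRecord test schema and the field values are made-up examples, not the plugin's actual code path.

// Illustrative sketch of the parse-and-populate step behind avro_with_schema_registry.
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;

public class AvroRecordSketch
{
    public static void main(String[] args)
    {
        String avsc = "{\"type\": \"record\", \"name\": \"SimpleRecord\", \"fields\": ["
                + "{\"name\": \"id\", \"type\": \"string\"},"
                + "{\"name\": \"int_item\", \"type\": \"long\"},"
                + "{\"name\": \"varchar_item\", \"type\": \"string\"}]}";

        // Same call the plugin makes for the inline `avsc` option (avsc_file goes through parse(File)).
        Schema schema = new Schema.Parser().parse(avsc);

        // AvroFormatColumnVisitor fills one of these per input row before it is sent to Kafka.
        GenericData.Record record = new GenericData.Record(schema);
        record.put("id", "A001");
        record.put("int_item", 9L);
        record.put("varchar_item", "example");   // placeholder value

        System.out.println(record);
    }
}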

data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java
@@ -0,0 +1,105 @@
package org.embulk.output.kafka;

import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.ImmutableMap;
import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig;
import io.confluent.kafka.serializers.KafkaAvroSerializer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.DoubleSerializer;
import org.apache.kafka.common.serialization.LongSerializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.embulk.config.ConfigException;
import org.embulk.spi.Column;
import org.embulk.spi.ColumnVisitor;
import org.embulk.spi.Schema;

import java.util.Map;
import java.util.Properties;

class RecordProducerFactory
{
    private RecordProducerFactory() {}

    private static Properties buildProperties(KafkaOutputPlugin.PluginTask task, Schema schema, Map<String, String> configs)
    {
        Properties kafkaProps = new Properties();

        kafkaProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
        kafkaProps.put(ProducerConfig.ACKS_CONFIG, task.getAcks());
        kafkaProps.put(ProducerConfig.RETRIES_CONFIG, task.getRetries());
        kafkaProps.put(ProducerConfig.BATCH_SIZE_CONFIG, task.getRecordBatchSize());

        configs.forEach(kafkaProps::setProperty);

        if (task.getKeyColumnName().isPresent()) {
            String keyColumnName = task.getKeyColumnName().get();
            Column column = schema.getColumns().stream()
                    .filter(c -> c.getName().equals(keyColumnName))
                    .findFirst()
                    .orElseThrow(() -> new ConfigException("key column is not found"));

            column.visit(new ColumnVisitor()
            {
                @Override
                public void booleanColumn(Column column)
                {
                    throw new RuntimeException("boolean column is not supported for key_column_name");
                }

                @Override
                public void longColumn(Column column)
                {
                    kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
                }

                @Override
                public void doubleColumn(Column column)
                {
                    kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, DoubleSerializer.class);
                }

                @Override
                public void stringColumn(Column column)
                {
                    kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
                }

                @Override
                public void timestampColumn(Column column)
                {
                    throw new RuntimeException("timestamp column is not supported for key_column_name");
                }

                @Override
                public void jsonColumn(Column column)
                {
                    throw new RuntimeException("json column is not supported for key_column_name");
                }
            });
        }
        else {
            kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
        }

        return kafkaProps;
    }

    static KafkaProducer<Object, ObjectNode> getForJson(KafkaOutputPlugin.PluginTask task, Schema schema, Map<String, String> configs)
    {
        return new KafkaProducer<>(buildProperties(task, schema, configs), null, new KafkaJsonSerializer());
    }

    static KafkaProducer<Object, Object> getForAvroWithSchemaRegistry(KafkaOutputPlugin.PluginTask task, Schema schema, Map<String, String> configs)
    {
        KafkaAvroSerializer kafkaAvroSerializer = new KafkaAvroSerializer();
        String schemaRegistryUrl = task.getSchemaRegistryUrl().orElseThrow(() -> new ConfigException("avro_with_schema_registry format needs schema_registry_url"));

        Map<String, String> avroSerializerConfigs = ImmutableMap.<String, String>builder()
                .put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl)
                .build();
        kafkaAvroSerializer.configure(avroSerializerConfigs, false);

        return new KafkaProducer<>(buildProperties(task, schema, configs), null, kafkaAvroSerializer);
    }
}
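RecordProducerFactory maps the task options onto standard producer properties (bootstrap servers, acks, retries, batch size plus any other_producer_configs) and picks the key serializer from the key column's Embulk type. The following is a minimal self-contained sketch of the equivalent wiring with the plain Kafka clients API; the broker address, topic name and payload are placeholders, and a String value serializer stands in for the plugin's KafkaJsonSerializer.

// Standalone sketch of the wiring buildProperties/getForJson perform, with placeholder values.
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

public class ProducerWiringSketch
{
    public static void main(String[] args)
    {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ProducerConfig.ACKS_CONFIG, "1");         // plugin default for acks
        props.put(ProducerConfig.RETRIES_CONFIG, 1);        // plugin default for retries
        props.put(ProducerConfig.BATCH_SIZE_CONFIG, 1000);  // plugin default for record_batch_size

        // The plugin chooses the key serializer from the key column's type; key_column_name
        // pointing at a string column would select StringSerializer like this.
        try (KafkaProducer<String, String> producer =
                new KafkaProducer<>(props, new StringSerializer(), new StringSerializer())) {
            ProducerRecord<String, String> record =
                    new ProducerRecord<>("json-topic", "A001", "{\"id\":\"A001\",\"int_item\":9}");
            producer.send(record, (metadata, exception) -> {
                if (exception != null) {
                    exception.printStackTrace();             // the plugin logs this via slf4j
                }
            });
            producer.flush();                                // what finish() does in the plugin
        }
    }
}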

data/src/test/resources/config_complex.yml
@@ -0,0 +1,26 @@
in:
  type: file
  path_prefix: ./src/test/resources/in_complex
  parser:
    charset: UTF-8
    newline: CRLF
    type: csv
    delimiter: "\t"
    quote: "\0"
    escape: "\0"
    null_string: 'NULL'
    skip_header_lines: 1
    columns:
    - {name: 'id', type: string}
    - {name: 'int_item', type: long}
    - {name: 'time', type: timestamp, format: "%Y-%m-%dT%H:%M:%S"}
    - {name: 'array', type: json}
    - {name: 'data', type: json}
out:
  type: kafka
  topic: "json-topic"
  serialize_format: json
  brokers:
    - "localhost:9092"
  other_producer_configs:
    buffer.memory: "67108864"

data/src/test/resources/config_complex_avro.yml
@@ -0,0 +1,43 @@
in:
  type: file
  path_prefix: ./src/test/resources/in_complex
  parser:
    charset: UTF-8
    newline: CRLF
    type: csv
    delimiter: "\t"
    quote: "\0"
    escape: "\0"
    null_string: 'NULL'
    skip_header_lines: 1
    columns:
    - {name: 'id', type: string}
    - {name: 'int_item', type: long}
    - {name: 'time', type: timestamp, format: "%Y-%m-%dT%H:%M:%S"}
    - {name: 'array', type: json}
    - {name: 'data', type: json}
out:
  type: kafka
  topic: "avro-complex-topic"
  acks: all
  retries: 3
  brokers:
    - "localhost:9092"
  schema_registry_url: "http://localhost:48081/"
  serialize_format: avro_with_schema_registry
  other_producer_configs:
    buffer.memory: "67108864"
  avsc:
    type: record
    name: ComplexRecord
    fields: [
      {name: "id", type: "string"},
      {name: "int_item", type: "long"},
      {name: "time", type: "long", logicalType: "timestamp-milli"},
      {name: "array", type: {type: "array", items: "long"}},
      {name: "data", type: {type: "record", name: "InnerData", fields: [
        {name: "hoge", type: "string"},
        {name: "aaa", type: ["null", "string"]},
        {name: "array", type: {type: "array", items: "long"}},
      ]}},
    ]

data/src/test/resources/config_simple.yml
@@ -0,0 +1,22 @@
in:
  type: file
  path_prefix: ./src/test/resources/in1
  parser:
    charset: UTF-8
    newline: CRLF
    type: csv
    delimiter: ','
    quote: '"'
    escape: '"'
    null_string: 'NULL'
    skip_header_lines: 1
    columns:
    - {name: 'id', type: string}
    - {name: 'int_item', type: long}
    - {name: 'varchar_item', type: string}
out:
  type: kafka
  topic: "json-topic"
  serialize_format: json
  brokers:
    - "localhost:9092"

data/src/test/resources/config_simple_avro.yml
@@ -0,0 +1,32 @@
in:
  type: file
  path_prefix: ./src/test/resources/in1
  parser:
    charset: UTF-8
    newline: CRLF
    type: csv
    delimiter: ','
    quote: '"'
    escape: '"'
    null_string: 'NULL'
    skip_header_lines: 1
    columns:
    - {name: 'id', type: string}
    - {name: 'int_item', type: long}
    - {name: 'varchar_item', type: string}

out:
  type: kafka
  topic: "avro-simple-topic"
  brokers:
    - "localhost:9092"
  schema_registry_url: "http://localhost:48081/"
  serialize_format: avro_with_schema_registry
  avsc:
    type: record
    name: SimpleRecord
    fields: [
      {name: "id", type: "string"},
      {name: "int_item", type: "long"},
      {name: "varchar_item", type: "string"},
    ]

data/src/test/resources/config_simple_avro_avsc_file.yml
@@ -0,0 +1,25 @@
in:
  type: file
  path_prefix: ./src/test/resources/in1
  parser:
    charset: UTF-8
    newline: CRLF
    type: csv
    delimiter: ','
    quote: '"'
    escape: '"'
    null_string: 'NULL'
    skip_header_lines: 1
    columns:
    - {name: 'id', type: string}
    - {name: 'int_item', type: long}
    - {name: 'varchar_item', type: string}

out:
  type: kafka
  topic: "avro-simple-topic"
  brokers:
    - "localhost:9092"
  schema_registry_url: "http://localhost:48081/"
  serialize_format: avro_with_schema_registry
  avsc_file: ./src/test/resources/SimpleRecord.avsc

data/src/test/resources/config_with_key_column.yml
@@ -0,0 +1,23 @@
in:
  type: file
  path_prefix: ./src/test/resources/in1
  parser:
    charset: UTF-8
    newline: CRLF
    type: csv
    delimiter: ','
    quote: '"'
    escape: '"'
    null_string: 'NULL'
    skip_header_lines: 1
    columns:
    - {name: 'id', type: string}
    - {name: 'int_item', type: long}
    - {name: 'varchar_item', type: string}
out:
  type: kafka
  topic: "json-topic"
  serialize_format: json
  brokers:
    - "localhost:9092"
  key_column_name: id

data/src/test/resources/in_complex.csv (tab-delimited)
@@ -0,0 +1,5 @@
id	int_item	time	array	data
A001	9	2018-02-01T12:15:18	[1,2,3]	{"hoge": "fuga1", "aaa": "bbb1", "array": [1,2,3]}
A002	0	2018-02-02T12:15:18	[1,2,3]	{"hoge": "fuga2", "aaa": null, "array": [4,5,6]}
A003	9	2018-02-03T12:15:18	[1,2,3]	{"hoge": "fuga3", "aaa": "bbb3", "array": [7,8,9]}
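in_complex.csv is the tab-delimited fixture consumed by the config_complex* configs; under serialize_format: json, JsonFormatColumnVisitor turns each row into a Jackson ObjectNode that becomes the Kafka record value. The sketch below shows roughly what the first row becomes; it is illustrative only, and the string rendering of the timestamp column is an assumption, not taken from JsonFormatColumnVisitor.

// Roughly the JSON value built for the first in_complex.csv row (timestamp rendering assumed).
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;

public class JsonRowSketch
{
    public static void main(String[] args) throws Exception
    {
        ObjectMapper mapper = new ObjectMapper();
        ObjectNode row = mapper.createObjectNode();
        row.put("id", "A001");
        row.put("int_item", 9L);
        row.put("time", "2018-02-01T12:15:18");
        row.set("array", mapper.readTree("[1, 2, 3]"));
        row.set("data", mapper.readTree("{\"hoge\": \"fuga1\", \"aaa\": \"bbb1\", \"array\": [1, 2, 3]}"));

        System.out.println(mapper.writeValueAsString(row));
    }
}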