embulk-input-kafka 0.1.0

@@ -0,0 +1,168 @@
+ package org.embulk.input.kafka;
+
+ import com.fasterxml.jackson.databind.JsonNode;
+ import com.fasterxml.jackson.databind.node.ArrayNode;
+ import com.fasterxml.jackson.databind.node.ObjectNode;
+ import org.embulk.input.kafka.KafkaInputPlugin.PluginTask;
+ import org.embulk.spi.Column;
+ import org.embulk.spi.ColumnVisitor;
+ import org.embulk.spi.PageBuilder;
+ import org.embulk.spi.time.Timestamp;
+ import org.embulk.spi.time.TimestampParser;
+ import org.msgpack.value.Value;
+ import org.msgpack.value.ValueFactory;
+
+ import java.util.ArrayList;
+ import java.util.HashMap;
+ import java.util.List;
+ import java.util.Map;
+
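+ /**
+  * ColumnVisitor for records consumed as JSON: copies fields of the deserialized
+  * {@link ObjectNode} into the Embulk {@link PageBuilder}, delegating the key and
+  * partition metadata columns to {@link AbstractKafkaInputColumnVisitor}.
+  */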
+ public class JsonFormatColumnVisitor extends AbstractKafkaInputColumnVisitor<ObjectNode> implements ColumnVisitor
+ {
+   public JsonFormatColumnVisitor(PluginTask task, PageBuilder pageBuilder, TimestampParser[] timestampParsers)
+   {
+     super(task, pageBuilder, timestampParsers);
+   }
+
+   @Override
+   public void booleanColumn(Column column)
+   {
+     JsonNode value = recordValue.get(column.getName());
+     if (value.isNull()) {
+       pageBuilder.setNull(column);
+       return;
+     }
+
+     pageBuilder.setBoolean(column, value.booleanValue());
+   }
+
+   @Override
+   public void longColumn(Column column)
+   {
+     if (super.isKeyColumn(column)) {
+       super.longColumnForKey(column);
+       return;
+     }
+
+     if (isPartitionColumn(column)) {
+       super.longColumnForPartition(column);
+       return;
+     }
+
+     JsonNode value = recordValue.get(column.getName());
+     if (value.isNull()) {
+       pageBuilder.setNull(column);
+       return;
+     }
+
+     pageBuilder.setLong(column, value.longValue());
+   }
+
+   @Override
+   public void doubleColumn(Column column)
+   {
+     if (super.isKeyColumn(column)) {
+       super.doubleColumnForKey(column);
+       return;
+     }
+
+     JsonNode value = recordValue.get(column.getName());
+     if (value.isNull()) {
+       pageBuilder.setNull(column);
+       return;
+     }
+
+     pageBuilder.setDouble(column, value.doubleValue());
+   }
+
+   @Override
+   public void stringColumn(Column column)
+   {
+     if (super.isKeyColumn(column)) {
+       super.stringColumnForKey(column);
+       return;
+     }
+
+     JsonNode value = recordValue.get(column.getName());
+     if (value.isNull()) {
+       pageBuilder.setNull(column);
+       return;
+     }
+
+     pageBuilder.setString(column, value.textValue());
+   }
+
+   @Override
+   public void timestampColumn(Column column)
+   {
+     if (super.isKeyColumn(column)) {
+       super.timestampColumnForKey(column);
+       return;
+     }
+
+     JsonNode value = recordValue.get(column.getName());
+     if (value.isNull()) {
+       pageBuilder.setNull(column);
+       return;
+     }
+
+     Timestamp timestamp = timestampParsers[column.getIndex()].parse(value.textValue());
+     pageBuilder.setTimestamp(column, timestamp);
+   }
+
+   @Override
+   public void jsonColumn(Column column)
+   {
+     JsonNode jsonNode = recordValue.get(column.getName());
+     if (jsonNode.isNull()) {
+       pageBuilder.setNull(column);
+       return;
+     }
+
+     pageBuilder.setJson(column, convertJsonValueToMsgpackValue(jsonNode));
+   }
+
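+   // The helpers below convert a Jackson JsonNode tree into the equivalent MessagePack
+   // Value recursively, which is what PageBuilder#setJson expects for json-typed columns.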
+   private Value convertArrayToMsgpackValue(ArrayNode node)
+   {
+     List<Value> values = new ArrayList<>();
+     node.forEach(item -> values.add(convertJsonValueToMsgpackValue(item)));
+     return ValueFactory.newArray(values);
+   }
+
+   private Value convertObjectToMsgpackValue(ObjectNode node)
+   {
+     Map<Value, Value> values = new HashMap<>();
+     node.fields().forEachRemaining(field -> {
+       values.put(ValueFactory.newString(field.getKey()),
+           convertJsonValueToMsgpackValue(field.getValue()));
+     });
+     return ValueFactory.newMap(values);
+   }
+
+   private Value convertJsonValueToMsgpackValue(JsonNode node)
+   {
+     if (node.isFloatingPointNumber()) {
+       return ValueFactory.newFloat(node.doubleValue());
+     }
+     else if (node.isIntegralNumber()) {
+       return ValueFactory.newInteger(node.longValue());
+     }
+     else if (node.isTextual()) {
+       return ValueFactory.newString(node.textValue());
+     }
+     else if (node.isNull()) {
+       return ValueFactory.newNil();
+     }
+     else if (node.isArray()) {
+       return convertArrayToMsgpackValue((ArrayNode) node);
+     }
+     else if (node.isObject()) {
+       return convertObjectToMsgpackValue((ObjectNode) node);
+     }
+
+     throw new RuntimeException("unknown json node");
+   }
+ }
@@ -0,0 +1,513 @@
+ package org.embulk.input.kafka;
+
+ import com.fasterxml.jackson.annotation.JsonCreator;
+ import com.fasterxml.jackson.annotation.JsonValue;
+ import com.fasterxml.jackson.databind.node.ObjectNode;
+ import com.google.common.collect.ImmutableMap;
+ import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
+ import io.confluent.kafka.serializers.KafkaAvroDeserializer;
+ import org.apache.kafka.clients.consumer.ConsumerConfig;
+ import org.apache.kafka.clients.consumer.ConsumerRecord;
+ import org.apache.kafka.clients.consumer.ConsumerRecords;
+ import org.apache.kafka.clients.consumer.KafkaConsumer;
+ import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
+ import org.apache.kafka.common.PartitionInfo;
+ import org.apache.kafka.common.TopicPartition;
+ import org.apache.kafka.common.serialization.BytesDeserializer;
+ import org.apache.kafka.common.utils.Bytes;
+ import org.embulk.config.Config;
+ import org.embulk.config.ConfigDefault;
+ import org.embulk.config.ConfigDiff;
+ import org.embulk.config.ConfigException;
+ import org.embulk.config.ConfigSource;
+ import org.embulk.config.Task;
+ import org.embulk.config.TaskReport;
+ import org.embulk.config.TaskSource;
+ import org.embulk.spi.BufferAllocator;
+ import org.embulk.spi.Exec;
+ import org.embulk.spi.InputPlugin;
+ import org.embulk.spi.PageBuilder;
+ import org.embulk.spi.PageOutput;
+ import org.embulk.spi.Schema;
+ import org.embulk.spi.SchemaConfig;
+ import org.embulk.spi.time.TimestampParser;
+ import org.embulk.spi.util.Timestamps;
+ import org.slf4j.Logger;
+ import org.slf4j.LoggerFactory;
+
+ import java.time.Duration;
+ import java.util.ArrayList;
+ import java.util.List;
+ import java.util.Locale;
+ import java.util.Map;
+ import java.util.Optional;
+ import java.util.Properties;
+ import java.util.concurrent.CopyOnWriteArrayList;
+ import java.util.stream.Collectors;
+ import java.util.stream.IntStream;
+
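+ /**
+  * Embulk input plugin that reads records from Kafka topics. Records can be
+  * deserialized either as plain JSON or as Avro via Confluent Schema Registry,
+  * and each Embulk task consumes a fixed set of topic partitions assigned
+  * round-robin during {@code transaction()}.
+  */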
+ public class KafkaInputPlugin
+     implements InputPlugin
+ {
+   static final String MOCK_SCHEMA_REGISTRY_SCOPE = "embulk-input-kafka";
+
+   public enum RecordSerializeFormat
+   {
+     JSON,
+     AVRO_WITH_SCHEMA_REGISTRY;
+
+     @JsonValue
+     public String toString()
+     {
+       return name().toLowerCase(Locale.ENGLISH);
+     }
+
+     @JsonCreator
+     public static RecordSerializeFormat ofString(String name)
+     {
+       switch (name.toLowerCase(Locale.ENGLISH)) {
+         case "json":
+           return JSON;
+         case "avro_with_schema_registry":
+           return AVRO_WITH_SCHEMA_REGISTRY;
+         default:
+       }
+
+       throw new ConfigException(String.format(
+           "Unknown serialize format '%s'. Supported modes are json, avro_with_schema_registry",
+           name));
+     }
+   }
+
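+   /**
+    * Where consumption starts for each assigned partition: {@code earliest} seeks to the
+    * beginning of the partition, {@code timestamp} looks up offsets with
+    * {@code offsetsForTimes} using the configured {@code timestamp_for_seeking}.
+    */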
+   public enum SeekMode
+   {
+     EARLIEST {
+       @Override
+       public void seek(KafkaConsumer<?, ?> consumer,
+           List<TopicPartition> topicPartitions, Optional<Long> timestamp)
+       {
+         consumer.seekToBeginning(topicPartitions);
+       }
+     },
+     TIMESTAMP {
+       @Override
+       public void seek(KafkaConsumer<?, ?> consumer,
+           List<TopicPartition> topicPartitions, Optional<Long> timestamp)
+       {
+         if (timestamp.isPresent()) {
+           Map<TopicPartition, Long> topicPartitionWithTimestamp = topicPartitions.stream()
+               .collect(Collectors
+                   .toMap(topicPartition -> topicPartition,
+                       topicPartition -> timestamp.get()));
+           Map<TopicPartition, OffsetAndTimestamp> topicPartitionOffsetAndTimestamp = consumer
+               .offsetsForTimes(topicPartitionWithTimestamp);
+           topicPartitionOffsetAndTimestamp.forEach(((topicPartition, offsetAndTimestamp) -> {
+             if (offsetAndTimestamp != null) {
+               consumer.seek(topicPartition, offsetAndTimestamp.offset());
+             }
+           }));
+         }
+       }
+     };
+
+     @JsonValue
+     public String toString()
+     {
+       return name().toLowerCase(Locale.ENGLISH);
+     }
+
+     @JsonCreator
+     public static SeekMode ofString(String name)
+     {
+       switch (name.toLowerCase(Locale.ENGLISH)) {
+         case "earliest":
+           return EARLIEST;
+         case "timestamp":
+           return TIMESTAMP;
+         default:
+       }
+
+       throw new ConfigException(String
+           .format("Unknown seek mode '%s'. Supported modes are earliest, timestamp",
+               name));
+     }
+
+     public abstract void seek(KafkaConsumer<?, ?> consumer, List<TopicPartition> topicPartitions,
+         Optional<Long> timestamp);
+   }
+
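+   /**
+    * When a task stops reading: {@code offset_at_start} snapshots each partition's end
+    * offset when the task starts and stops once those offsets are reached, while
+    * {@code endless} never terminates on offsets and relies on the empty-polling limit.
+    */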
+   public enum TerminationMode {
+     OFFSET_AT_START {
+       @Override
+       public Map<TopicPartition, Long> getOffsetsForTermination(
+           KafkaConsumer<?, ?> consumer,
+           List<TopicPartition> topicPartitions)
+       {
+         return consumer.endOffsets(topicPartitions);
+       }
+     },
+     ENDLESS {
+       @Override
+       public Map<TopicPartition, Long> getOffsetsForTermination(
+           KafkaConsumer<?, ?> consumer,
+           List<TopicPartition> topicPartitions)
+       {
+         return ImmutableMap.of();
+       }
+     };
+
+     @JsonCreator
+     public static TerminationMode ofString(String name)
+     {
+       switch (name.toLowerCase(Locale.ENGLISH)) {
+         case "offset_at_start":
+           return OFFSET_AT_START;
+         case "endless":
+           return ENDLESS;
+         default:
+       }
+
+       throw new ConfigException(String
+           .format("Unknown termination mode '%s'. Supported modes are offset_at_start, endless",
+               name));
+     }
+
+     public abstract Map<TopicPartition, Long> getOffsetsForTermination(
+         KafkaConsumer<?, ?> consumer,
+         List<TopicPartition> topicPartitions);
+   }
+
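+   /**
+    * Task configuration; each {@code @Config} name below is a key in the Embulk config file.
+    * Only {@code brokers}, {@code topics}, {@code serialize_format}, and {@code columns} have
+    * no default and are required. A minimal config (assuming the plugin is registered as
+    * {@code type: kafka}; the column names here are placeholders) might look like:
+    *
+    * <pre>
+    * in:
+    *   type: kafka
+    *   brokers: ["localhost:9092"]
+    *   topics: ["my-topic"]
+    *   serialize_format: json
+    *   columns:
+    *     - {name: id, type: long}
+    *     - {name: message, type: string}
+    * </pre>
+    *
+    * {@code assignments} is filled in by the plugin itself during {@code transaction()}.
+    */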
+   public interface PluginTask
+       extends Task, TimestampParser.Task
+   {
+     @Config("brokers")
+     public List<String> getBrokers();
+
+     @Config("topics")
+     public List<String> getTopics();
+
+     @Config("schema_registry_url")
+     @ConfigDefault("null")
+     public Optional<String> getSchemaRegistryUrl();
+
+     @Config("serialize_format")
+     public RecordSerializeFormat getRecordSerializeFormat();
+
+     @Config("seek_mode")
+     @ConfigDefault("\"earliest\"")
+     public SeekMode getSeekMode();
+
+     @Config("termination_mode")
+     @ConfigDefault("\"offset_at_start\"")
+     public TerminationMode getTerminationMode();
+
+     @Config("timestamp_for_seeking")
+     @ConfigDefault("null")
+     public Optional<Long> getTimestampForSeeking();
+
+     @Config("key_column_name")
+     @ConfigDefault("\"_key\"")
+     public String getKeyColumnName();
+
+     @Config("partition_column_name")
+     @ConfigDefault("\"_partition\"")
+     public String getPartitionColumnName();
+
+     @Config("fetch_max_wait_ms")
+     @ConfigDefault("30000")
+     public int getFetchMaxWaitMs();
+
+     @Config("max_empty_pollings")
+     @ConfigDefault("2")
+     public int getMaxEmptyPollings();
+
+     @Config("other_consumer_configs")
+     @ConfigDefault("{}")
+     public Map<String, String> getOtherConsumerConfigs();
+
+     @Config("value_subject_name_strategy")
+     @ConfigDefault("null")
+     public java.util.Optional<String> getValueSubjectNameStrategy();
+
+     @Config("columns")
+     public SchemaConfig getColumns();
+
+     @Config("assignments")
+     @ConfigDefault("[]")
+     public List<List<String>> getAssignments();
+
+     public void setAssignments(List<List<String>> assignments);
+   }
+
+   private static Logger logger = LoggerFactory.getLogger(KafkaInputPlugin.class);
+
+   @Override
+   public ConfigDiff transaction(ConfigSource config,
+       InputPlugin.Control control)
+   {
+     PluginTask task = config.loadConfig(PluginTask.class);
+
+     Schema schema = task.getColumns().toSchema();
+
+     Properties props = new Properties();
+     props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
+     KafkaConsumer<Bytes, Bytes> consumer = new KafkaConsumer<>(props, new BytesDeserializer(),
+         new BytesDeserializer());
+     int maxTaskCount = Runtime.getRuntime().availableProcessors() * 2;
+
+     List<List<String>> assignments = buildAssignments(consumer, task.getTopics(), maxTaskCount);
+     int taskCount = Math.min(assignments.size(), maxTaskCount);
+
+     task.setAssignments(assignments);
+
+     return resume(task.dump(), schema, taskCount, control);
+   }
+
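+   // Distributes every partition of the requested topics round-robin across at most
+   // maxTaskCount tasks, encoded as "topic:partition" strings so the assignment can be
+   // serialized into the task source.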
+   private List<List<String>> buildAssignments(KafkaConsumer<?, ?> consumer, List<String> topics,
+       int maxTaskCount)
+   {
+     List<List<String>> assignments = IntStream.range(0, maxTaskCount)
+         .mapToObj(n -> new ArrayList<String>()).collect(Collectors.toList());
+     int taskIndex = 0;
+     for (String topic : topics) {
+       for (PartitionInfo partitionInfo : consumer.partitionsFor(topic)) {
+         List<String> list = assignments.get(taskIndex);
+         if (list == null) {
+           list = new ArrayList<>();
+         }
+         list.add(String.format("%s:%d", partitionInfo.topic(), partitionInfo.partition()));
+         taskIndex += 1;
+         taskIndex = taskIndex % maxTaskCount;
+       }
+     }
+
+     return assignments;
+   }
+
+   private List<TopicPartition> buildTopicPartitions(List<List<String>> assignments, int taskIndex)
+   {
+     List<TopicPartition> topicPartitions = new CopyOnWriteArrayList<>();
+     assignments.get(taskIndex).forEach(assignmentInfo -> {
+       String[] assignmentInfoArray = assignmentInfo.split(":");
+       TopicPartition topicPartition = new TopicPartition(assignmentInfoArray[0],
+           Integer.parseInt(assignmentInfoArray[1]));
+       topicPartitions.add(topicPartition);
+     });
+
+     return topicPartitions;
+   }
+
+   @Override
+   public ConfigDiff resume(TaskSource taskSource,
+       Schema schema, int taskCount,
+       InputPlugin.Control control)
+   {
+     control.run(taskSource, schema, taskCount);
+     return Exec.newConfigDiff();
+   }
+
+   @Override
+   public void cleanup(TaskSource taskSource,
+       Schema schema, int taskCount,
+       List<TaskReport> successTaskReports)
+   {
+   }
+
+   @Override
+   public TaskReport run(TaskSource taskSource,
+       Schema schema, int taskIndex,
+       PageOutput output)
+   {
+     PluginTask task = taskSource.loadTask(PluginTask.class);
+
+     Properties props = new Properties();
+     props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
+     task.getOtherConsumerConfigs().forEach(props::setProperty);
+     List<TopicPartition> topicPartitions = buildTopicPartitions(task.getAssignments(), taskIndex);
+     switch (task.getRecordSerializeFormat()) {
+       case JSON:
+         JsonInputProcess jsonInputProcess = new JsonInputProcess(task, schema, output, props,
+             topicPartitions);
+         jsonInputProcess.run();
+         break;
+       case AVRO_WITH_SCHEMA_REGISTRY:
+         AvroInputProcess avroInputProcess = new AvroInputProcess(task, schema, output, props,
+             topicPartitions);
+         avroInputProcess.run();
+         break;
+       default:
+ throw new ConfigException("Unknown record_serialization_format");
340
+     }
+
+     TaskReport taskReport = Exec.newTaskReport();
+     return taskReport;
+   }
+
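+   /**
+    * Shared consume loop: assigns the task's partitions, seeks according to seek_mode,
+    * then polls and writes records to the page until every partition reaches its
+    * termination offset or the configured number of empty polls is reached.
+    */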
+   abstract static class AbstractInputProcess<V>
+   {
+     protected final PluginTask task;
+     private final Schema schema;
+     private final PageOutput output;
+     protected final Properties props;
+     private final List<TopicPartition> topicPartitions;
+     protected final TimestampParser[] timestampParsers;
+
+     protected AbstractInputProcess(PluginTask task, Schema schema,
+         PageOutput output, Properties props,
+         List<TopicPartition> topicPartitions)
+     {
+       this.task = task;
+       this.schema = schema;
+       this.output = output;
+       this.props = props;
+       this.topicPartitions = topicPartitions;
+       this.timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getColumns());
+     }
+
+     public abstract KafkaConsumer<Bytes, V> getConsumer();
+
+     public abstract AbstractKafkaInputColumnVisitor<V> getColumnVisitor(PageBuilder pageBuilder);
+
+     public void run()
+     {
+       try (KafkaConsumer<Bytes, V> consumer = getConsumer()) {
+         Map<TopicPartition, Long> offsetsForTermination = task
+             .getTerminationMode()
+             .getOffsetsForTermination(consumer, topicPartitions);
+
+         assignAndSeek(task, topicPartitions, offsetsForTermination, consumer);
+
+         BufferAllocator allocator = Exec.getBufferAllocator();
+         try (PageBuilder pageBuilder = new PageBuilder(allocator, schema, output)) {
+           final AbstractKafkaInputColumnVisitor<V> columnVisitor = getColumnVisitor(pageBuilder);
+
+           boolean reassign = false;
+           int emptyPollingCount = 0;
+
+           while (!topicPartitions.isEmpty()) {
+             if (reassign) {
+               consumer.assign(topicPartitions);
+             }
+
+             ConsumerRecords<Bytes, V> records = consumer
+                 .poll(Duration.ofMillis(task.getFetchMaxWaitMs()));
+
+             if (records.isEmpty()) {
+               emptyPollingCount += 1;
+               logger.info("polling results are empty. remaining count is {}",
+                   task.getMaxEmptyPollings() - emptyPollingCount);
+               if (emptyPollingCount >= task.getMaxEmptyPollings()) {
+                 break;
+               }
+             }
+
+             for (ConsumerRecord<Bytes, V> record : records) {
+               if (record.value() != null) {
+                 columnVisitor.reset(record);
+                 schema.visitColumns(columnVisitor);
+                 pageBuilder.addRecord();
+               }
+
+               TopicPartition topicPartition = new TopicPartition(record.topic(),
+                   record.partition());
+               if (task.getTerminationMode() == TerminationMode.OFFSET_AT_START
+                   && record.offset() >= offsetsForTermination.get(topicPartition) - 1) {
+                 reassign = true;
+                 topicPartitions.remove(topicPartition);
+               }
+             }
+           }
+
+           pageBuilder.finish();
+         }
+       }
+     }
+
+     private void assignAndSeek(PluginTask task,
+         List<TopicPartition> topicPartitions, Map<TopicPartition, Long> offsetsForTermination,
+         KafkaConsumer<?, ?> consumer)
+     {
+       consumer.assign(topicPartitions);
+
+       task.getSeekMode().seek(consumer, topicPartitions, task.getTimestampForSeeking());
+
+       for (TopicPartition topicPartition : topicPartitions) {
+         long position = consumer.position(topicPartition);
+         // In endless mode there are no termination offsets, so keep the partition assigned.
+         Long offsetForTermination = offsetsForTermination.get(topicPartition);
+         if (offsetForTermination != null && position >= offsetForTermination) {
+           topicPartitions.remove(topicPartition);
+         }
+       }
+
+       consumer.assign(topicPartitions);
+     }
+   }
+
+   static class JsonInputProcess extends AbstractInputProcess<ObjectNode>
+   {
+     JsonInputProcess(PluginTask task, Schema schema,
+         PageOutput output, Properties props,
+         List<TopicPartition> topicPartitions)
+     {
+       super(task, schema, output, props, topicPartitions);
+     }
+
+     @Override
+     public KafkaConsumer<Bytes, ObjectNode> getConsumer()
+     {
+       return new KafkaConsumer<>(props, new BytesDeserializer(), new KafkaJsonDeserializer());
+     }
+
+     @Override
+     public AbstractKafkaInputColumnVisitor<ObjectNode> getColumnVisitor(PageBuilder pageBuilder)
+     {
+       return new JsonFormatColumnVisitor(task, pageBuilder, timestampParsers);
+     }
+   }
+
+   static class AvroInputProcess extends AbstractInputProcess<Object>
+   {
+     protected AvroInputProcess(PluginTask task, Schema schema, PageOutput output,
+         Properties props, List<TopicPartition> topicPartitions)
+     {
+       super(task, schema, output, props, topicPartitions);
+     }
+
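+     // Builds a KafkaAvroDeserializer configured with schema_registry_url and, when set,
+     // value_subject_name_strategy, so values are decoded via Confluent Schema Registry.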
+     private KafkaAvroDeserializer buildKafkaAvroDeserializer()
+     {
+       KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer();
+
+       String schemaRegistryUrl = task.getSchemaRegistryUrl().orElseThrow(
+           () -> new ConfigException("avro_with_schema_registry format needs schema_registry_url"));
+
+       ImmutableMap.Builder<String, String> builder = ImmutableMap.<String, String>builder()
+           .put(AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
+
+       if (task.getValueSubjectNameStrategy().isPresent()) {
+         builder.put(AbstractKafkaSchemaSerDeConfig.VALUE_SUBJECT_NAME_STRATEGY,
+             task.getValueSubjectNameStrategy().get());
+       }
+       Map<String, String> avroDeserializerConfig = builder.build();
+       kafkaAvroDeserializer.configure(avroDeserializerConfig, false);
+
+       return kafkaAvroDeserializer;
+     }
+
+     @Override
+     public KafkaConsumer<Bytes, Object> getConsumer()
+     {
+       return new KafkaConsumer<>(props, new BytesDeserializer(), buildKafkaAvroDeserializer());
+     }
+
+     @Override
+     public AbstractKafkaInputColumnVisitor<Object> getColumnVisitor(PageBuilder pageBuilder)
+     {
+       return new AvroFormatColumnVisitor(task, pageBuilder, timestampParsers);
+     }
+   }
+
+   @Override
+   public ConfigDiff guess(ConfigSource config)
+   {
+     return Exec.newConfigDiff();
+   }
+ }