embulk-output-kafka 0.1.7 → 0.1.8

Files changed (29)
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +44 -0
  3. data/.github/dependabot.yml +11 -0
  4. data/README.md +5 -1
  5. data/build.gradle +29 -12
  6. data/docker-compose.yml +1 -1
  7. data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +13 -8
  8. data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java +13 -0
  9. data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +9 -2
  10. data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java +13 -0
  11. data/src/main/java/org/embulk/output/kafka/KafkaJsonSerializer.java +4 -0
  12. data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +25 -1
  13. data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +54 -153
  14. data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java +104 -0
  15. data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java +3 -3
  16. data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java +384 -0
  17. data/src/test/resources/config_complex.yml +9 -28
  18. data/src/test/resources/config_complex_avro.yml +23 -42
  19. data/src/test/resources/config_simple.yml +5 -22
  20. data/src/test/resources/config_simple_avro.yml +14 -32
  21. data/src/test/resources/config_simple_avro_avsc_file.yml +7 -25
  22. data/src/test/resources/config_with_column_for_deletion.yml +7 -0
  23. data/src/test/resources/config_with_column_for_deletion_avro.yml +18 -0
  24. data/src/test/resources/config_with_key_column.yml +6 -23
  25. data/src/test/resources/config_with_partition_column.yml +6 -0
  26. data/src/test/resources/in1.csv +4 -4
  27. data/src/test/resources/in_complex.csv +4 -4
  28. data/src/test/resources/in_with_deletion.csv +4 -0
  29. metadata +30 -24
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: c2c8249b98bf48ab359b21e01dc0f4112ebf800f
-  data.tar.gz: 538847da32ff4ad84f24495c67d790185043ce02
+SHA256:
+  metadata.gz: cb0e6dfc4b8b49b93fb6966948e8930e5785cf5a2a89fc5a8f17c80a4e7865c1
+  data.tar.gz: 18cb578eba9423c490e7416c906054f8401301b977e796c271182f593dc08563
 SHA512:
-  metadata.gz: 0542a0a92e3c8b2a998177c874acf4d0209d0e90889894999daff756840169fe585877af51b5368eaada9ba4eb8bd6c6676fafc8ff5fc84d4edf193df19881de
-  data.tar.gz: 1cd8fb2c55282f082eef56ea983cbc59cbae551db606f55bf21c1fccfbea37cfe633ef713eb66b089f0229ab5b97ae7b9e79ad38eddfc79d9157ca2e68fc869a
+  metadata.gz: 84a080d3cc49d30ad04802162e94de0072aac96d2562649c34cb1623e7734f783e96deeb8082c81ce965b36520c21be122cdf0e292a55697c9d2c89fd767f551
+  data.tar.gz: 4eb1b72cb705b2eb473ae76f02085987b4f55a37d9ba7048789ea3133e8056850813d39cfb86b2d4f7482b31fbbb67f5387b2c3a730dfb0bffd2c5f9015d16d0
data/.circleci/config.yml ADDED
@@ -0,0 +1,44 @@
+# Java Gradle CircleCI 2.0 configuration file
+#
+# Check https://circleci.com/docs/2.0/language-java/ for more details
+#
+version: 2
+jobs:
+  build:
+    docker:
+      # specify the version you desire here
+      - image: circleci/openjdk:8-jdk
+
+      # Specify service dependencies here if necessary
+      # CircleCI maintains a library of pre-built images
+      # documented at https://circleci.com/docs/2.0/circleci-images/
+      # - image: circleci/postgres:9.4
+
+    working_directory: ~/repo
+
+    environment:
+      # Customize the JVM maximum heap limit
+      JVM_OPTS: -Xmx3200m
+      TERM: dumb
+
+    steps:
+      - checkout
+
+      # Download and cache dependencies
+      - restore_cache:
+          keys:
+            - v1-dependencies-{{ checksum "build.gradle" }}
+            # fallback to using the latest cache if no exact match is found
+            - v1-dependencies-
+
+      - run: ./gradlew dependencies
+
+      - save_cache:
+          paths:
+            - ~/.gradle
+          key: v1-dependencies-{{ checksum "build.gradle" }}
+
+      # run tests!
+      - run: ./gradlew test
+      - store_test_results:
+          path: build/test-results
data/.github/dependabot.yml ADDED
@@ -0,0 +1,11 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+  - package-ecosystem: "gradle" # See documentation for possible values
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "weekly"
data/README.md CHANGED
@@ -1,4 +1,5 @@
 # Kafka output plugin for Embulk
+[![CircleCI](https://circleci.com/gh/joker1007/embulk-output-kafka.svg?style=svg)](https://circleci.com/gh/joker1007/embulk-output-kafka)
 
 ## Overview
 
@@ -19,6 +20,7 @@
 - **ignore_columns**: remove columns from output (array(string), default: `[]`)
 - **key_column_name**: use column value as record key (string, default: `null`, if this parameter is null, set random number as record key, and it can use column in `ignore_columns`)
 - **partition_column_name**: use column value as partition id (string, default: `null`, this value is prefer to `key_column_name`, and if partition_column value is null, use key_column for partitioning)
+- **column_for_deletion**: Determine to delete (string, default: `null`, `column_for_deletion` column must be boolean. If the value of the column is `true`, KafkaProducer sends `null` value to a Kafka Broker.)
 - **record_batch_size**: kafka producer record batch size (integer, default: `1000`)
 - **acks**: kafka producer require acks (string, default: `"1"`)
 - **retries**: kafka producer max retry count (integer, default: `1`)
@@ -28,7 +30,9 @@
 If use `avro_with_schema_registry` format, following configs are required.
 
 - **schema_registry_url**
-- **avsc** or **avsc_file**
+
+If avsc and avsc_file are null, embulk-output-kafka fetch a schema from schema registry.
+But currently, embulk-output-kafka supports only TopicNameStrategy.
 
 ## Example
 
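To see how the new `column_for_deletion` option fits together with the rest of the config, here is a minimal, hypothetical Embulk config sketch. The topic, broker address, and column names are illustrative only; `brokers` and `serialize_format` are option names inferred from the plugin code in this diff (`task.getBrokers()`, the serialize-format switch), not quoted from it.

```yaml
out:
  type: kafka
  brokers:
    - "localhost:9092"
  topic: users
  serialize_format: json
  key_column_name: id            # record key, used by log compaction
  column_for_deletion: deleted   # boolean column; true sends a null value (a tombstone)
```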
data/build.gradle CHANGED
@@ -1,6 +1,6 @@
 plugins {
-    id "com.jfrog.bintray" version "1.1"
-    id "com.github.jruby-gradle.base" version "1.5.0"
+    id "com.jfrog.bintray" version "1.8.5"
+    id "com.github.jruby-gradle.base" version "1.6.0"
     id "java"
     id "checkstyle"
 }
@@ -17,26 +17,43 @@ configurations {
     provided
 }
 
-version = "0.1.7"
+version = "0.1.8"
 
 sourceCompatibility = 1.8
 targetCompatibility = 1.8
 
 dependencies {
-    compile "org.embulk:embulk-core:0.9.17"
-    provided "org.embulk:embulk-core:0.9.17"
+    compile "org.embulk:embulk-core:0.9.22"
+    provided "org.embulk:embulk-core:0.9.22"
     // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
-    testCompile "junit:junit:4.12"
-    testCompile "org.embulk:embulk-test:0.9.17"
-    testCompile "org.embulk:embulk-standards:0.9.17"
+    testCompile "junit:junit:4.13"
+    testCompile "org.embulk:embulk-test:0.9.22"
+    testCompile "org.embulk:embulk-standards:0.9.22"
+    testCompile "org.embulk:embulk-deps-buffer:0.9.22"
 
-    compile "org.apache.kafka:kafka-clients:2.3.0"
-    compile("org.apache.avro:avro:1.9.0") {
+    compile("org.apache.kafka:kafka-clients:2.5.1") {
+        exclude group: "org.slf4j", module: "slf4j-api"
     }
-    compile("io.confluent:kafka-avro-serializer:5.3.0") {
+    compile("org.apache.avro:avro:1.10.0") {
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
+        exclude group: "org.slf4j", module: "slf4j-api"
+    }
+    compile("io.confluent:kafka-avro-serializer:5.5.1") {
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
+        exclude group: "org.slf4j", module: "slf4j-api"
    }
 
-    testCompile("com.github.charithe:kafka-junit:4.1.6") {
+    testCompile "com.salesforce.kafka.test:kafka-junit4:3.+"
+    testCompile("org.apache.kafka:kafka_2.12:2.5.+") {
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
+        // exclude group: "com.fasterxml.jackson.dataformat", module: "jackson-dataformat-csv"
+        exclude group: "com.fasterxml.jackson.datatype", module: "jackson-datatype-jdk8"
     }
 }
 
data/docker-compose.yml CHANGED
@@ -23,7 +23,7 @@ services:
       KAFKA_BROKER_ID: 1
       KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
       KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
-      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://broker:9092
+      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
       KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
 
data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java CHANGED
@@ -14,7 +14,7 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.stream.Collectors;
 
-public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
+public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor<GenericRecord>
 {
     private Schema avroSchema;
     private GenericRecord genericRecord;
@@ -25,8 +25,13 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
         this.avroSchema = avroSchema;
     }
 
-    GenericRecord getGenericRecord()
+    @Override
+    public GenericRecord getRecord()
     {
+        if (isDeletion()) {
+            return null;
+        }
+
         return genericRecord;
     }
 
@@ -40,6 +45,8 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        super.booleanColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
@@ -145,9 +152,8 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
         switch (avroSchema.getType()) {
             case ARRAY:
                 if (value.isArrayValue()) {
-                    return value.asArrayValue().list().stream().map(item -> {
-                        return convertMsgPackValueToAvroValue(avroSchema.getElementType(), item);
-                    }).filter(Objects::nonNull).collect(Collectors.toList());
+                    return value.asArrayValue().list().stream().map(item ->
+                            convertMsgPackValueToAvroValue(avroSchema.getElementType(), item)).filter(Objects::nonNull).collect(Collectors.toList());
                 }
                 throw new RuntimeException(String.format("Schema mismatch: avro: %s, msgpack: %s", avroSchema.getType().getName(), value.getValueType().name()));
             case MAP:
@@ -166,9 +172,8 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
         GenericRecord record = new GenericData.Record(avroSchema);
         Map<Value, Value> valueMap = value.asMapValue().map();
         for (org.apache.avro.Schema.Field field : avroSchema.getFields()) {
-            Optional.ofNullable(valueMap.get(ValueFactory.newString(field.name()))).ifPresent(v -> {
-                record.put(field.name(), convertMsgPackValueToAvroValue(field.schema(), v));
-            });
+            Optional.ofNullable(valueMap.get(ValueFactory.newString(field.name()))).ifPresent(v ->
+                    record.put(field.name(), convertMsgPackValueToAvroValue(field.schema(), v)));
         }
         return record;
     }
data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java ADDED
@@ -0,0 +1,13 @@
+package org.embulk.output.kafka;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.embulk.spi.PageReader;
+
+public class AvroFormatTransactionalPageOutput extends KafkaTransactionalPageOutput<Object, GenericRecord>
+{
+    public AvroFormatTransactionalPageOutput(KafkaProducer<Object, Object> producer, PageReader pageReader, KafkaOutputColumnVisitor<GenericRecord> columnVisitor, String topic, int taskIndex)
+    {
+        super(producer, pageReader, columnVisitor, topic, taskIndex);
+    }
+};
data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java CHANGED
@@ -10,7 +10,7 @@ import org.msgpack.value.Value;
 import java.io.IOException;
 import java.time.format.DateTimeFormatter;
 
-public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
+public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor<ObjectNode>
 {
     private ObjectMapper objectMapper;
     private ObjectNode jsonNode;
@@ -23,8 +23,13 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
         this.objectMapper = objectMapper;
     }
 
-    ObjectNode getJsonNode()
+    @Override
+    public ObjectNode getRecord()
     {
+        if (isDeletion()) {
+            return null;
+        }
+
         return jsonNode;
     }
 
@@ -38,6 +43,8 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        super.booleanColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java ADDED
@@ -0,0 +1,13 @@
+package org.embulk.output.kafka;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.embulk.spi.PageReader;
+
+public class JsonFormatTransactionalPageOutput extends KafkaTransactionalPageOutput<ObjectNode, ObjectNode>
+{
+    public JsonFormatTransactionalPageOutput(KafkaProducer<Object, ObjectNode> producer, PageReader pageReader, KafkaOutputColumnVisitor<ObjectNode> columnVisitor, String topic, int taskIndex)
+    {
+        super(producer, pageReader, columnVisitor, topic, taskIndex);
+    }
+};
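Both of these thin subclasses delegate to the new shared `KafkaTransactionalPageOutput` base class (+104 lines, not shown in this view). A hedged reconstruction of its likely core follows, transplanted from the anonymous `TransactionalPageOutput` implementations that this release deletes from `KafkaOutputPlugin` (visible in the last diff below); treat it as a sketch of the shape, not the actual file.

```java
package org.embulk.output.kafka;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.embulk.config.TaskReport;
import org.embulk.spi.Page;
import org.embulk.spi.PageReader;
import org.embulk.spi.TransactionalPageOutput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.PrimitiveIterator;
import java.util.Random;
import java.util.concurrent.atomic.AtomicLong;

// Sketch only: the send loop below is lifted from the old inline implementation
// that this diff removes from KafkaOutputPlugin; the real base class may differ.
public abstract class KafkaTransactionalPageOutput<K, V> implements TransactionalPageOutput
{
    private static final Logger logger = LoggerFactory.getLogger(KafkaTransactionalPageOutput.class);

    private final KafkaProducer<Object, K> producer;
    private final PageReader pageReader;
    private final KafkaOutputColumnVisitor<V> columnVisitor;
    private final String topic;
    private final int taskIndex;

    private final PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
    private final AtomicLong counter = new AtomicLong(0);
    private final AtomicLong recordLoggingCount = new AtomicLong(1);

    public KafkaTransactionalPageOutput(
            KafkaProducer<Object, K> producer, PageReader pageReader,
            KafkaOutputColumnVisitor<V> columnVisitor, String topic, int taskIndex)
    {
        this.producer = producer;
        this.pageReader = pageReader;
        this.columnVisitor = columnVisitor;
        this.topic = topic;
        this.taskIndex = taskIndex;
    }

    @SuppressWarnings("unchecked")
    @Override
    public void add(Page page)
    {
        pageReader.setPage(page);
        while (pageReader.nextRecord()) {
            columnVisitor.reset();
            pageReader.getSchema().visitColumns(columnVisitor);

            Object recordKey = columnVisitor.getRecordKey();
            if (recordKey == null) {
                recordKey = randomLong.next();
            }

            String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : topic;

            // getRecord() returns null when the deletion column was true -> tombstone.
            K value = (K) columnVisitor.getRecord();
            ProducerRecord<Object, K> producerRecord =
                    new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, value);
            producer.send(producerRecord, (metadata, exception) -> {
                if (exception != null) {
                    logger.error("produce error", exception);
                }
                long current = counter.incrementAndGet();
                if (current >= recordLoggingCount.get()) {
                    logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
                    recordLoggingCount.set(recordLoggingCount.get() * 2);
                }
            });
        }
    }

    @Override
    public void finish() { producer.flush(); }

    @Override
    public void close() { producer.close(); }

    @Override
    public void abort() { producer.flush(); producer.close(); }

    @Override
    public TaskReport commit() { return null; }
}
```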
data/src/main/java/org/embulk/output/kafka/KafkaJsonSerializer.java CHANGED
@@ -12,6 +12,10 @@ public class KafkaJsonSerializer implements Serializer<ObjectNode>
     @Override
     public byte[] serialize(String topic, ObjectNode data)
    {
+        if (data == null) {
+            return null;
+        }
+
         try {
             return objectMapper.writeValueAsBytes(data);
         }
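This guard closes the loop on `column_for_deletion` for the JSON path: when the visitor hands the producer a `null` value, the serializer now passes it through instead of feeding `null` to Jackson, and Kafka writes a record with a null value, which log compaction treats as a tombstone for that key. A minimal sketch (the no-arg constructor is assumed; the diff does not show one explicitly):

```java
import org.embulk.output.kafka.KafkaJsonSerializer;

public class TombstoneCheck
{
    public static void main(String[] args)
    {
        KafkaJsonSerializer serializer = new KafkaJsonSerializer(); // assumed default constructor

        // Null data now yields null bytes instead of a Jackson error;
        // KafkaProducer treats a null-value record as a tombstone on compacted topics.
        byte[] bytes = serializer.serialize("users", null);
        System.out.println(bytes == null); // prints: true
    }
}
```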
data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java CHANGED
@@ -4,7 +4,7 @@ import org.embulk.spi.Column;
 import org.embulk.spi.ColumnVisitor;
 import org.embulk.spi.PageReader;
 
-public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
+public abstract class KafkaOutputColumnVisitor<T> implements ColumnVisitor
 {
     private KafkaOutputPlugin.PluginTask task;
     PageReader pageReader;
@@ -13,6 +13,7 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
     private Object recordKey = null;
     private String topicName = null;
     private Integer partition = null;
+    private boolean deletion = false;
 
     KafkaOutputColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader)
     {
@@ -21,6 +22,8 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         this.partitionColumnName = task.getPartitionColumnName().orElse(null);
     }
 
+    public abstract T getRecord();
+
     Object getRecordKey()
     {
         return recordKey;
@@ -50,11 +53,17 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         return partition;
     }
 
+    boolean isDeletion()
+    {
+        return deletion;
+    }
+
     void reset()
     {
         this.recordKey = null;
         this.topicName = null;
         this.partition = null;
+        this.deletion = false;
     }
 
     boolean isIgnoreColumn(Column column)
@@ -62,6 +71,21 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         return task.getIgnoreColumns().stream().anyMatch(name -> name.equals(column.getName()));
     }
 
+    boolean isColumnForDeletion(Column column)
+    {
+        return task.getColumnForDeletion().map(name -> name.equals(column.getName())).orElse(false);
+    }
+
+    @Override
+    public void booleanColumn(Column column)
+    {
+        if (!pageReader.isNull(column)) {
+            if (isColumnForDeletion(column)) {
+                deletion = pageReader.getBoolean(column);
+            }
+        }
+    }
+
     @Override
     public void longColumn(Column column)
     {
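One subtlety worth calling out: both format visitors shown earlier call `super.booleanColumn(column)` before their `isIgnoreColumn` early return. That ordering is what lets the base class capture the deletion flag even when the boolean column itself is excluded from the payload via `ignore_columns`. In sketch form (the trailing comment stands in for the subclass-specific body):

```java
// Pattern used by JsonFormatColumnVisitor and AvroFormatColumnVisitor in this release:
@Override
public void booleanColumn(Column column)
{
    super.booleanColumn(column);    // base class records the deletion flag first

    if (isIgnoreColumn(column)) {
        return;                     // column skipped in the payload, flag already captured
    }

    // ... subclass-specific code appends the boolean to the JSON/Avro record here
}
```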
data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java CHANGED
@@ -5,11 +5,16 @@ import com.fasterxml.jackson.annotation.JsonValue;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.collect.ImmutableList;
+import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
+import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry;
+import io.confluent.kafka.serializers.subject.TopicNameStrategy;
+import io.confluent.kafka.serializers.subject.strategy.SubjectNameStrategy;
 import org.apache.kafka.clients.admin.AdminClient;
 import org.apache.kafka.clients.admin.AdminClientConfig;
 import org.apache.kafka.clients.admin.DescribeTopicsResult;
 import org.apache.kafka.clients.producer.KafkaProducer;
-import org.apache.kafka.clients.producer.ProducerRecord;
 import org.embulk.config.Config;
 import org.embulk.config.ConfigDefault;
 import org.embulk.config.ConfigDiff;
@@ -20,12 +25,9 @@ import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
 import org.embulk.spi.Exec;
 import org.embulk.spi.OutputPlugin;
-import org.embulk.spi.Page;
 import org.embulk.spi.PageReader;
 import org.embulk.spi.Schema;
 import org.embulk.spi.TransactionalPageOutput;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
@@ -33,13 +35,10 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
-import java.util.PrimitiveIterator;
 import java.util.Properties;
-import java.util.Random;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
-import java.util.concurrent.atomic.AtomicLong;
 
 public class KafkaOutputPlugin
         implements OutputPlugin
@@ -107,7 +106,7 @@
         public Optional<String> getPartitionColumnName();
 
         @Config("record_batch_size")
-        @ConfigDefault("1000")
+        @ConfigDefault("16384")
         public int getRecordBatchSize();
 
         @Config("acks")
@@ -129,17 +128,21 @@
         @Config("value_subject_name_strategy")
         @ConfigDefault("null")
         public Optional<String> getValueSubjectNameStrategy();
+
+        @Config("column_for_deletion")
+        @ConfigDefault("null")
+        public Optional<String> getColumnForDeletion();
     }
 
     private static ObjectMapper objectMapper = new ObjectMapper();
-    private Logger logger = LoggerFactory.getLogger(getClass());
+
+    private static final int SCHEMA_REGISTRY_IDENTITY_MAP_CAPACITY = 1000;
 
     private AdminClient getKafkaAdminClient(PluginTask task)
     {
         Properties properties = new Properties();
         properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
-        AdminClient adminClient = AdminClient.create(properties);
-        return adminClient;
+        return AdminClient.create(properties);
     }
 
     @Override
@@ -189,101 +192,48 @@
             case AVRO_WITH_SCHEMA_REGISTRY:
                 return buildPageOutputForAvroWithSchemaRegistry(task, schema, taskIndex);
             default:
-                throw new ConfigException("Unknow serialize format");
+                throw new ConfigException("Unknown serialize format");
         }
     }
 
     private TransactionalPageOutput buildPageOutputForJson(PluginTask task, Schema schema, int taskIndex)
     {
         KafkaProducer<Object, ObjectNode> producer = RecordProducerFactory.getForJson(task, schema, task.getOtherProducerConfigs());
-
         PageReader pageReader = new PageReader(schema);
-        PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
-        AtomicLong counter = new AtomicLong(0);
-        AtomicLong recordLoggingCount = new AtomicLong(1);
-
-        return new TransactionalPageOutput() {
-            private JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
-
-            @Override
-            public void add(Page page)
-            {
-                pageReader.setPage(page);
-                while (pageReader.nextRecord()) {
-                    columnVisitor.reset();
-
-                    pageReader.getSchema().visitColumns(columnVisitor);
-
-                    Object recordKey = columnVisitor.getRecordKey();
-                    if (recordKey == null) {
-                        recordKey = randomLong.next();
-                    }
-
-                    String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
-                    ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getJsonNode());
-                    producer.send(producerRecord, (metadata, exception) -> {
-                        if (exception != null) {
-                            logger.error("produce error", exception);
-                        }
-
-                        logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
-                                producerRecord.topic(),
-                                producerRecord.key(),
-                                producerRecord.value(),
-                                producerRecord.partition());
-
-                        long current = counter.incrementAndGet();
-                        if (current >= recordLoggingCount.get()) {
-                            logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                            recordLoggingCount.set(recordLoggingCount.get() * 2);
-                        }
-                    });
-                }
-            }
+        KafkaOutputColumnVisitor<ObjectNode> columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
 
-            @Override
-            public void finish()
-            {
-                producer.flush();
-            }
-
-            @Override
-            public void close()
-            {
-                producer.close();
-            }
-
-            @Override
-            public void abort()
-            {
-                producer.flush();
-                producer.close();
-            }
-
-            @Override
-            public TaskReport commit()
-            {
-                return null;
-            }
-        };
+        return new JsonFormatTransactionalPageOutput(producer, pageReader, columnVisitor, task.getTopic(), taskIndex);
     }
 
     private TransactionalPageOutput buildPageOutputForAvroWithSchemaRegistry(PluginTask task, Schema schema, int taskIndex)
     {
         KafkaProducer<Object, Object> producer = RecordProducerFactory.getForAvroWithSchemaRegistry(task, schema, task.getOtherProducerConfigs());
-
         PageReader pageReader = new PageReader(schema);
+        org.apache.avro.Schema avroSchema = getAvroSchema(task);
+        AvroFormatColumnVisitor avroFormatColumnVisitor = new AvroFormatColumnVisitor(task, pageReader, avroSchema);
+
+        return new AvroFormatTransactionalPageOutput(producer, pageReader, avroFormatColumnVisitor, task.getTopic(), taskIndex);
+    }
 
+    private org.apache.avro.Schema getAvroSchema(PluginTask task)
+    {
         org.apache.avro.Schema avroSchema = null;
-        if (!task.getAvsc().isPresent() && !task.getAvscFile().isPresent() || task.getAvsc().isPresent() == task.getAvscFile().isPresent()) {
+        if (!task.getSchemaRegistryUrl().isPresent()) {
+            throw new ConfigException("avro_with_schema_registry format needs schema_registry_url");
+        }
+
+        if (task.getAvsc().isPresent() && task.getAvscFile().isPresent()) {
             throw new ConfigException("avro_with_schema_registry format needs either one of avsc and avsc_file");
         }
+
         if (task.getAvsc().isPresent()) {
             avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvsc().get().toString());
+            return avroSchema;
         }
         if (task.getAvscFile().isPresent()) {
             try {
                 avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvscFile().get());
+                return avroSchema;
             }
             catch (IOException e) {
                 e.printStackTrace();
@@ -291,77 +241,28 @@
             }
         }
 
-        PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
-
-        AtomicLong counter = new AtomicLong(0);
-        AtomicLong recordLoggingCount = new AtomicLong(1);
-
-        final org.apache.avro.Schema finalAvroSchema = avroSchema;
-        return new TransactionalPageOutput()
-        {
-            private AvroFormatColumnVisitor columnVisitor = new AvroFormatColumnVisitor(task, pageReader, finalAvroSchema);
-
-            @Override
-            public void add(Page page)
-            {
-                pageReader.setPage(page);
-                while (pageReader.nextRecord()) {
-                    columnVisitor.reset();
-
-                    pageReader.getSchema().visitColumns(columnVisitor);
-
-                    Object recordKey = columnVisitor.getRecordKey();
-                    if (recordKey == null) {
-                        recordKey = randomLong.next();
-                    }
-
-                    String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
-
-                    ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getGenericRecord());
-                    producer.send(producerRecord, (metadata, exception) -> {
-                        if (exception != null) {
-                            logger.error("produce error", exception);
-                        }
-
-                        logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
-                                producerRecord.topic(),
-                                producerRecord.key(),
-                                producerRecord.value(),
-                                producerRecord.partition());
-
-                        long current = counter.incrementAndGet();
-                        if (current >= recordLoggingCount.get()) {
-                            logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                            recordLoggingCount.set(recordLoggingCount.get() * 2);
-                        }
-                    });
-                }
-            }
-
-            @Override
-            public void finish()
-            {
-                producer.flush();
-            }
-
-            @Override
-            public void close()
-            {
-                producer.close();
-            }
-
-            @Override
-            public void abort()
-            {
-                producer.flush();
-                producer.close();
-            }
+        SchemaRegistryClient schemaRegistryClient = getSchemaRegistryClient(task.getSchemaRegistryUrl().get());
+        SubjectNameStrategy subjectNameStrategy = new TopicNameStrategy();
+        String subjectName = subjectNameStrategy.subjectName(task.getTopic(), false, null);
+        try {
+            String schema = schemaRegistryClient.getLatestSchemaMetadata(subjectName).getSchema();
+            avroSchema = new org.apache.avro.Schema.Parser().parse(schema);
+            return avroSchema;
+        }
+        catch (IOException | RestClientException e) {
+            throw new ConfigException("cannot fetch latest schema from schema registry.", e);
+        }
+    }
 
-            @Override
-            public TaskReport commit()
-            {
-                return null;
-            }
-        };
+    private static final String MOCK_SCHEMA_REGISTRY_PREFIX = "mock://";
+    private SchemaRegistryClient getSchemaRegistryClient(String url)
+    {
+        if (url.startsWith(MOCK_SCHEMA_REGISTRY_PREFIX)) {
+            String mockScope = url.substring(MOCK_SCHEMA_REGISTRY_PREFIX.length());
+            return MockSchemaRegistry.getClientForScope(mockScope);
+        }
+        else {
+            return new CachedSchemaRegistryClient(url, SCHEMA_REGISTRY_IDENTITY_MAP_CAPACITY);
+        }
     }
 }
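The schema lookup that now runs when neither `avsc` nor `avsc_file` is set can be exercised on its own. A minimal sketch using only the calls that appear in the diff above; the registry URL and topic name are placeholders:

```java
import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import io.confluent.kafka.serializers.subject.TopicNameStrategy;

public class SchemaLookupSketch
{
    public static void main(String[] args) throws Exception
    {
        // Placeholder URL; the identity-map capacity mirrors the plugin's new constant.
        SchemaRegistryClient client = new CachedSchemaRegistryClient("http://localhost:8081", 1000);

        // TopicNameStrategy maps a topic to the "<topic>-value" subject
        // (isKey = false; the schema argument is unused by this strategy).
        String subject = new TopicNameStrategy().subjectName("my-topic", false, null);

        String raw = client.getLatestSchemaMetadata(subject).getSchema();
        org.apache.avro.Schema schema = new org.apache.avro.Schema.Parser().parse(raw);
        System.out.println(schema.getFullName());
    }
}
```

The `mock://` branch in `getSchemaRegistryClient` routes to Confluent's `MockSchemaRegistry`, so the new tests (see `TestKafkaOutputPlugin.java` in the file list) can resolve schemas without a running registry.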