embulk-output-kafka 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +44 -0
  3. data/.github/dependabot.yml +11 -0
  4. data/README.md +5 -1
  5. data/build.gradle +29 -12
  6. data/docker-compose.yml +1 -1
  7. data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +13 -8
  8. data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java +13 -0
  9. data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +9 -2
  10. data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java +13 -0
  11. data/src/main/java/org/embulk/output/kafka/KafkaJsonSerializer.java +4 -0
  12. data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +25 -1
  13. data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +54 -153
  14. data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java +104 -0
  15. data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java +3 -3
  16. data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java +384 -0
  17. data/src/test/resources/config_complex.yml +9 -28
  18. data/src/test/resources/config_complex_avro.yml +23 -42
  19. data/src/test/resources/config_simple.yml +5 -22
  20. data/src/test/resources/config_simple_avro.yml +14 -32
  21. data/src/test/resources/config_simple_avro_avsc_file.yml +7 -25
  22. data/src/test/resources/config_with_column_for_deletion.yml +7 -0
  23. data/src/test/resources/config_with_column_for_deletion_avro.yml +18 -0
  24. data/src/test/resources/config_with_key_column.yml +6 -23
  25. data/src/test/resources/config_with_partition_column.yml +6 -0
  26. data/src/test/resources/in1.csv +4 -4
  27. data/src/test/resources/in_complex.csv +4 -4
  28. data/src/test/resources/in_with_deletion.csv +4 -0
  29. metadata +30 -24
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: c2c8249b98bf48ab359b21e01dc0f4112ebf800f
-  data.tar.gz: 538847da32ff4ad84f24495c67d790185043ce02
+SHA256:
+  metadata.gz: cb0e6dfc4b8b49b93fb6966948e8930e5785cf5a2a89fc5a8f17c80a4e7865c1
+  data.tar.gz: 18cb578eba9423c490e7416c906054f8401301b977e796c271182f593dc08563
 SHA512:
-  metadata.gz: 0542a0a92e3c8b2a998177c874acf4d0209d0e90889894999daff756840169fe585877af51b5368eaada9ba4eb8bd6c6676fafc8ff5fc84d4edf193df19881de
-  data.tar.gz: 1cd8fb2c55282f082eef56ea983cbc59cbae551db606f55bf21c1fccfbea37cfe633ef713eb66b089f0229ab5b97ae7b9e79ad38eddfc79d9157ca2e68fc869a
+  metadata.gz: 84a080d3cc49d30ad04802162e94de0072aac96d2562649c34cb1623e7734f783e96deeb8082c81ce965b36520c21be122cdf0e292a55697c9d2c89fd767f551
+  data.tar.gz: 4eb1b72cb705b2eb473ae76f02085987b4f55a37d9ba7048789ea3133e8056850813d39cfb86b2d4f7482b31fbbb67f5387b2c3a730dfb0bffd2c5f9015d16d0
data/.circleci/config.yml ADDED
@@ -0,0 +1,44 @@
+# Java Gradle CircleCI 2.0 configuration file
+#
+# Check https://circleci.com/docs/2.0/language-java/ for more details
+#
+version: 2
+jobs:
+  build:
+    docker:
+      # specify the version you desire here
+      - image: circleci/openjdk:8-jdk
+
+      # Specify service dependencies here if necessary
+      # CircleCI maintains a library of pre-built images
+      # documented at https://circleci.com/docs/2.0/circleci-images/
+      # - image: circleci/postgres:9.4
+
+    working_directory: ~/repo
+
+    environment:
+      # Customize the JVM maximum heap limit
+      JVM_OPTS: -Xmx3200m
+      TERM: dumb
+
+    steps:
+      - checkout
+
+      # Download and cache dependencies
+      - restore_cache:
+          keys:
+            - v1-dependencies-{{ checksum "build.gradle" }}
+            # fallback to using the latest cache if no exact match is found
+            - v1-dependencies-
+
+      - run: ./gradlew dependencies
+
+      - save_cache:
+          paths:
+            - ~/.gradle
+          key: v1-dependencies-{{ checksum "build.gradle" }}
+
+      # run tests!
+      - run: ./gradlew test
+      - store_test_results:
+          path: build/test-results
data/.github/dependabot.yml ADDED
@@ -0,0 +1,11 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+  - package-ecosystem: "gradle" # See documentation for possible values
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "weekly"
data/README.md CHANGED
@@ -1,4 +1,5 @@
 # Kafka output plugin for Embulk
+[![CircleCI](https://circleci.com/gh/joker1007/embulk-output-kafka.svg?style=svg)](https://circleci.com/gh/joker1007/embulk-output-kafka)
 
 ## Overview
 
@@ -19,6 +20,7 @@
 - **ignore_columns**: remove columns from output (array(string), default: `[]`)
 - **key_column_name**: use column value as record key (string, default: `null`, if this parameter is null, set random number as record key, and it can use column in `ignore_columns`)
 - **partition_column_name**: use column value as partition id (string, default: `null`, this value is prefer to `key_column_name`, and if partition_column value is null, use key_column for partitioning)
+- **column_for_deletion**: column that marks a record for deletion (string, default: `null`; the `column_for_deletion` column must be boolean. If its value is `true`, KafkaProducer sends a `null` value (a tombstone) to the Kafka broker.)
 - **record_batch_size**: kafka producer record batch size (integer, default: `1000`)
 - **acks**: kafka producer require acks (string, default: `"1"`)
 - **retries**: kafka producer max retry count (integer, default: `1`)
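For reference, a minimal sketch of how the new option might be used in an Embulk output config; the topic and column names here are hypothetical, not taken from this release:

```yaml
out:
  type: kafka
  brokers:
    - "localhost:9092"
  topic: "events"                  # hypothetical topic name
  serialize_format: json
  column_for_deletion: "deleted"   # hypothetical boolean column; true makes the producer send a null (tombstone) record
```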
@@ -28,7 +30,9 @@
 If use `avro_with_schema_registry` format, following configs are required.
 
 - **schema_registry_url**
-- **avsc** or **avsc_file**
+
+If `avsc` and `avsc_file` are null, embulk-output-kafka fetches the schema from the schema registry.
+Currently, however, embulk-output-kafka supports only TopicNameStrategy.
 
 ## Example
 
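A hedged sketch of this registry-fetch path (broker address and topic are hypothetical; it assumes the value schema is already registered under the subject `<topic>-value`, which is what TopicNameStrategy resolves to):

```yaml
out:
  type: kafka
  brokers:
    - "localhost:9092"
  topic: "users"                                # hypothetical topic name
  serialize_format: avro_with_schema_registry
  schema_registry_url: "http://localhost:8081"  # with avsc/avsc_file omitted, the schema is fetched from the registry
```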
data/build.gradle CHANGED
@@ -1,6 +1,6 @@
 plugins {
-    id "com.jfrog.bintray" version "1.1"
-    id "com.github.jruby-gradle.base" version "1.5.0"
+    id "com.jfrog.bintray" version "1.8.5"
+    id "com.github.jruby-gradle.base" version "1.6.0"
     id "java"
     id "checkstyle"
 }
@@ -17,26 +17,43 @@ configurations {
     provided
 }
 
-version = "0.1.7"
+version = "0.1.8"
 
 sourceCompatibility = 1.8
 targetCompatibility = 1.8
 
 dependencies {
-    compile "org.embulk:embulk-core:0.9.17"
-    provided "org.embulk:embulk-core:0.9.17"
+    compile "org.embulk:embulk-core:0.9.22"
+    provided "org.embulk:embulk-core:0.9.22"
     // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
-    testCompile "junit:junit:4.12"
-    testCompile "org.embulk:embulk-test:0.9.17"
-    testCompile "org.embulk:embulk-standards:0.9.17"
+    testCompile "junit:junit:4.13"
+    testCompile "org.embulk:embulk-test:0.9.22"
+    testCompile "org.embulk:embulk-standards:0.9.22"
+    testCompile "org.embulk:embulk-deps-buffer:0.9.22"
 
-    compile "org.apache.kafka:kafka-clients:2.3.0"
-    compile("org.apache.avro:avro:1.9.0") {
+    compile("org.apache.kafka:kafka-clients:2.5.1") {
+        exclude group: "org.slf4j", module: "slf4j-api"
     }
-    compile("io.confluent:kafka-avro-serializer:5.3.0") {
+    compile("org.apache.avro:avro:1.10.0") {
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
+        exclude group: "org.slf4j", module: "slf4j-api"
+    }
+    compile("io.confluent:kafka-avro-serializer:5.5.1") {
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
+        exclude group: "org.slf4j", module: "slf4j-api"
     }
 
-    testCompile("com.github.charithe:kafka-junit:4.1.6") {
+    testCompile "com.salesforce.kafka.test:kafka-junit4:3.+"
+    testCompile("org.apache.kafka:kafka_2.12:2.5.+") {
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
+        // exclude group: "com.fasterxml.jackson.dataformat", module: "jackson-dataformat-csv"
+        exclude group: "com.fasterxml.jackson.datatype", module: "jackson-datatype-jdk8"
     }
 }
 
data/docker-compose.yml CHANGED
@@ -23,7 +23,7 @@ services:
       KAFKA_BROKER_ID: 1
       KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
       KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
-      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://broker:9092
+      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
       KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
 
data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java CHANGED
@@ -14,7 +14,7 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.stream.Collectors;
 
-public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
+public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor<GenericRecord>
 {
     private Schema avroSchema;
     private GenericRecord genericRecord;
@@ -25,8 +25,13 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
         this.avroSchema = avroSchema;
     }
 
-    GenericRecord getGenericRecord()
+    @Override
+    public GenericRecord getRecord()
     {
+        if (isDeletion()) {
+            return null;
+        }
+
         return genericRecord;
     }
 
@@ -40,6 +45,8 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        super.booleanColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
@@ -145,9 +152,8 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
         switch (avroSchema.getType()) {
             case ARRAY:
                 if (value.isArrayValue()) {
-                    return value.asArrayValue().list().stream().map(item -> {
-                        return convertMsgPackValueToAvroValue(avroSchema.getElementType(), item);
-                    }).filter(Objects::nonNull).collect(Collectors.toList());
+                    return value.asArrayValue().list().stream().map(item ->
+                            convertMsgPackValueToAvroValue(avroSchema.getElementType(), item)).filter(Objects::nonNull).collect(Collectors.toList());
                 }
                 throw new RuntimeException(String.format("Schema mismatch: avro: %s, msgpack: %s", avroSchema.getType().getName(), value.getValueType().name()));
             case MAP:
@@ -166,9 +172,8 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
         GenericRecord record = new GenericData.Record(avroSchema);
         Map<Value, Value> valueMap = value.asMapValue().map();
         for (org.apache.avro.Schema.Field field : avroSchema.getFields()) {
-            Optional.ofNullable(valueMap.get(ValueFactory.newString(field.name()))).ifPresent(v -> {
-                record.put(field.name(), convertMsgPackValueToAvroValue(field.schema(), v));
-            });
+            Optional.ofNullable(valueMap.get(ValueFactory.newString(field.name()))).ifPresent(v ->
+                    record.put(field.name(), convertMsgPackValueToAvroValue(field.schema(), v)));
         }
         return record;
     }
data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java ADDED
@@ -0,0 +1,13 @@
+package org.embulk.output.kafka;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.embulk.spi.PageReader;
+
+public class AvroFormatTransactionalPageOutput extends KafkaTransactionalPageOutput<Object, GenericRecord>
+{
+    public AvroFormatTransactionalPageOutput(KafkaProducer<Object, Object> producer, PageReader pageReader, KafkaOutputColumnVisitor<GenericRecord> columnVisitor, String topic, int taskIndex)
+    {
+        super(producer, pageReader, columnVisitor, topic, taskIndex);
+    }
+};
data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java CHANGED
@@ -10,7 +10,7 @@ import org.msgpack.value.Value;
 import java.io.IOException;
 import java.time.format.DateTimeFormatter;
 
-public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
+public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor<ObjectNode>
 {
     private ObjectMapper objectMapper;
     private ObjectNode jsonNode;
@@ -23,8 +23,13 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
         this.objectMapper = objectMapper;
     }
 
-    ObjectNode getJsonNode()
+    @Override
+    public ObjectNode getRecord()
     {
+        if (isDeletion()) {
+            return null;
+        }
+
         return jsonNode;
     }
 
@@ -38,6 +43,8 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        super.booleanColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java ADDED
@@ -0,0 +1,13 @@
+package org.embulk.output.kafka;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.embulk.spi.PageReader;
+
+public class JsonFormatTransactionalPageOutput extends KafkaTransactionalPageOutput<ObjectNode, ObjectNode>
+{
+    public JsonFormatTransactionalPageOutput(KafkaProducer<Object, ObjectNode> producer, PageReader pageReader, KafkaOutputColumnVisitor<ObjectNode> columnVisitor, String topic, int taskIndex)
+    {
+        super(producer, pageReader, columnVisitor, topic, taskIndex);
+    }
+};
data/src/main/java/org/embulk/output/kafka/KafkaJsonSerializer.java CHANGED
@@ -12,6 +12,10 @@ public class KafkaJsonSerializer implements Serializer<ObjectNode>
     @Override
     public byte[] serialize(String topic, ObjectNode data)
    {
+        if (data == null) {
+            return null;
+        }
+
         try {
             return objectMapper.writeValueAsBytes(data);
         }
data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java CHANGED
@@ -4,7 +4,7 @@ import org.embulk.spi.Column;
 import org.embulk.spi.ColumnVisitor;
 import org.embulk.spi.PageReader;
 
-public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
+public abstract class KafkaOutputColumnVisitor<T> implements ColumnVisitor
 {
     private KafkaOutputPlugin.PluginTask task;
     PageReader pageReader;
@@ -13,6 +13,7 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
     private Object recordKey = null;
     private String topicName = null;
     private Integer partition = null;
+    private boolean deletion = false;
 
     KafkaOutputColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader)
     {
@@ -21,6 +22,8 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         this.partitionColumnName = task.getPartitionColumnName().orElse(null);
     }
 
+    public abstract T getRecord();
+
     Object getRecordKey()
     {
         return recordKey;
@@ -50,11 +53,17 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         return partition;
     }
 
+    boolean isDeletion()
+    {
+        return deletion;
+    }
+
     void reset()
     {
         this.recordKey = null;
         this.topicName = null;
         this.partition = null;
+        this.deletion = false;
     }
 
     boolean isIgnoreColumn(Column column)
@@ -62,6 +71,21 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         return task.getIgnoreColumns().stream().anyMatch(name -> name.equals(column.getName()));
     }
 
+    boolean isColumnForDeletion(Column column)
+    {
+        return task.getColumnForDeletion().map(name -> name.equals(column.getName())).orElse(false);
+    }
+
+    @Override
+    public void booleanColumn(Column column)
+    {
+        if (!pageReader.isNull(column)) {
+            if (isColumnForDeletion(column)) {
+                deletion = pageReader.getBoolean(column);
+            }
+        }
+    }
+
     @Override
     public void longColumn(Column column)
     {
data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java CHANGED
@@ -5,11 +5,16 @@ import com.fasterxml.jackson.annotation.JsonValue;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.collect.ImmutableList;
+import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
+import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry;
+import io.confluent.kafka.serializers.subject.TopicNameStrategy;
+import io.confluent.kafka.serializers.subject.strategy.SubjectNameStrategy;
 import org.apache.kafka.clients.admin.AdminClient;
 import org.apache.kafka.clients.admin.AdminClientConfig;
 import org.apache.kafka.clients.admin.DescribeTopicsResult;
 import org.apache.kafka.clients.producer.KafkaProducer;
-import org.apache.kafka.clients.producer.ProducerRecord;
 import org.embulk.config.Config;
 import org.embulk.config.ConfigDefault;
 import org.embulk.config.ConfigDiff;
@@ -20,12 +25,9 @@ import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
 import org.embulk.spi.Exec;
 import org.embulk.spi.OutputPlugin;
-import org.embulk.spi.Page;
 import org.embulk.spi.PageReader;
 import org.embulk.spi.Schema;
 import org.embulk.spi.TransactionalPageOutput;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
@@ -33,13 +35,10 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
-import java.util.PrimitiveIterator;
 import java.util.Properties;
-import java.util.Random;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
-import java.util.concurrent.atomic.AtomicLong;
 
 public class KafkaOutputPlugin
         implements OutputPlugin
@@ -107,7 +106,7 @@ public class KafkaOutputPlugin
         public Optional<String> getPartitionColumnName();
 
         @Config("record_batch_size")
-        @ConfigDefault("1000")
+        @ConfigDefault("16384")
         public int getRecordBatchSize();
 
         @Config("acks")
@@ -129,17 +128,21 @@ public class KafkaOutputPlugin
         @Config("value_subject_name_strategy")
         @ConfigDefault("null")
         public Optional<String> getValueSubjectNameStrategy();
+
+        @Config("column_for_deletion")
+        @ConfigDefault("null")
+        public Optional<String> getColumnForDeletion();
     }
 
     private static ObjectMapper objectMapper = new ObjectMapper();
-    private Logger logger = LoggerFactory.getLogger(getClass());
+
+    private static final int SCHEMA_REGISTRY_IDENTITY_MAP_CAPACITY = 1000;
 
     private AdminClient getKafkaAdminClient(PluginTask task)
     {
         Properties properties = new Properties();
         properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
-        AdminClient adminClient = AdminClient.create(properties);
-        return adminClient;
+        return AdminClient.create(properties);
     }
 
     @Override
@@ -189,101 +192,48 @@ public class KafkaOutputPlugin
             case AVRO_WITH_SCHEMA_REGISTRY:
                 return buildPageOutputForAvroWithSchemaRegistry(task, schema, taskIndex);
             default:
-                throw new ConfigException("Unknow serialize format");
+                throw new ConfigException("Unknown serialize format");
         }
     }
 
     private TransactionalPageOutput buildPageOutputForJson(PluginTask task, Schema schema, int taskIndex)
     {
         KafkaProducer<Object, ObjectNode> producer = RecordProducerFactory.getForJson(task, schema, task.getOtherProducerConfigs());
-
         PageReader pageReader = new PageReader(schema);
-        PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
-        AtomicLong counter = new AtomicLong(0);
-        AtomicLong recordLoggingCount = new AtomicLong(1);
-
-        return new TransactionalPageOutput() {
-            private JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
-
-            @Override
-            public void add(Page page)
-            {
-                pageReader.setPage(page);
-                while (pageReader.nextRecord()) {
-                    columnVisitor.reset();
-
-                    pageReader.getSchema().visitColumns(columnVisitor);
-
-                    Object recordKey = columnVisitor.getRecordKey();
-                    if (recordKey == null) {
-                        recordKey = randomLong.next();
-                    }
-
-                    String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
-                    ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getJsonNode());
-                    producer.send(producerRecord, (metadata, exception) -> {
-                        if (exception != null) {
-                            logger.error("produce error", exception);
-                        }
-
-                        logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
-                                producerRecord.topic(),
-                                producerRecord.key(),
-                                producerRecord.value(),
-                                producerRecord.partition());
-
-                        long current = counter.incrementAndGet();
-                        if (current >= recordLoggingCount.get()) {
-                            logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                            recordLoggingCount.set(recordLoggingCount.get() * 2);
-                        }
-                    });
-                }
-            }
+        KafkaOutputColumnVisitor<ObjectNode> columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
 
-            @Override
-            public void finish()
-            {
-                producer.flush();
-            }
-
-            @Override
-            public void close()
-            {
-                producer.close();
-            }
-
-            @Override
-            public void abort()
-            {
-                producer.flush();
-                producer.close();
-            }
-
-            @Override
-            public TaskReport commit()
-            {
-                return null;
-            }
-        };
+        return new JsonFormatTransactionalPageOutput(producer, pageReader, columnVisitor, task.getTopic(), taskIndex);
     }
 
     private TransactionalPageOutput buildPageOutputForAvroWithSchemaRegistry(PluginTask task, Schema schema, int taskIndex)
     {
         KafkaProducer<Object, Object> producer = RecordProducerFactory.getForAvroWithSchemaRegistry(task, schema, task.getOtherProducerConfigs());
-
         PageReader pageReader = new PageReader(schema);
+        org.apache.avro.Schema avroSchema = getAvroSchema(task);
+        AvroFormatColumnVisitor avroFormatColumnVisitor = new AvroFormatColumnVisitor(task, pageReader, avroSchema);
+
+        return new AvroFormatTransactionalPageOutput(producer, pageReader, avroFormatColumnVisitor, task.getTopic(), taskIndex);
+    }
 
+    private org.apache.avro.Schema getAvroSchema(PluginTask task)
+    {
         org.apache.avro.Schema avroSchema = null;
-        if (!task.getAvsc().isPresent() && !task.getAvscFile().isPresent() || task.getAvsc().isPresent() == task.getAvscFile().isPresent()) {
+        if (!task.getSchemaRegistryUrl().isPresent()) {
+            throw new ConfigException("avro_with_schema_registry format needs schema_registry_url");
+        }
+
+        if (task.getAvsc().isPresent() && task.getAvscFile().isPresent()) {
             throw new ConfigException("avro_with_schema_registry format needs either one of avsc and avsc_file");
         }
+
         if (task.getAvsc().isPresent()) {
             avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvsc().get().toString());
+            return avroSchema;
         }
         if (task.getAvscFile().isPresent()) {
             try {
                 avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvscFile().get());
+                return avroSchema;
             }
             catch (IOException e) {
                 e.printStackTrace();
@@ -291,77 +241,28 @@ public class KafkaOutputPlugin
             }
         }
 
-        PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
-
-        AtomicLong counter = new AtomicLong(0);
-        AtomicLong recordLoggingCount = new AtomicLong(1);
-
-        final org.apache.avro.Schema finalAvroSchema = avroSchema;
-        return new TransactionalPageOutput()
-        {
-            private AvroFormatColumnVisitor columnVisitor = new AvroFormatColumnVisitor(task, pageReader, finalAvroSchema);
-
-            @Override
-            public void add(Page page)
-            {
-                pageReader.setPage(page);
-                while (pageReader.nextRecord()) {
-                    columnVisitor.reset();
-
-                    pageReader.getSchema().visitColumns(columnVisitor);
-
-                    Object recordKey = columnVisitor.getRecordKey();
-                    if (recordKey == null) {
-                        recordKey = randomLong.next();
-                    }
-
-                    String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
-
-                    ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getGenericRecord());
-                    producer.send(producerRecord, (metadata, exception) -> {
-                        if (exception != null) {
-                            logger.error("produce error", exception);
-                        }
-
-                        logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
-                                producerRecord.topic(),
-                                producerRecord.key(),
-                                producerRecord.value(),
-                                producerRecord.partition());
-
-                        long current = counter.incrementAndGet();
-                        if (current >= recordLoggingCount.get()) {
-                            logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                            recordLoggingCount.set(recordLoggingCount.get() * 2);
-                        }
-                    });
-                }
-            }
-
-            @Override
-            public void finish()
-            {
-                producer.flush();
-            }
-
-            @Override
-            public void close()
-            {
-                producer.close();
-            }
-
-            @Override
-            public void abort()
-            {
-                producer.flush();
-                producer.close();
-            }
+        SchemaRegistryClient schemaRegistryClient = getSchemaRegistryClient(task.getSchemaRegistryUrl().get());
+        SubjectNameStrategy subjectNameStrategy = new TopicNameStrategy();
+        String subjectName = subjectNameStrategy.subjectName(task.getTopic(), false, null);
+        try {
+            String schema = schemaRegistryClient.getLatestSchemaMetadata(subjectName).getSchema();
+            avroSchema = new org.apache.avro.Schema.Parser().parse(schema);
+            return avroSchema;
+        }
+        catch (IOException | RestClientException e) {
+            throw new ConfigException("cannot fetch latest schema from schema registry.", e);
+        }
+    }
 
-            @Override
-            public TaskReport commit()
-            {
-                return null;
-            }
-        };
+    private static final String MOCK_SCHEMA_REGISTRY_PREFIX = "mock://";
+    private SchemaRegistryClient getSchemaRegistryClient(String url)
+    {
+        if (url.startsWith(MOCK_SCHEMA_REGISTRY_PREFIX)) {
+            String mockScope = url.substring(MOCK_SCHEMA_REGISTRY_PREFIX.length());
+            return MockSchemaRegistry.getClientForScope(mockScope);
+        }
+        else {
+            return new CachedSchemaRegistryClient(url, SCHEMA_REGISTRY_IDENTITY_MAP_CAPACITY);
+        }
     }
 }