embulk-output-kafka 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.circleci/config.yml +44 -0
- data/.github/dependabot.yml +11 -0
- data/README.md +5 -1
- data/build.gradle +29 -12
- data/docker-compose.yml +1 -1
- data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +13 -8
- data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java +13 -0
- data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +9 -2
- data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java +13 -0
- data/src/main/java/org/embulk/output/kafka/KafkaJsonSerializer.java +4 -0
- data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +25 -1
- data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +54 -153
- data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java +104 -0
- data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java +3 -3
- data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java +384 -0
- data/src/test/resources/config_complex.yml +9 -28
- data/src/test/resources/config_complex_avro.yml +23 -42
- data/src/test/resources/config_simple.yml +5 -22
- data/src/test/resources/config_simple_avro.yml +14 -32
- data/src/test/resources/config_simple_avro_avsc_file.yml +7 -25
- data/src/test/resources/config_with_column_for_deletion.yml +7 -0
- data/src/test/resources/config_with_column_for_deletion_avro.yml +18 -0
- data/src/test/resources/config_with_key_column.yml +6 -23
- data/src/test/resources/config_with_partition_column.yml +6 -0
- data/src/test/resources/in1.csv +4 -4
- data/src/test/resources/in_complex.csv +4 -4
- data/src/test/resources/in_with_deletion.csv +4 -0
- metadata +30 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: cb0e6dfc4b8b49b93fb6966948e8930e5785cf5a2a89fc5a8f17c80a4e7865c1
+  data.tar.gz: 18cb578eba9423c490e7416c906054f8401301b977e796c271182f593dc08563
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 84a080d3cc49d30ad04802162e94de0072aac96d2562649c34cb1623e7734f783e96deeb8082c81ce965b36520c21be122cdf0e292a55697c9d2c89fd767f551
+  data.tar.gz: 4eb1b72cb705b2eb473ae76f02085987b4f55a37d9ba7048789ea3133e8056850813d39cfb86b2d4f7482b31fbbb67f5387b2c3a730dfb0bffd2c5f9015d16d0
data/.circleci/config.yml
ADDED
@@ -0,0 +1,44 @@
+# Java Gradle CircleCI 2.0 configuration file
+#
+# Check https://circleci.com/docs/2.0/language-java/ for more details
+#
+version: 2
+jobs:
+  build:
+    docker:
+      # specify the version you desire here
+      - image: circleci/openjdk:8-jdk
+
+      # Specify service dependencies here if necessary
+      # CircleCI maintains a library of pre-built images
+      # documented at https://circleci.com/docs/2.0/circleci-images/
+      # - image: circleci/postgres:9.4
+
+    working_directory: ~/repo
+
+    environment:
+      # Customize the JVM maximum heap limit
+      JVM_OPTS: -Xmx3200m
+      TERM: dumb
+
+    steps:
+      - checkout
+
+      # Download and cache dependencies
+      - restore_cache:
+          keys:
+            - v1-dependencies-{{ checksum "build.gradle" }}
+            # fallback to using the latest cache if no exact match is found
+            - v1-dependencies-
+
+      - run: ./gradlew dependencies
+
+      - save_cache:
+          paths:
+            - ~/.gradle
+          key: v1-dependencies-{{ checksum "build.gradle" }}
+
+      # run tests!
+      - run: ./gradlew test
+      - store_test_results:
+          path: build/test-results
data/.github/dependabot.yml
ADDED
@@ -0,0 +1,11 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+  - package-ecosystem: "gradle" # See documentation for possible values
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "weekly"
data/README.md
CHANGED
@@ -1,4 +1,5 @@
 # Kafka output plugin for Embulk
+[![CircleCI](https://circleci.com/gh/joker1007/embulk-output-kafka.svg?style=svg)](https://circleci.com/gh/joker1007/embulk-output-kafka)
 
 ## Overview
 
@@ -19,6 +20,7 @@
 - **ignore_columns**: remove columns from output (array(string), default: `[]`)
 - **key_column_name**: use column value as record key (string, default: `null`, if this parameter is null, set random number as record key, and it can use column in `ignore_columns`)
 - **partition_column_name**: use column value as partition id (string, default: `null`, this value is prefer to `key_column_name`, and if partition_column value is null, use key_column for partitioning)
+- **column_for_deletion**: Determine to delete (string, default: `null`, `column_for_deletion` column must be boolean. If the value of the column is `true`, KafkaProducer sends `null` value to a Kafka Broker.)
 - **record_batch_size**: kafka producer record batch size (integer, default: `1000`)
 - **acks**: kafka producer require acks (string, default: `"1"`)
 - **retries**: kafka producer max retry count (integer, default: `1`)
@@ -28,7 +30,9 @@
 If use `avro_with_schema_registry` format, following configs are required.
 
 - **schema_registry_url**
-
+
+If avsc and avsc_file are null, embulk-output-kafka fetch a schema from schema registry.
+But currently, embulk-output-kafka supports only TopicNameStrategy.
 
 ## Example
 
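For reference, the new `column_for_deletion` option documented above can be exercised with a minimal Embulk output config like the sketch below. This is illustrative only: the broker address, topic, and column name are hypothetical, and `serialize_format: json` is inferred from the "Unknown serialize format" error message elsewhere in this diff rather than quoted from the README.

out:
  type: kafka
  brokers:
    - "localhost:9092"          # hypothetical broker address
  topic: "example-topic"        # hypothetical topic name
  serialize_format: json
  column_for_deletion: deleted  # hypothetical boolean column; rows where it is true are sent as null (tombstone) records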
data/build.gradle
CHANGED
@@ -1,6 +1,6 @@
 plugins {
-    id "com.jfrog.bintray" version "1.
-    id "com.github.jruby-gradle.base" version "1.
+    id "com.jfrog.bintray" version "1.8.5"
+    id "com.github.jruby-gradle.base" version "1.6.0"
     id "java"
     id "checkstyle"
 }
@@ -17,26 +17,43 @@ configurations {
     provided
 }
 
-version = "0.1.7"
+version = "0.1.8"
 
 sourceCompatibility = 1.8
 targetCompatibility = 1.8
 
 dependencies {
-    compile "org.embulk:embulk-core:0.9.
-    provided "org.embulk:embulk-core:0.9.
+    compile "org.embulk:embulk-core:0.9.22"
+    provided "org.embulk:embulk-core:0.9.22"
     // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
-    testCompile "junit:junit:4.
-    testCompile "org.embulk:embulk-test:0.9.
-    testCompile "org.embulk:embulk-standards:0.9.
+    testCompile "junit:junit:4.13"
+    testCompile "org.embulk:embulk-test:0.9.22"
+    testCompile "org.embulk:embulk-standards:0.9.22"
+    testCompile "org.embulk:embulk-deps-buffer:0.9.22"
 
-    compile
-
+    compile("org.apache.kafka:kafka-clients:2.5.1") {
+        exclude group: "org.slf4j", module: "slf4j-api"
     }
-    compile("
+    compile("org.apache.avro:avro:1.10.0") {
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
+        exclude group: "org.slf4j", module: "slf4j-api"
+    }
+    compile("io.confluent:kafka-avro-serializer:5.5.1") {
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
+        exclude group: "org.slf4j", module: "slf4j-api"
     }
 
-    testCompile
+    testCompile "com.salesforce.kafka.test:kafka-junit4:3.+"
+    testCompile("org.apache.kafka:kafka_2.12:2.5.+") {
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
+        // exclude group: "com.fasterxml.jackson.dataformat", module: "jackson-dataformat-csv"
+        exclude group: "com.fasterxml.jackson.datatype", module: "jackson-datatype-jdk8"
     }
 }
 
data/docker-compose.yml
CHANGED
@@ -23,7 +23,7 @@ services:
       KAFKA_BROKER_ID: 1
       KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
       KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
-      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://
+      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
       KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
 
data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java
CHANGED
@@ -14,7 +14,7 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.stream.Collectors;
 
-public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
+public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor<GenericRecord>
 {
     private Schema avroSchema;
     private GenericRecord genericRecord;
@@ -25,8 +25,13 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
         this.avroSchema = avroSchema;
     }
 
-    GenericRecord getGenericRecord()
+    @Override
+    public GenericRecord getRecord()
     {
+        if (isDeletion()) {
+            return null;
+        }
+
         return genericRecord;
     }
 
@@ -40,6 +45,8 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        super.booleanColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
@@ -145,9 +152,8 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
         switch (avroSchema.getType()) {
             case ARRAY:
                 if (value.isArrayValue()) {
-                    return value.asArrayValue().list().stream().map(item ->
-
-                    }).filter(Objects::nonNull).collect(Collectors.toList());
+                    return value.asArrayValue().list().stream().map(item ->
+                            convertMsgPackValueToAvroValue(avroSchema.getElementType(), item)).filter(Objects::nonNull).collect(Collectors.toList());
                 }
                 throw new RuntimeException(String.format("Schema mismatch: avro: %s, msgpack: %s", avroSchema.getType().getName(), value.getValueType().name()));
             case MAP:
@@ -166,9 +172,8 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
                 GenericRecord record = new GenericData.Record(avroSchema);
                 Map<Value, Value> valueMap = value.asMapValue().map();
                 for (org.apache.avro.Schema.Field field : avroSchema.getFields()) {
-                    Optional.ofNullable(valueMap.get(ValueFactory.newString(field.name()))).ifPresent(v ->
-
-                    });
+                    Optional.ofNullable(valueMap.get(ValueFactory.newString(field.name()))).ifPresent(v ->
+                            record.put(field.name(), convertMsgPackValueToAvroValue(field.schema(), v)));
                 }
                 return record;
             }
data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java
ADDED
@@ -0,0 +1,13 @@
+package org.embulk.output.kafka;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.embulk.spi.PageReader;
+
+public class AvroFormatTransactionalPageOutput extends KafkaTransactionalPageOutput<Object, GenericRecord>
+{
+    public AvroFormatTransactionalPageOutput(KafkaProducer<Object, Object> producer, PageReader pageReader, KafkaOutputColumnVisitor<GenericRecord> columnVisitor, String topic, int taskIndex)
+    {
+        super(producer, pageReader, columnVisitor, topic, taskIndex);
+    }
+};
data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java
CHANGED
@@ -10,7 +10,7 @@ import org.msgpack.value.Value;
 import java.io.IOException;
 import java.time.format.DateTimeFormatter;
 
-public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
+public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor<ObjectNode>
 {
     private ObjectMapper objectMapper;
     private ObjectNode jsonNode;
@@ -23,8 +23,13 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
         this.objectMapper = objectMapper;
     }
 
-    ObjectNode getJsonNode()
+    @Override
+    public ObjectNode getRecord()
     {
+        if (isDeletion()) {
+            return null;
+        }
+
         return jsonNode;
     }
 
@@ -38,6 +43,8 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        super.booleanColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java
ADDED
@@ -0,0 +1,13 @@
+package org.embulk.output.kafka;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.embulk.spi.PageReader;
+
+public class JsonFormatTransactionalPageOutput extends KafkaTransactionalPageOutput<ObjectNode, ObjectNode>
+{
+    public JsonFormatTransactionalPageOutput(KafkaProducer<Object, ObjectNode> producer, PageReader pageReader, KafkaOutputColumnVisitor<ObjectNode> columnVisitor, String topic, int taskIndex)
+    {
+        super(producer, pageReader, columnVisitor, topic, taskIndex);
+    }
+};
data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java
CHANGED
@@ -4,7 +4,7 @@ import org.embulk.spi.Column;
 import org.embulk.spi.ColumnVisitor;
 import org.embulk.spi.PageReader;
 
-public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
+public abstract class KafkaOutputColumnVisitor<T> implements ColumnVisitor
 {
     private KafkaOutputPlugin.PluginTask task;
     PageReader pageReader;
@@ -13,6 +13,7 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
     private Object recordKey = null;
     private String topicName = null;
     private Integer partition = null;
+    private boolean deletion = false;
 
     KafkaOutputColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader)
     {
@@ -21,6 +22,8 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         this.partitionColumnName = task.getPartitionColumnName().orElse(null);
     }
 
+    public abstract T getRecord();
+
     Object getRecordKey()
     {
         return recordKey;
@@ -50,11 +53,17 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         return partition;
     }
 
+    boolean isDeletion()
+    {
+        return deletion;
+    }
+
     void reset()
     {
         this.recordKey = null;
         this.topicName = null;
         this.partition = null;
+        this.deletion = false;
     }
 
     boolean isIgnoreColumn(Column column)
@@ -62,6 +71,21 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         return task.getIgnoreColumns().stream().anyMatch(name -> name.equals(column.getName()));
     }
 
+    boolean isColumnForDeletion(Column column)
+    {
+        return task.getColumnForDeletion().map(name -> name.equals(column.getName())).orElse(false);
+    }
+
+    @Override
+    public void booleanColumn(Column column)
+    {
+        if (!pageReader.isNull(column)) {
+            if (isColumnForDeletion(column)) {
+                deletion = pageReader.getBoolean(column);
+            }
+        }
+    }
+
     @Override
     public void longColumn(Column column)
     {
data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java
CHANGED
@@ -5,11 +5,16 @@ import com.fasterxml.jackson.annotation.JsonValue;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.collect.ImmutableList;
+import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
+import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry;
+import io.confluent.kafka.serializers.subject.TopicNameStrategy;
+import io.confluent.kafka.serializers.subject.strategy.SubjectNameStrategy;
 import org.apache.kafka.clients.admin.AdminClient;
 import org.apache.kafka.clients.admin.AdminClientConfig;
 import org.apache.kafka.clients.admin.DescribeTopicsResult;
 import org.apache.kafka.clients.producer.KafkaProducer;
-import org.apache.kafka.clients.producer.ProducerRecord;
 import org.embulk.config.Config;
 import org.embulk.config.ConfigDefault;
 import org.embulk.config.ConfigDiff;
@@ -20,12 +25,9 @@ import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
 import org.embulk.spi.Exec;
 import org.embulk.spi.OutputPlugin;
-import org.embulk.spi.Page;
 import org.embulk.spi.PageReader;
 import org.embulk.spi.Schema;
 import org.embulk.spi.TransactionalPageOutput;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
@@ -33,13 +35,10 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
-import java.util.PrimitiveIterator;
 import java.util.Properties;
-import java.util.Random;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
-import java.util.concurrent.atomic.AtomicLong;
 
 public class KafkaOutputPlugin
         implements OutputPlugin
@@ -107,7 +106,7 @@ public class KafkaOutputPlugin
         public Optional<String> getPartitionColumnName();
 
         @Config("record_batch_size")
-        @ConfigDefault("
+        @ConfigDefault("16384")
         public int getRecordBatchSize();
 
         @Config("acks")
@@ -129,17 +128,21 @@ public class KafkaOutputPlugin
         @Config("value_subject_name_strategy")
         @ConfigDefault("null")
         public Optional<String> getValueSubjectNameStrategy();
+
+        @Config("column_for_deletion")
+        @ConfigDefault("null")
+        public Optional<String> getColumnForDeletion();
     }
 
     private static ObjectMapper objectMapper = new ObjectMapper();
-
+
+    private static final int SCHEMA_REGISTRY_IDENTITY_MAP_CAPACITY = 1000;
 
     private AdminClient getKafkaAdminClient(PluginTask task)
    {
         Properties properties = new Properties();
         properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
-        AdminClient adminClient = AdminClient.create(properties);
-        return adminClient;
+        return AdminClient.create(properties);
     }
 
     @Override
@@ -189,101 +192,48 @@ public class KafkaOutputPlugin
             case AVRO_WITH_SCHEMA_REGISTRY:
                 return buildPageOutputForAvroWithSchemaRegistry(task, schema, taskIndex);
             default:
-                throw new ConfigException("
+                throw new ConfigException("Unknown serialize format");
         }
     }
 
     private TransactionalPageOutput buildPageOutputForJson(PluginTask task, Schema schema, int taskIndex)
     {
         KafkaProducer<Object, ObjectNode> producer = RecordProducerFactory.getForJson(task, schema, task.getOtherProducerConfigs());
-
         PageReader pageReader = new PageReader(schema);
-
-        AtomicLong counter = new AtomicLong(0);
-        AtomicLong recordLoggingCount = new AtomicLong(1);
-
-        return new TransactionalPageOutput() {
-            private JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
-
-            @Override
-            public void add(Page page)
-            {
-                pageReader.setPage(page);
-                while (pageReader.nextRecord()) {
-                    columnVisitor.reset();
-
-                    pageReader.getSchema().visitColumns(columnVisitor);
-
-                    Object recordKey = columnVisitor.getRecordKey();
-                    if (recordKey == null) {
-                        recordKey = randomLong.next();
-                    }
-
-                    String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
-                    ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getJsonNode());
-                    producer.send(producerRecord, (metadata, exception) -> {
-                        if (exception != null) {
-                            logger.error("produce error", exception);
-                        }
-
-                        logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
-                                producerRecord.topic(),
-                                producerRecord.key(),
-                                producerRecord.value(),
-                                producerRecord.partition());
-
-                        long current = counter.incrementAndGet();
-                        if (current >= recordLoggingCount.get()) {
-                            logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                            recordLoggingCount.set(recordLoggingCount.get() * 2);
-                        }
-                    });
-                }
-            }
+        KafkaOutputColumnVisitor<ObjectNode> columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
 
-
-            public void finish()
-            {
-                producer.flush();
-            }
-
-            @Override
-            public void close()
-            {
-                producer.close();
-            }
-
-            @Override
-            public void abort()
-            {
-                producer.flush();
-                producer.close();
-            }
-
-            @Override
-            public TaskReport commit()
-            {
-                return null;
-            }
-        };
+        return new JsonFormatTransactionalPageOutput(producer, pageReader, columnVisitor, task.getTopic(), taskIndex);
     }
 
     private TransactionalPageOutput buildPageOutputForAvroWithSchemaRegistry(PluginTask task, Schema schema, int taskIndex)
     {
         KafkaProducer<Object, Object> producer = RecordProducerFactory.getForAvroWithSchemaRegistry(task, schema, task.getOtherProducerConfigs());
-
         PageReader pageReader = new PageReader(schema);
+        org.apache.avro.Schema avroSchema = getAvroSchema(task);
+        AvroFormatColumnVisitor avroFormatColumnVisitor = new AvroFormatColumnVisitor(task, pageReader, avroSchema);
+
+        return new AvroFormatTransactionalPageOutput(producer, pageReader, avroFormatColumnVisitor, task.getTopic(), taskIndex);
+    }
 
+    private org.apache.avro.Schema getAvroSchema(PluginTask task)
+    {
        org.apache.avro.Schema avroSchema = null;
-        if (!task.
+        if (!task.getSchemaRegistryUrl().isPresent()) {
+            throw new ConfigException("avro_with_schema_registry format needs schema_registry_url");
+        }
+
+        if (task.getAvsc().isPresent() && task.getAvscFile().isPresent()) {
             throw new ConfigException("avro_with_schema_registry format needs either one of avsc and avsc_file");
         }
+
         if (task.getAvsc().isPresent()) {
             avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvsc().get().toString());
+            return avroSchema;
         }
         if (task.getAvscFile().isPresent()) {
             try {
                 avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvscFile().get());
+                return avroSchema;
             }
             catch (IOException e) {
                 e.printStackTrace();
@@ -291,77 +241,28 @@ public class KafkaOutputPlugin
             }
         }
 
-
-
-
-
-
-
-
-
-
-
-
-
-            {
-                pageReader.setPage(page);
-                while (pageReader.nextRecord()) {
-                    columnVisitor.reset();
-
-                    pageReader.getSchema().visitColumns(columnVisitor);
-
-                    Object recordKey = columnVisitor.getRecordKey();
-                    if (recordKey == null) {
-                        recordKey = randomLong.next();
-                    }
-
-                    String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
-
-                    ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getGenericRecord());
-                    producer.send(producerRecord, (metadata, exception) -> {
-                        if (exception != null) {
-                            logger.error("produce error", exception);
-                        }
-
-                        logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
-                                producerRecord.topic(),
-                                producerRecord.key(),
-                                producerRecord.value(),
-                                producerRecord.partition());
-
-                        long current = counter.incrementAndGet();
-                        if (current >= recordLoggingCount.get()) {
-                            logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                            recordLoggingCount.set(recordLoggingCount.get() * 2);
-                        }
-                    });
-                }
-            }
-
-            @Override
-            public void finish()
-            {
-                producer.flush();
-            }
-
-            @Override
-            public void close()
-            {
-                producer.close();
-            }
-
-            @Override
-            public void abort()
-            {
-                producer.flush();
-                producer.close();
-            }
+        SchemaRegistryClient schemaRegistryClient = getSchemaRegistryClient(task.getSchemaRegistryUrl().get());
+        SubjectNameStrategy subjectNameStrategy = new TopicNameStrategy();
+        String subjectName = subjectNameStrategy.subjectName(task.getTopic(), false, null);
+        try {
+            String schema = schemaRegistryClient.getLatestSchemaMetadata(subjectName).getSchema();
+            avroSchema = new org.apache.avro.Schema.Parser().parse(schema);
+            return avroSchema;
+        }
+        catch (IOException | RestClientException e) {
+            throw new ConfigException("cannot fetch latest schema from schema registry.", e);
+        }
+    }
 
-
-
-
-
-
-
+    private static final String MOCK_SCHEMA_REGISTRY_PREFIX = "mock://";
+    private SchemaRegistryClient getSchemaRegistryClient(String url)
+    {
+        if (url.startsWith(MOCK_SCHEMA_REGISTRY_PREFIX)) {
+            String mockScope = url.substring(MOCK_SCHEMA_REGISTRY_PREFIX.length());
+            return MockSchemaRegistry.getClientForScope(mockScope);
+        }
+        else {
+            return new CachedSchemaRegistryClient(url, SCHEMA_REGISTRY_IDENTITY_MAP_CAPACITY);
+        }
    }
 }