embulk-output-kafka 0.1.7 → 0.1.8
- checksums.yaml +5 -5
- data/.circleci/config.yml +44 -0
- data/.github/dependabot.yml +11 -0
- data/README.md +5 -1
- data/build.gradle +29 -12
- data/docker-compose.yml +1 -1
- data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +13 -8
- data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java +13 -0
- data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +9 -2
- data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java +13 -0
- data/src/main/java/org/embulk/output/kafka/KafkaJsonSerializer.java +4 -0
- data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +25 -1
- data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +54 -153
- data/src/main/java/org/embulk/output/kafka/KafkaTransactionalPageOutput.java +104 -0
- data/src/main/java/org/embulk/output/kafka/RecordProducerFactory.java +3 -3
- data/src/test/java/org/embulk/output/kafka/TestKafkaOutputPlugin.java +384 -0
- data/src/test/resources/config_complex.yml +9 -28
- data/src/test/resources/config_complex_avro.yml +23 -42
- data/src/test/resources/config_simple.yml +5 -22
- data/src/test/resources/config_simple_avro.yml +14 -32
- data/src/test/resources/config_simple_avro_avsc_file.yml +7 -25
- data/src/test/resources/config_with_column_for_deletion.yml +7 -0
- data/src/test/resources/config_with_column_for_deletion_avro.yml +18 -0
- data/src/test/resources/config_with_key_column.yml +6 -23
- data/src/test/resources/config_with_partition_column.yml +6 -0
- data/src/test/resources/in1.csv +4 -4
- data/src/test/resources/in_complex.csv +4 -4
- data/src/test/resources/in_with_deletion.csv +4 -0
- metadata +30 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA256:
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: cb0e6dfc4b8b49b93fb6966948e8930e5785cf5a2a89fc5a8f17c80a4e7865c1
+  data.tar.gz: 18cb578eba9423c490e7416c906054f8401301b977e796c271182f593dc08563
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 84a080d3cc49d30ad04802162e94de0072aac96d2562649c34cb1623e7734f783e96deeb8082c81ce965b36520c21be122cdf0e292a55697c9d2c89fd767f551
+  data.tar.gz: 4eb1b72cb705b2eb473ae76f02085987b4f55a37d9ba7048789ea3133e8056850813d39cfb86b2d4f7482b31fbbb67f5387b2c3a730dfb0bffd2c5f9015d16d0
data/.circleci/config.yml
ADDED
@@ -0,0 +1,44 @@
+# Java Gradle CircleCI 2.0 configuration file
+#
+# Check https://circleci.com/docs/2.0/language-java/ for more details
+#
+version: 2
+jobs:
+  build:
+    docker:
+      # specify the version you desire here
+      - image: circleci/openjdk:8-jdk
+
+      # Specify service dependencies here if necessary
+      # CircleCI maintains a library of pre-built images
+      # documented at https://circleci.com/docs/2.0/circleci-images/
+      # - image: circleci/postgres:9.4
+
+    working_directory: ~/repo
+
+    environment:
+      # Customize the JVM maximum heap limit
+      JVM_OPTS: -Xmx3200m
+      TERM: dumb
+
+    steps:
+      - checkout
+
+      # Download and cache dependencies
+      - restore_cache:
+          keys:
+            - v1-dependencies-{{ checksum "build.gradle" }}
+            # fallback to using the latest cache if no exact match is found
+            - v1-dependencies-
+
+      - run: ./gradlew dependencies
+
+      - save_cache:
+          paths:
+            - ~/.gradle
+          key: v1-dependencies-{{ checksum "build.gradle" }}
+
+      # run tests!
+      - run: ./gradlew test
+      - store_test_results:
+          path: build/test-results
data/.github/dependabot.yml
ADDED
@@ -0,0 +1,11 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+  - package-ecosystem: "gradle" # See documentation for possible values
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "weekly"
data/README.md
CHANGED
@@ -1,4 +1,5 @@
 # Kafka output plugin for Embulk
+[![CircleCI](https://circleci.com/gh/joker1007/embulk-output-kafka.svg?style=svg)](https://circleci.com/gh/joker1007/embulk-output-kafka)
 
 ## Overview
 
@@ -19,6 +20,7 @@
 - **ignore_columns**: remove columns from output (array(string), default: `[]`)
 - **key_column_name**: use column value as record key (string, default: `null`, if this parameter is null, set random number as record key, and it can use column in `ignore_columns`)
 - **partition_column_name**: use column value as partition id (string, default: `null`, this value is prefer to `key_column_name`, and if partition_column value is null, use key_column for partitioning)
+- **column_for_deletion**: Determine to delete (string, default: `null`, `column_for_deletion` column must be boolean. If the value of the column is `true`, KafkaProducer sends `null` value to a Kafka Broker.)
 - **record_batch_size**: kafka producer record batch size (integer, default: `1000`)
 - **acks**: kafka producer require acks (string, default: `"1"`)
 - **retries**: kafka producer max retry count (integer, default: `1`)
@@ -28,7 +30,9 @@
 If use `avro_with_schema_registry` format, following configs are required.
 
 - **schema_registry_url**
-
+
+If avsc and avsc_file are null, embulk-output-kafka fetch a schema from schema registry.
+But currently, embulk-output-kafka supports only TopicNameStrategy.
 
 ## Example
 
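The new `column_for_deletion` option is exercised by the test fixtures added in this release (config_with_column_for_deletion.yml and config_with_column_for_deletion_avro.yml, whose contents are not shown in this excerpt). As a rough illustrative sketch only — the broker address, topic, and column names below are invented, not taken from this diff — an output section using the option might look like:

    out:
      type: kafka
      brokers:
        - localhost:9092            # hypothetical broker address
      topic: example-topic          # hypothetical topic
      serialize_format: json
      key_column_name: id           # hypothetical key column
      column_for_deletion: deleted  # boolean column; when true, the record value is sent as null

Producing a null value for an existing key is Kafka's tombstone convention: on a log-compacted topic the broker eventually discards the key entirely, which is the delete semantics the option exposes.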
data/build.gradle
CHANGED
@@ -1,6 +1,6 @@
 plugins {
-    id "com.jfrog.bintray" version "1.
-    id "com.github.jruby-gradle.base" version "1.
+    id "com.jfrog.bintray" version "1.8.5"
+    id "com.github.jruby-gradle.base" version "1.6.0"
     id "java"
     id "checkstyle"
 }
@@ -17,26 +17,43 @@ configurations {
     provided
 }
 
-version = "0.1.7"
+version = "0.1.8"
 
 sourceCompatibility = 1.8
 targetCompatibility = 1.8
 
 dependencies {
-    compile "org.embulk:embulk-core:0.9.
-    provided "org.embulk:embulk-core:0.9.
+    compile "org.embulk:embulk-core:0.9.22"
+    provided "org.embulk:embulk-core:0.9.22"
     // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
-    testCompile "junit:junit:4.
-    testCompile "org.embulk:embulk-test:0.9.
-    testCompile "org.embulk:embulk-standards:0.9.
+    testCompile "junit:junit:4.13"
+    testCompile "org.embulk:embulk-test:0.9.22"
+    testCompile "org.embulk:embulk-standards:0.9.22"
+    testCompile "org.embulk:embulk-deps-buffer:0.9.22"
 
-    compile
-
+    compile("org.apache.kafka:kafka-clients:2.5.1") {
+        exclude group: "org.slf4j", module: "slf4j-api"
     }
-    compile("
+    compile("org.apache.avro:avro:1.10.0") {
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
+        exclude group: "org.slf4j", module: "slf4j-api"
+    }
+    compile("io.confluent:kafka-avro-serializer:5.5.1") {
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
+        exclude group: "org.slf4j", module: "slf4j-api"
     }
 
-    testCompile
+    testCompile "com.salesforce.kafka.test:kafka-junit4:3.+"
+    testCompile("org.apache.kafka:kafka_2.12:2.5.+") {
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
+        exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
+        // exclude group: "com.fasterxml.jackson.dataformat", module: "jackson-dataformat-csv"
+        exclude group: "com.fasterxml.jackson.datatype", module: "jackson-datatype-jdk8"
     }
 }
 
data/docker-compose.yml
CHANGED
@@ -23,7 +23,7 @@ services:
       KAFKA_BROKER_ID: 1
       KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
       KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
-      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://
+      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
       KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
 
data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java
CHANGED
@@ -14,7 +14,7 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.stream.Collectors;
 
-public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
+public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor<GenericRecord>
 {
     private Schema avroSchema;
     private GenericRecord genericRecord;
@@ -25,8 +25,13 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
         this.avroSchema = avroSchema;
     }
 
-
+    @Override
+    public GenericRecord getRecord()
     {
+        if (isDeletion()) {
+            return null;
+        }
+
         return genericRecord;
     }
 
@@ -40,6 +45,8 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        super.booleanColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
@@ -145,9 +152,8 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
         switch (avroSchema.getType()) {
             case ARRAY:
                 if (value.isArrayValue()) {
-                    return value.asArrayValue().list().stream().map(item ->
-
-                    }).filter(Objects::nonNull).collect(Collectors.toList());
+                    return value.asArrayValue().list().stream().map(item ->
+                            convertMsgPackValueToAvroValue(avroSchema.getElementType(), item)).filter(Objects::nonNull).collect(Collectors.toList());
                 }
                 throw new RuntimeException(String.format("Schema mismatch: avro: %s, msgpack: %s", avroSchema.getType().getName(), value.getValueType().name()));
             case MAP:
@@ -166,9 +172,8 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
             GenericRecord record = new GenericData.Record(avroSchema);
             Map<Value, Value> valueMap = value.asMapValue().map();
             for (org.apache.avro.Schema.Field field : avroSchema.getFields()) {
-                Optional.ofNullable(valueMap.get(ValueFactory.newString(field.name()))).ifPresent(v ->
-
-                });
+                Optional.ofNullable(valueMap.get(ValueFactory.newString(field.name()))).ifPresent(v ->
+                        record.put(field.name(), convertMsgPackValueToAvroValue(field.schema(), v)));
             }
             return record;
         }
data/src/main/java/org/embulk/output/kafka/AvroFormatTransactionalPageOutput.java
ADDED
@@ -0,0 +1,13 @@
+package org.embulk.output.kafka;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.embulk.spi.PageReader;
+
+public class AvroFormatTransactionalPageOutput extends KafkaTransactionalPageOutput<Object, GenericRecord>
+{
+    public AvroFormatTransactionalPageOutput(KafkaProducer<Object, Object> producer, PageReader pageReader, KafkaOutputColumnVisitor<GenericRecord> columnVisitor, String topic, int taskIndex)
+    {
+        super(producer, pageReader, columnVisitor, topic, taskIndex);
+    }
+};
data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java
CHANGED
@@ -10,7 +10,7 @@ import org.msgpack.value.Value;
 import java.io.IOException;
 import java.time.format.DateTimeFormatter;
 
-public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
+public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor<ObjectNode>
 {
     private ObjectMapper objectMapper;
     private ObjectNode jsonNode;
@@ -23,8 +23,13 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
         this.objectMapper = objectMapper;
     }
 
-
+    @Override
+    public ObjectNode getRecord()
     {
+        if (isDeletion()) {
+            return null;
+        }
+
         return jsonNode;
     }
 
@@ -38,6 +43,8 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        super.booleanColumn(column);
+
         if (isIgnoreColumn(column)) {
             return;
         }
data/src/main/java/org/embulk/output/kafka/JsonFormatTransactionalPageOutput.java
ADDED
@@ -0,0 +1,13 @@
+package org.embulk.output.kafka;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.embulk.spi.PageReader;
+
+public class JsonFormatTransactionalPageOutput extends KafkaTransactionalPageOutput<ObjectNode, ObjectNode>
+{
+    public JsonFormatTransactionalPageOutput(KafkaProducer<Object, ObjectNode> producer, PageReader pageReader, KafkaOutputColumnVisitor<ObjectNode> columnVisitor, String topic, int taskIndex)
+    {
+        super(producer, pageReader, columnVisitor, topic, taskIndex);
+    }
+};
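Both of the new classes above are thin adapters over the extracted KafkaTransactionalPageOutput base class (+104 lines, diff not included in this excerpt). Its committed contents are not shown here, but the anonymous TransactionalPageOutput implementations it replaces (visible as removed lines in the KafkaOutputPlugin.java diff below) suggest roughly the following shape; treat this as a reconstruction under those assumptions, not the actual file:

    import java.util.PrimitiveIterator;
    import java.util.Random;

    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.ProducerRecord;
    import org.embulk.config.TaskReport;
    import org.embulk.spi.Page;
    import org.embulk.spi.PageReader;
    import org.embulk.spi.TransactionalPageOutput;

    // Sketch: reconstructed from the inline page outputs removed from
    // KafkaOutputPlugin.java in this release; the real class may differ.
    public abstract class KafkaTransactionalPageOutput<K, V> implements TransactionalPageOutput
    {
        private final KafkaProducer<Object, K> producer;
        private final PageReader pageReader;
        private final KafkaOutputColumnVisitor<V> columnVisitor;
        private final String topic;
        private final int taskIndex; // used for log prefixes in the removed inline code

        // the removed inline code fell back to a random long when no key column was configured
        private final PrimitiveIterator.OfLong randomLong = new Random().longs().iterator();

        public KafkaTransactionalPageOutput(KafkaProducer<Object, K> producer, PageReader pageReader,
                KafkaOutputColumnVisitor<V> columnVisitor, String topic, int taskIndex)
        {
            this.producer = producer;
            this.pageReader = pageReader;
            this.columnVisitor = columnVisitor;
            this.topic = topic;
            this.taskIndex = taskIndex;
        }

        @Override
        @SuppressWarnings("unchecked")
        public void add(Page page)
        {
            pageReader.setPage(page);
            while (pageReader.nextRecord()) {
                columnVisitor.reset();
                pageReader.getSchema().visitColumns(columnVisitor);

                Object recordKey = columnVisitor.getRecordKey();
                if (recordKey == null) {
                    recordKey = randomLong.next();
                }

                String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : topic;

                // getRecord() returns null when the deletion column is true, so a
                // tombstone (null value) is produced for the record key.
                ProducerRecord<Object, K> producerRecord = new ProducerRecord<>(
                        targetTopic, columnVisitor.getPartition(), recordKey, (K) columnVisitor.getRecord());
                producer.send(producerRecord, (metadata, exception) -> {
                    // the removed inline code logged failures and a doubling send count here
                });
            }
        }

        @Override
        public void finish() { producer.flush(); }

        @Override
        public void close() { producer.close(); }

        @Override
        public void abort() { producer.flush(); producer.close(); }

        @Override
        public TaskReport commit() { return null; } // mirrors the removed inline implementation
    }

Centralizing the add/finish/close/abort/commit protocol in one generic class is what lets the two format-specific subclasses shrink to a constructor call, and it gives the deletion behavior a single code path for both JSON and Avro.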
data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java
CHANGED
@@ -4,7 +4,7 @@ import org.embulk.spi.Column;
 import org.embulk.spi.ColumnVisitor;
 import org.embulk.spi.PageReader;
 
-public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
+public abstract class KafkaOutputColumnVisitor<T> implements ColumnVisitor
 {
     private KafkaOutputPlugin.PluginTask task;
     PageReader pageReader;
@@ -13,6 +13,7 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
     private Object recordKey = null;
     private String topicName = null;
     private Integer partition = null;
+    private boolean deletion = false;
 
     KafkaOutputColumnVisitor(KafkaOutputPlugin.PluginTask task, PageReader pageReader)
     {
@@ -21,6 +22,8 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         this.partitionColumnName = task.getPartitionColumnName().orElse(null);
     }
 
+    public abstract T getRecord();
+
     Object getRecordKey()
     {
         return recordKey;
@@ -50,11 +53,17 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         return partition;
     }
 
+    boolean isDeletion()
+    {
+        return deletion;
+    }
+
     void reset()
     {
         this.recordKey = null;
         this.topicName = null;
         this.partition = null;
+        this.deletion = false;
     }
 
     boolean isIgnoreColumn(Column column)
@@ -62,6 +71,21 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         return task.getIgnoreColumns().stream().anyMatch(name -> name.equals(column.getName()));
     }
 
+    boolean isColumnForDeletion(Column column)
+    {
+        return task.getColumnForDeletion().map(name -> name.equals(column.getName())).orElse(false);
+    }
+
+    @Override
+    public void booleanColumn(Column column)
+    {
+        if (!pageReader.isNull(column)) {
+            if (isColumnForDeletion(column)) {
+                deletion = pageReader.getBoolean(column);
+            }
+        }
+    }
+
     @Override
     public void longColumn(Column column)
     {
data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java
CHANGED
@@ -5,11 +5,16 @@ import com.fasterxml.jackson.annotation.JsonValue;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.collect.ImmutableList;
+import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
+import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry;
+import io.confluent.kafka.serializers.subject.TopicNameStrategy;
+import io.confluent.kafka.serializers.subject.strategy.SubjectNameStrategy;
 import org.apache.kafka.clients.admin.AdminClient;
 import org.apache.kafka.clients.admin.AdminClientConfig;
 import org.apache.kafka.clients.admin.DescribeTopicsResult;
 import org.apache.kafka.clients.producer.KafkaProducer;
-import org.apache.kafka.clients.producer.ProducerRecord;
 import org.embulk.config.Config;
 import org.embulk.config.ConfigDefault;
 import org.embulk.config.ConfigDiff;
@@ -20,12 +25,9 @@ import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
 import org.embulk.spi.Exec;
 import org.embulk.spi.OutputPlugin;
-import org.embulk.spi.Page;
 import org.embulk.spi.PageReader;
 import org.embulk.spi.Schema;
 import org.embulk.spi.TransactionalPageOutput;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
@@ -33,13 +35,10 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
-import java.util.PrimitiveIterator;
 import java.util.Properties;
-import java.util.Random;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
-import java.util.concurrent.atomic.AtomicLong;
 
 public class KafkaOutputPlugin
         implements OutputPlugin
@@ -107,7 +106,7 @@ public class KafkaOutputPlugin
         public Optional<String> getPartitionColumnName();
 
         @Config("record_batch_size")
-        @ConfigDefault("
+        @ConfigDefault("16384")
         public int getRecordBatchSize();
 
         @Config("acks")
@@ -129,17 +128,21 @@ public class KafkaOutputPlugin
         @Config("value_subject_name_strategy")
         @ConfigDefault("null")
         public Optional<String> getValueSubjectNameStrategy();
+
+        @Config("column_for_deletion")
+        @ConfigDefault("null")
+        public Optional<String> getColumnForDeletion();
     }
 
     private static ObjectMapper objectMapper = new ObjectMapper();
-
+
+    private static final int SCHEMA_REGISTRY_IDENTITY_MAP_CAPACITY = 1000;
 
     private AdminClient getKafkaAdminClient(PluginTask task)
     {
         Properties properties = new Properties();
         properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, task.getBrokers());
-
-        return adminClient;
+        return AdminClient.create(properties);
     }
 
     @Override
@@ -189,101 +192,48 @@ public class KafkaOutputPlugin
             case AVRO_WITH_SCHEMA_REGISTRY:
                 return buildPageOutputForAvroWithSchemaRegistry(task, schema, taskIndex);
             default:
-                throw new ConfigException("
+                throw new ConfigException("Unknown serialize format");
         }
     }
 
     private TransactionalPageOutput buildPageOutputForJson(PluginTask task, Schema schema, int taskIndex)
     {
         KafkaProducer<Object, ObjectNode> producer = RecordProducerFactory.getForJson(task, schema, task.getOtherProducerConfigs());
-
         PageReader pageReader = new PageReader(schema);
-
-        AtomicLong counter = new AtomicLong(0);
-        AtomicLong recordLoggingCount = new AtomicLong(1);
-
-        return new TransactionalPageOutput() {
-            private JsonFormatColumnVisitor columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
-
-            @Override
-            public void add(Page page)
-            {
-                pageReader.setPage(page);
-                while (pageReader.nextRecord()) {
-                    columnVisitor.reset();
-
-                    pageReader.getSchema().visitColumns(columnVisitor);
-
-                    Object recordKey = columnVisitor.getRecordKey();
-                    if (recordKey == null) {
-                        recordKey = randomLong.next();
-                    }
-
-                    String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
-                    ProducerRecord<Object, ObjectNode> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getJsonNode());
-                    producer.send(producerRecord, (metadata, exception) -> {
-                        if (exception != null) {
-                            logger.error("produce error", exception);
-                        }
-
-                        logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
-                                producerRecord.topic(),
-                                producerRecord.key(),
-                                producerRecord.value(),
-                                producerRecord.partition());
-
-                        long current = counter.incrementAndGet();
-                        if (current >= recordLoggingCount.get()) {
-                            logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                            recordLoggingCount.set(recordLoggingCount.get() * 2);
-                        }
-                    });
-                }
-            }
+        KafkaOutputColumnVisitor<ObjectNode> columnVisitor = new JsonFormatColumnVisitor(task, pageReader, objectMapper);
 
-
-            public void finish()
-            {
-                producer.flush();
-            }
-
-            @Override
-            public void close()
-            {
-                producer.close();
-            }
-
-            @Override
-            public void abort()
-            {
-                producer.flush();
-                producer.close();
-            }
-
-            @Override
-            public TaskReport commit()
-            {
-                return null;
-            }
-        };
+        return new JsonFormatTransactionalPageOutput(producer, pageReader, columnVisitor, task.getTopic(), taskIndex);
     }
 
     private TransactionalPageOutput buildPageOutputForAvroWithSchemaRegistry(PluginTask task, Schema schema, int taskIndex)
     {
         KafkaProducer<Object, Object> producer = RecordProducerFactory.getForAvroWithSchemaRegistry(task, schema, task.getOtherProducerConfigs());
-
         PageReader pageReader = new PageReader(schema);
+        org.apache.avro.Schema avroSchema = getAvroSchema(task);
+        AvroFormatColumnVisitor avroFormatColumnVisitor = new AvroFormatColumnVisitor(task, pageReader, avroSchema);
+
+        return new AvroFormatTransactionalPageOutput(producer, pageReader, avroFormatColumnVisitor, task.getTopic(), taskIndex);
+    }
 
+    private org.apache.avro.Schema getAvroSchema(PluginTask task)
+    {
         org.apache.avro.Schema avroSchema = null;
-        if (!task.
+        if (!task.getSchemaRegistryUrl().isPresent()) {
+            throw new ConfigException("avro_with_schema_registry format needs schema_registry_url");
+        }
+
+        if (task.getAvsc().isPresent() && task.getAvscFile().isPresent()) {
             throw new ConfigException("avro_with_schema_registry format needs either one of avsc and avsc_file");
         }
+
         if (task.getAvsc().isPresent()) {
             avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvsc().get().toString());
+            return avroSchema;
         }
         if (task.getAvscFile().isPresent()) {
             try {
                 avroSchema = new org.apache.avro.Schema.Parser().parse(task.getAvscFile().get());
+                return avroSchema;
             }
             catch (IOException e) {
                 e.printStackTrace();
@@ -291,77 +241,28 @@ public class KafkaOutputPlugin
             }
         }
 
-
-
-
-
-
-
-
-
-
-
-
-
-            {
-                pageReader.setPage(page);
-                while (pageReader.nextRecord()) {
-                    columnVisitor.reset();
-
-                    pageReader.getSchema().visitColumns(columnVisitor);
-
-                    Object recordKey = columnVisitor.getRecordKey();
-                    if (recordKey == null) {
-                        recordKey = randomLong.next();
-                    }
-
-                    String targetTopic = columnVisitor.getTopicName() != null ? columnVisitor.getTopicName() : task.getTopic();
-
-                    ProducerRecord<Object, Object> producerRecord = new ProducerRecord<>(targetTopic, columnVisitor.getPartition(), recordKey, columnVisitor.getGenericRecord());
-                    producer.send(producerRecord, (metadata, exception) -> {
-                        if (exception != null) {
-                            logger.error("produce error", exception);
-                        }
-
-                        logger.debug("sent record: {topic: {}, key: {}, value: {}, partition: {}}",
-                                producerRecord.topic(),
-                                producerRecord.key(),
-                                producerRecord.value(),
-                                producerRecord.partition());
-
-                        long current = counter.incrementAndGet();
-                        if (current >= recordLoggingCount.get()) {
-                            logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                            recordLoggingCount.set(recordLoggingCount.get() * 2);
-                        }
-                    });
-                }
-            }
-
-            @Override
-            public void finish()
-            {
-                producer.flush();
-            }
-
-            @Override
-            public void close()
-            {
-                producer.close();
-            }
-
-            @Override
-            public void abort()
-            {
-                producer.flush();
-                producer.close();
-            }
+        SchemaRegistryClient schemaRegistryClient = getSchemaRegistryClient(task.getSchemaRegistryUrl().get());
+        SubjectNameStrategy subjectNameStrategy = new TopicNameStrategy();
+        String subjectName = subjectNameStrategy.subjectName(task.getTopic(), false, null);
+        try {
+            String schema = schemaRegistryClient.getLatestSchemaMetadata(subjectName).getSchema();
+            avroSchema = new org.apache.avro.Schema.Parser().parse(schema);
+            return avroSchema;
+        }
+        catch (IOException | RestClientException e) {
+            throw new ConfigException("cannot fetch latest schema from schema registry.", e);
+        }
+    }
 
-
-
-
-
-
-
+    private static final String MOCK_SCHEMA_REGISTRY_PREFIX = "mock://";
+
+    private SchemaRegistryClient getSchemaRegistryClient(String url)
+    {
+        if (url.startsWith(MOCK_SCHEMA_REGISTRY_PREFIX)) {
+            String mockScope = url.substring(MOCK_SCHEMA_REGISTRY_PREFIX.length());
+            return MockSchemaRegistry.getClientForScope(mockScope);
+        }
+        else {
+            return new CachedSchemaRegistryClient(url, SCHEMA_REGISTRY_IDENTITY_MAP_CAPACITY);
+        }
    }
 }