embulk-output-kafka 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/build.gradle +1 -1
- data/classpath/embulk-output-kafka-0.1.2.jar +0 -0
- data/docker-compose.yml +41 -0
- data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +30 -3
- data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +24 -0
- data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +15 -4
- data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +11 -5
- data/src/test/resources/config_complex.yml +2 -0
- metadata +4 -3
- data/classpath/embulk-output-kafka-0.1.1.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: feda9642eebe84a68dc4ff9a3f3e2b297e2e94f0
+  data.tar.gz: 4cdb1d310d6e1ae9f4caa5102465311c963f2656
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ab050ce4361ef325f2dcb9a07dce6135c3735d9b65c3820ea84385bbec6f8d3f7ea810f155df4477af43f56bf75b0bb5dd82d90ef5cd911da1932ab5a9637328
+  data.tar.gz: a661ff3eb73bb171f3d04a5699aaf390fa7b0ea8b2dad5feb9cf5420237ae6f5b692011d993bb002d77981544abff890f37a7fc40308a3708e1cbfd0d57b1a1e
data/README.md
CHANGED
@@ -9,14 +9,15 @@
 
 ## Configuration
 
-- **broker**: kafka broker host and port (array
+- **broker**: kafka broker host and port (array(string), required)
 - **topic**: target topic name (string, required)
 - **topic_column**: use column value as target topic (string, default: `null`)
 - **schema_registry_url**: Schema Registy URL that is needed for avro format (string, default: `null`)
 - **serialize_format**: use column value as target topic (enum, required, `json` or `avro_with_schema_registry`)
 - **avsc_file**: avro schema file path (string, default: `null`)
 - **avsc**: inline avro schema config (json, default: `null`)
-- **
+- **ignore_columns**: remove columns from output (array(string), default: `[]`)
+- **key_column_name**: use column value as record key (string, default: `null`, if this parameter is null, set random number as record key, and it can use column in `ignore_columns`)
 - **record_batch_size**: kafka producer record batch size (integer, default: `1000`)
 - **acks**: kafka producer require acks (string, default: `"1"`)
 - **retries**: kafka producer max retry count (integer, default: `1`)
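For reference, a minimal `out:` section exercising the two options added in 0.1.2. The broker address, topic, and `id` column are hypothetical placeholders, not values taken from the package:

out:
  type: kafka
  broker:
    - localhost:9092          # hypothetical broker address
  topic: example-topic        # hypothetical topic name
  serialize_format: json
  key_column_name: id         # record key taken from the "id" column
  ignore_columns:
    - id                      # per the README, the key column may still be listed here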
data/build.gradle
CHANGED

data/classpath/embulk-output-kafka-0.1.2.jar
ADDED
Binary file
data/docker-compose.yml
ADDED
@@ -0,0 +1,41 @@
+version: "3"
+services:
+  zookeeper:
+    image: confluentinc/cp-zookeeper:5.3.0
+    hostname: zookeeper
+    container_name: zookeeper
+    ports:
+      - "2181:2181"
+    environment:
+      ZOOKEEPER_CLIENT_PORT: 2181
+      ZOOKEEPER_TICK_TIME: 2000
+
+  broker:
+    image: confluentinc/cp-kafka:5.3.0
+    hostname: broker
+    container_name: broker
+    depends_on:
+      - zookeeper
+    ports:
+      - "29092:29092"
+      - "9092:9092"
+    environment:
+      KAFKA_BROKER_ID: 1
+      KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
+      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
+      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://broker:9092
+      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
+
+  schema-registry:
+    image: confluentinc/cp-schema-registry:5.3.0
+    hostname: schema-registry
+    container_name: schema-registry
+    depends_on:
+      - zookeeper
+      - broker
+    ports:
+      - "48081:8081"
+    environment:
+      SCHEMA_REGISTRY_HOST_NAME: schema-registry
+      SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: 'zookeeper:2181'
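This compose file presumably backs the plugin's integration tests: a single Kafka 5.3.0 broker with ZooKeeper, plus a Confluent Schema Registry mapped to host port 48081 instead of the default 8081. A hedged sketch of an `out:` section pointed at this stack for the Avro path follows; all values are illustrative, and note that the compose file advertises `PLAINTEXT_HOST://broker:9092`, so a host-side client needs `broker` to resolve (e.g. via an /etc/hosts alias):

out:
  type: kafka
  broker:
    - broker:9092                            # host-mapped listener from the compose file
  topic: example-topic                       # hypothetical topic name
  serialize_format: avro_with_schema_registry
  schema_registry_url: http://localhost:48081
  avsc_file: /path/to/schema.avsc            # hypothetical schema path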
data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java
CHANGED
@@ -29,6 +29,10 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
@@ -40,42 +44,61 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void longColumn(Column column)
     {
+        super.longColumn(column);
+
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
         }
 
         genericRecord.put(column.getName(), pageReader.getLong(column));
-        super.longColumn(column);
     }
 
     @Override
     public void doubleColumn(Column column)
     {
+        super.doubleColumn(column);
+
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
         }
 
         genericRecord.put(column.getName(), pageReader.getDouble(column));
-        super.doubleColumn(column);
     }
 
     @Override
     public void stringColumn(Column column)
     {
+        super.stringColumn(column);
+
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
         }
 
         genericRecord.put(column.getName(), pageReader.getString(column));
-        super.stringColumn(column);
     }
 
     @Override
     public void timestampColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
@@ -87,6 +110,10 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void jsonColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
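Note the ordering in these hunks: for long, double, and string columns the super call moves from the end of the method to the top, ahead of the new ignore guard. That lets the base class record the key (and topic) from a column's value even when `ignore_columns` drops that column from the payload, which is exactly the behavior the README promises for `key_column_name`.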
data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java
CHANGED
@@ -27,6 +27,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -38,6 +42,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void longColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -50,6 +58,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void doubleColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -62,6 +74,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void stringColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -74,6 +90,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void timestampColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -85,6 +105,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void jsonColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java
CHANGED
@@ -32,22 +32,33 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         }
     }
 
+    boolean isIgnoreColumn(Column column)
+    {
+        return task.getIgnoreColumns().stream().anyMatch(name -> name.equals(column.getName()));
+    }
+
     @Override
     public void longColumn(Column column)
     {
-
+        if (!pageReader.isNull(column)) {
+            setRecordKey(column, pageReader.getLong(column));
+        }
     }
 
     @Override
     public void doubleColumn(Column column)
     {
-
+        if (!pageReader.isNull(column)) {
+            setRecordKey(column, pageReader.getDouble(column));
+        }
     }
 
     @Override
     public void stringColumn(Column column)
     {
-
-
+        if (!pageReader.isNull(column)) {
+            setRecordKey(column, pageReader.getString(column));
+            setTopicName(column, pageReader.getString(column));
+        }
     }
 }
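The two halves of the feature meet in this base class: `isIgnoreColumn` answers the guard used by the format visitors, while the type-specific overrides pull the record key (and, for strings, the topic) out of the page before any guard runs. A compact, self-contained sketch of that interplay; the class and helper names below are simplified stand-ins, not the plugin's actual API:

import java.util.List;

// Sketch of the 0.1.2 visitor split: the base class extracts the record key,
// subclasses guard the payload write with isIgnoreColumn.
abstract class VisitorSketch
{
    private final List<String> ignoreColumns;
    protected Object recordKey;

    VisitorSketch(List<String> ignoreColumns)
    {
        this.ignoreColumns = ignoreColumns;
    }

    // Same shape as isIgnoreColumn in the diff: match the column name
    // against the configured ignore_columns list.
    boolean isIgnoreColumn(String columnName)
    {
        return ignoreColumns.stream().anyMatch(name -> name.equals(columnName));
    }

    // Stand-in for the base stringColumn: capture the key unconditionally.
    void stringColumn(String columnName, String value)
    {
        recordKey = value; // the real setRecordKey also checks key_column_name
    }
}

class JsonVisitorSketch extends VisitorSketch
{
    JsonVisitorSketch(List<String> ignoreColumns)
    {
        super(ignoreColumns);
    }

    @Override
    void stringColumn(String columnName, String value)
    {
        super.stringColumn(columnName, value); // key extraction happens first
        if (isIgnoreColumn(columnName)) {
            return;                            // ignored columns never reach the payload
        }
        System.out.printf("emit %s=%s (key=%s)%n", columnName, value, recordKey);
    }
}

With this shape, `new JsonVisitorSketch(List.of("id")).stringColumn("id", "42")` would capture the key but emit nothing, mirroring the README's note that `key_column_name` may reference a column listed in `ignore_columns`.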
data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java
CHANGED
@@ -15,6 +15,7 @@ import org.embulk.config.ConfigSource;
 import org.embulk.config.Task;
 import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
+import org.embulk.spi.ColumnConfig;
 import org.embulk.spi.Exec;
 import org.embulk.spi.OutputPlugin;
 import org.embulk.spi.Page;
@@ -110,11 +111,14 @@ public class KafkaOutputPlugin
         @Config("other_producer_configs")
         @ConfigDefault("{}")
         public Map<String, String> getOtherProducerConfigs();
+
+        @Config("ignore_columns")
+        @ConfigDefault("[]")
+        public List<String> getIgnoreColumns();
     }
 
     private static ObjectMapper objectMapper = new ObjectMapper();
     private Logger logger = LoggerFactory.getLogger(getClass());
-    private int recordLoggingCount = 1;
 
     @Override
     public ConfigDiff transaction(ConfigSource config,
@@ -168,6 +172,7 @@ public class KafkaOutputPlugin
         PageReader pageReader = new PageReader(schema);
         PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
         AtomicInteger counter = new AtomicInteger(0);
+        AtomicInteger recordLoggingCount = new AtomicInteger(1);
 
         return new TransactionalPageOutput() {
             @Override
@@ -194,9 +199,9 @@ public class KafkaOutputPlugin
                     logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());
 
                     int current = counter.incrementAndGet();
-                    if (current >= recordLoggingCount) {
+                    if (current >= recordLoggingCount.get()) {
                         logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                        recordLoggingCount
+                        recordLoggingCount.set(recordLoggingCount.get() * 2);
                     }
                 });
             }
@@ -255,6 +260,7 @@ public class KafkaOutputPlugin
         PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
 
         AtomicInteger counter = new AtomicInteger(0);
+        AtomicInteger recordLoggingCount = new AtomicInteger(1);
 
         final org.apache.avro.Schema finalAvroSchema = avroSchema;
         return new TransactionalPageOutput()
@@ -284,9 +290,9 @@ public class KafkaOutputPlugin
                     logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());
 
                     int current = counter.incrementAndGet();
-                    if (current >= recordLoggingCount) {
+                    if (current >= recordLoggingCount.get()) {
                         logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                        recordLoggingCount
+                        recordLoggingCount.set(recordLoggingCount.get() * 2);
                     }
                 });
             }
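Beyond the `ignore_columns` task option, this file turns the instance field `recordLoggingCount` into a per-task local. The count is mutated from inside the anonymous `TransactionalPageOutput` and the producer send callback, and locals captured there must be effectively final, hence the `AtomicInteger` wrapper; it also keeps parallel tasks from sharing one throttle. The effect is exponential log throttling: one info line after 1, 2, 4, 8, ... sent records. A standalone sketch of the same pattern (names illustrative):

import java.util.concurrent.atomic.AtomicInteger;

public class SendLogSketch
{
    public static void main(String[] args)
    {
        AtomicInteger counter = new AtomicInteger(0);
        AtomicInteger recordLoggingCount = new AtomicInteger(1);

        // Stand-in for the producer send callback in the diff.
        Runnable onSent = () -> {
            int current = counter.incrementAndGet();
            if (current >= recordLoggingCount.get()) {
                System.out.printf("Producer sent %d records%n", current);
                recordLoggingCount.set(recordLoggingCount.get() * 2);
            }
        };

        for (int i = 0; i < 20; i++) {
            onSent.run();   // logs at 1, 2, 4, 8, and 16 records
        }
    }
}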
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - joker1007
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-09-
+date: 2019-09-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -54,7 +54,7 @@ files:
 - classpath/common-config-5.3.0.jar
 - classpath/common-utils-5.3.0.jar
 - classpath/commons-compress-1.18.jar
-- classpath/embulk-output-kafka-0.1.1.jar
+- classpath/embulk-output-kafka-0.1.2.jar
 - classpath/jackson-annotations-2.9.0.jar
 - classpath/jackson-core-2.9.9.jar
 - classpath/jackson-databind-2.9.9.jar
@@ -73,6 +73,7 @@ files:
 - classpath/zstd-jni-1.4.0-1.jar
 - config/checkstyle/checkstyle.xml
 - config/checkstyle/default.xml
+- docker-compose.yml
 - gradle/wrapper/gradle-wrapper.jar
 - gradle/wrapper/gradle-wrapper.properties
 - gradlew
data/classpath/embulk-output-kafka-0.1.1.jar
DELETED
Binary file