embulk-output-kafka 0.1.1 → 0.1.2
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/build.gradle +1 -1
- data/classpath/embulk-output-kafka-0.1.2.jar +0 -0
- data/docker-compose.yml +41 -0
- data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java +30 -3
- data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java +24 -0
- data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java +15 -4
- data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java +11 -5
- data/src/test/resources/config_complex.yml +2 -0
- metadata +4 -3
- data/classpath/embulk-output-kafka-0.1.1.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: feda9642eebe84a68dc4ff9a3f3e2b297e2e94f0
+  data.tar.gz: 4cdb1d310d6e1ae9f4caa5102465311c963f2656
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ab050ce4361ef325f2dcb9a07dce6135c3735d9b65c3820ea84385bbec6f8d3f7ea810f155df4477af43f56bf75b0bb5dd82d90ef5cd911da1932ab5a9637328
+  data.tar.gz: a661ff3eb73bb171f3d04a5699aaf390fa7b0ea8b2dad5feb9cf5420237ae6f5b692011d993bb002d77981544abff890f37a7fc40308a3708e1cbfd0d57b1a1e
data/README.md
CHANGED
@@ -9,14 +9,15 @@

 ## Configuration

-- **broker**: kafka broker host and port (array
+- **broker**: kafka broker host and port (array(string), required)
 - **topic**: target topic name (string, required)
 - **topic_column**: use column value as target topic (string, default: `null`)
 - **schema_registry_url**: Schema Registry URL that is needed for avro format (string, default: `null`)
 - **serialize_format**: serialization format (enum, required, `json` or `avro_with_schema_registry`)
 - **avsc_file**: avro schema file path (string, default: `null`)
 - **avsc**: inline avro schema config (json, default: `null`)
-- **
+- **ignore_columns**: remove columns from output (array(string), default: `[]`)
+- **key_column_name**: use column value as record key (string, default: `null`; when null, a random number is used as the record key; the column may also appear in `ignore_columns`)
 - **record_batch_size**: kafka producer record batch size (integer, default: `1000`)
 - **acks**: kafka producer require acks (string, default: `"1"`)
 - **retries**: kafka producer max retry count (integer, default: `1`)
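As a quick illustration of the two new options documented above, a minimal `out:` section might look like this (broker address, topic, and column names are placeholders, not taken from this release):

```yaml
out:
  type: kafka
  broker:
    - localhost:9092        # placeholder broker address
  topic: my-topic           # placeholder topic
  serialize_format: json
  ignore_columns:
    - raw_payload           # dropped from the serialized record
  key_column_name: user_id  # this column's value becomes the record key
```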
data/build.gradle
CHANGED

data/classpath/embulk-output-kafka-0.1.2.jar
ADDED
Binary file
data/docker-compose.yml
ADDED
@@ -0,0 +1,41 @@
+version: "3"
+services:
+  zookeeper:
+    image: confluentinc/cp-zookeeper:5.3.0
+    hostname: zookeeper
+    container_name: zookeeper
+    ports:
+      - "2181:2181"
+    environment:
+      ZOOKEEPER_CLIENT_PORT: 2181
+      ZOOKEEPER_TICK_TIME: 2000
+
+  broker:
+    image: confluentinc/cp-kafka:5.3.0
+    hostname: broker
+    container_name: broker
+    depends_on:
+      - zookeeper
+    ports:
+      - "29092:29092"
+      - "9092:9092"
+    environment:
+      KAFKA_BROKER_ID: 1
+      KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
+      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
+      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://broker:9092
+      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
+
+  schema-registry:
+    image: confluentinc/cp-schema-registry:5.3.0
+    hostname: schema-registry
+    container_name: schema-registry
+    depends_on:
+      - zookeeper
+      - broker
+    ports:
+      - "48081:8081"
+    environment:
+      SCHEMA_REGISTRY_HOST_NAME: schema-registry
+      SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: 'zookeeper:2181'
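The compose file appears intended for local integration testing: ZooKeeper, a single Kafka broker (advertised as `broker`, with ports 9092 and 29092 published to the host), and a Schema Registry mapped to host port 48081. A hedged sketch of pointing the plugin's avro output at that stack (topic and schema path are placeholders; since the broker advertises itself as `broker`, a host-side client may need a hosts entry resolving `broker` to 127.0.0.1):

```yaml
out:
  type: kafka
  broker:
    - broker:9092                             # advertised listener from the compose file
  topic: test-avro-topic                      # placeholder topic
  serialize_format: avro_with_schema_registry
  schema_registry_url: http://localhost:48081 # host port 48081 maps to the registry's 8081
  avsc_file: /path/to/schema.avsc             # placeholder schema path
```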
data/src/main/java/org/embulk/output/kafka/AvroFormatColumnVisitor.java
CHANGED
@@ -29,6 +29,10 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
@@ -40,42 +44,61 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void longColumn(Column column)
     {
+        super.longColumn(column);
+
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
         }
 
         genericRecord.put(column.getName(), pageReader.getLong(column));
-        super.longColumn(column);
     }
 
     @Override
     public void doubleColumn(Column column)
     {
+        super.doubleColumn(column);
+
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
         }
 
         genericRecord.put(column.getName(), pageReader.getDouble(column));
-        super.doubleColumn(column);
     }
 
     @Override
     public void stringColumn(Column column)
     {
+        super.stringColumn(column);
+
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
         }
 
         genericRecord.put(column.getName(), pageReader.getString(column));
-        super.stringColumn(column);
     }
 
     @Override
     public void timestampColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
@@ -87,6 +110,10 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void jsonColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
data/src/main/java/org/embulk/output/kafka/JsonFormatColumnVisitor.java
CHANGED
@@ -27,6 +27,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -38,6 +42,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void longColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -50,6 +58,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void doubleColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -62,6 +74,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void stringColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -74,6 +90,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void timestampColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -85,6 +105,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void jsonColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
data/src/main/java/org/embulk/output/kafka/KafkaOutputColumnVisitor.java
CHANGED
@@ -32,22 +32,33 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         }
     }
 
+    boolean isIgnoreColumn(Column column)
+    {
+        return task.getIgnoreColumns().stream().anyMatch(name -> name.equals(column.getName()));
+    }
+
     @Override
     public void longColumn(Column column)
     {
-
+        if (!pageReader.isNull(column)) {
+            setRecordKey(column, pageReader.getLong(column));
+        }
     }
 
     @Override
     public void doubleColumn(Column column)
     {
-
+        if (!pageReader.isNull(column)) {
+            setRecordKey(column, pageReader.getDouble(column));
+        }
     }
 
     @Override
     public void stringColumn(Column column)
     {
-
-
+        if (!pageReader.isNull(column)) {
+            setRecordKey(column, pageReader.getString(column));
+            setTopicName(column, pageReader.getString(column));
+        }
     }
 }
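Note the interplay with the avro visitor changes above: `super.longColumn(column)` and its double/string counterparts now run before the `isIgnoreColumn` guard, so the base class captures a column's value for the record key (or topic) even when that column is excluded from the record body. A sketch of the combination the README describes, with hypothetical column names:

```yaml
out:
  type: kafka
  broker:
    - localhost:9092
  topic: events
  serialize_format: json
  key_column_name: user_id
  ignore_columns:
    - user_id   # still usable as the record key, but dropped from the JSON payload
```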
data/src/main/java/org/embulk/output/kafka/KafkaOutputPlugin.java
CHANGED
@@ -15,6 +15,7 @@ import org.embulk.config.ConfigSource;
 import org.embulk.config.Task;
 import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
+import org.embulk.spi.ColumnConfig;
 import org.embulk.spi.Exec;
 import org.embulk.spi.OutputPlugin;
 import org.embulk.spi.Page;
@@ -110,11 +111,14 @@ public class KafkaOutputPlugin
         @Config("other_producer_configs")
         @ConfigDefault("{}")
         public Map<String, String> getOtherProducerConfigs();
+
+        @Config("ignore_columns")
+        @ConfigDefault("[]")
+        public List<String> getIgnoreColumns();
     }
 
     private static ObjectMapper objectMapper = new ObjectMapper();
     private Logger logger = LoggerFactory.getLogger(getClass());
-    private int recordLoggingCount = 1;
 
     @Override
     public ConfigDiff transaction(ConfigSource config,
@@ -168,6 +172,7 @@ public class KafkaOutputPlugin
         PageReader pageReader = new PageReader(schema);
         PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
         AtomicInteger counter = new AtomicInteger(0);
+        AtomicInteger recordLoggingCount = new AtomicInteger(1);
 
         return new TransactionalPageOutput() {
             @Override
@@ -194,9 +199,9 @@ public class KafkaOutputPlugin
                     logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());
 
                     int current = counter.incrementAndGet();
-                    if (current >= recordLoggingCount) {
+                    if (current >= recordLoggingCount.get()) {
                         logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                        recordLoggingCount
+                        recordLoggingCount.set(recordLoggingCount.get() * 2);
                     }
                 });
             }
@@ -255,6 +260,7 @@ public class KafkaOutputPlugin
         PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
 
         AtomicInteger counter = new AtomicInteger(0);
+        AtomicInteger recordLoggingCount = new AtomicInteger(1);
 
         final org.apache.avro.Schema finalAvroSchema = avroSchema;
         return new TransactionalPageOutput()
@@ -284,9 +290,9 @@ public class KafkaOutputPlugin
                     logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());
 
                     int current = counter.incrementAndGet();
-                    if (current >= recordLoggingCount) {
+                    if (current >= recordLoggingCount.get()) {
                         logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                        recordLoggingCount
+                        recordLoggingCount.set(recordLoggingCount.get() * 2);
                     }
                 });
             }
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.
+  version: 0.1.2
 platform: ruby
 authors:
 - joker1007
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-09-
+date: 2019-09-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -54,7 +54,7 @@ files:
 - classpath/common-config-5.3.0.jar
 - classpath/common-utils-5.3.0.jar
 - classpath/commons-compress-1.18.jar
-- classpath/embulk-output-kafka-0.1.
+- classpath/embulk-output-kafka-0.1.2.jar
 - classpath/jackson-annotations-2.9.0.jar
 - classpath/jackson-core-2.9.9.jar
 - classpath/jackson-databind-2.9.9.jar
@@ -73,6 +73,7 @@ files:
 - classpath/zstd-jni-1.4.0-1.jar
 - config/checkstyle/checkstyle.xml
 - config/checkstyle/default.xml
+- docker-compose.yml
 - gradle/wrapper/gradle-wrapper.jar
 - gradle/wrapper/gradle-wrapper.properties
 - gradlew
data/classpath/embulk-output-kafka-0.1.1.jar
DELETED
Binary file