embulk-output-kafka 0.1.1 → 0.1.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 55e78aa9c5aec3c15768aed48208cd4b2023c1cd
- data.tar.gz: fbdac9757f935083ab553d44fd7bfc82a59e84a2
+ metadata.gz: feda9642eebe84a68dc4ff9a3f3e2b297e2e94f0
+ data.tar.gz: 4cdb1d310d6e1ae9f4caa5102465311c963f2656
  SHA512:
- metadata.gz: 87a3889c7e9bcd881301585269086656c479ea8c6c8dc60a03a79f4a13a4d18d186a1f7e0eca5f0a9de002e0adfb20262306f0782a478f8d888642f25a12e33d
- data.tar.gz: a6e44be4d25da3fdf8bd5e836d6ce4e64effaf07dc5d1f860999a1aad661d6edbf0dcc66faa69666b66e988c3c57541cf8d4c8d10396b364641f7bd2591bd265
+ metadata.gz: ab050ce4361ef325f2dcb9a07dce6135c3735d9b65c3820ea84385bbec6f8d3f7ea810f155df4477af43f56bf75b0bb5dd82d90ef5cd911da1932ab5a9637328
+ data.tar.gz: a661ff3eb73bb171f3d04a5699aaf390fa7b0ea8b2dad5feb9cf5420237ae6f5b692011d993bb002d77981544abff890f37a7fc40308a3708e1cbfd0d57b1a1e
data/README.md CHANGED
@@ -9,14 +9,15 @@

  ## Configuration

- - **broker**: kafka broker host and port (array<string>, required)
+ - **broker**: kafka broker host and port (array(string), required)
  - **topic**: target topic name (string, required)
  - **topic_column**: use column value as target topic (string, default: `null`)
  - **schema_registry_url**: Schema Registry URL, required for the avro format (string, default: `null`)
  - **serialize_format**: record serialization format (enum, required, `json` or `avro_with_schema_registry`)
  - **avsc_file**: avro schema file path (string, default: `null`)
  - **avsc**: inline avro schema config (json, default: `null`)
- - **key_column_name**: use column value as record key (string, default: `null`, if this parameter is null, set random number as record key)
+ - **ignore_columns**: remove columns from output (array(string), default: `[]`)
+ - **key_column_name**: use column value as record key (string, default: `null`; if null, a random number is used as the record key; the named column may also appear in `ignore_columns`)
  - **record_batch_size**: kafka producer record batch size (integer, default: `1000`)
  - **acks**: kafka producer required acks (string, default: `"1"`)
  - **retries**: kafka producer max retry count (integer, default: `1`)
data/build.gradle CHANGED
@@ -17,7 +17,7 @@ configurations {
      provided
  }

- version = "0.1.1"
+ version = "0.1.2"

  sourceCompatibility = 1.8
  targetCompatibility = 1.8
data/docker-compose.yml ADDED
@@ -0,0 +1,41 @@
+ version: "3"
+ services:
+   zookeeper:
+     image: confluentinc/cp-zookeeper:5.3.0
+     hostname: zookeeper
+     container_name: zookeeper
+     ports:
+       - "2181:2181"
+     environment:
+       ZOOKEEPER_CLIENT_PORT: 2181
+       ZOOKEEPER_TICK_TIME: 2000
+
+   broker:
+     image: confluentinc/cp-kafka:5.3.0
+     hostname: broker
+     container_name: broker
+     depends_on:
+       - zookeeper
+     ports:
+       - "29092:29092"
+       - "9092:9092"
+     environment:
+       KAFKA_BROKER_ID: 1
+       KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
+       KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
+       KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://broker:9092
+       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+       KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
+
+   schema-registry:
+     image: confluentinc/cp-schema-registry:5.3.0
+     hostname: schema-registry
+     container_name: schema-registry
+     depends_on:
+       - zookeeper
+       - broker
+     ports:
+       - "48081:8081"
+     environment:
+       SCHEMA_REGISTRY_HOST_NAME: schema-registry
+       SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: 'zookeeper:2181'
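To sanity-check this stack before pointing Embulk at it, a minimal producer round-trip helps. The sketch below assumes the `org.apache.kafka:kafka-clients` library on the classpath; the topic name `test-topic` is hypothetical, and only the `localhost:9092` address comes from the compose file above:

```java
// Minimal smoke test for the compose stack above — a sketch, not part of the
// plugin. Assumes org.apache.kafka:kafka-clients on the classpath; the topic
// name "test-topic" is hypothetical.
import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

public class ComposeSmokeTest
{
    public static void main(String[] args) throws Exception
    {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // port published by the broker service
        props.put("key.serializer", StringSerializer.class.getName());
        props.put("value.serializer", StringSerializer.class.getName());

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            // get() forces the send to complete so connection failures surface immediately
            producer.send(new ProducerRecord<>("test-topic", "key", "{\"hello\":\"world\"}")).get();
        }
        System.out.println("broker reachable at localhost:9092");
    }
}
```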
@@ -29,6 +29,10 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
@@ -40,42 +44,61 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void longColumn(Column column)
     {
+        super.longColumn(column);
+
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
         }

         genericRecord.put(column.getName(), pageReader.getLong(column));
-        super.longColumn(column);
     }

     @Override
     public void doubleColumn(Column column)
     {
+        super.doubleColumn(column);
+
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
         }

         genericRecord.put(column.getName(), pageReader.getDouble(column));
-        super.doubleColumn(column);
     }

     @Override
     public void stringColumn(Column column)
     {
+        super.stringColumn(column);
+
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
         }

         genericRecord.put(column.getName(), pageReader.getString(column));
-        super.stringColumn(column);
     }

     @Override
     public void timestampColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
@@ -87,6 +110,10 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void jsonColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
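One behavioral detail in the hunks above: in `longColumn`, `doubleColumn`, and `stringColumn` the `super` call moved from the bottom of the method to the top, so the base class extracts the record key and topic *before* the ignore-column guard returns early. That ordering is what allows `key_column_name` (and `topic_column`) to reference a column listed in `ignore_columns`. A minimal sketch of the pattern, with hypothetical names standing in for the real visitor hierarchy:

```java
// Sketch of the call-ordering change — hypothetical class and method names,
// not the plugin's real API. super runs first so the base class can capture
// the record key even when the column is then dropped from the output.
import java.util.Collections;
import java.util.Set;

class BaseVisitor
{
    String recordKey;

    void stringColumn(String name, String value)
    {
        if (name.equals("id")) {
            recordKey = value; // base-class side effect: record key capture
        }
    }
}

class FormatVisitor extends BaseVisitor
{
    Set<String> ignoreColumns = Collections.singleton("id");
    StringBuilder output = new StringBuilder();

    @Override
    void stringColumn(String name, String value)
    {
        super.stringColumn(name, value); // key capture happens first...

        if (ignoreColumns.contains(name)) {
            return; // ...then the ignored column is kept out of the record
        }
        output.append(name).append('=').append(value).append(' ');
    }
}

public class OrderingDemo
{
    public static void main(String[] args)
    {
        FormatVisitor visitor = new FormatVisitor();
        visitor.stringColumn("id", "42");
        visitor.stringColumn("payload", "hello");
        // prints: key=42 output=payload=hello
        System.out.println("key=" + visitor.recordKey + " output=" + visitor.output);
    }
}
```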
@@ -27,6 +27,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -38,6 +42,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void longColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -50,6 +58,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void doubleColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -62,6 +74,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void stringColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -74,6 +90,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void timestampColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -85,6 +105,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void jsonColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -32,22 +32,33 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         }
     }

+    boolean isIgnoreColumn(Column column)
+    {
+        return task.getIgnoreColumns().stream().anyMatch(name -> name.equals(column.getName()));
+    }
+
     @Override
     public void longColumn(Column column)
     {
-        setRecordKey(column, pageReader.getLong(column));
+        if (!pageReader.isNull(column)) {
+            setRecordKey(column, pageReader.getLong(column));
+        }
     }

     @Override
     public void doubleColumn(Column column)
     {
-        setRecordKey(column, pageReader.getDouble(column));
+        if (!pageReader.isNull(column)) {
+            setRecordKey(column, pageReader.getDouble(column));
+        }
     }

     @Override
     public void stringColumn(Column column)
     {
-        setRecordKey(column, pageReader.getString(column));
-        setTopicName(column, pageReader.getString(column));
+        if (!pageReader.isNull(column)) {
+            setRecordKey(column, pageReader.getString(column));
+            setTopicName(column, pageReader.getString(column));
+        }
     }
 }
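Two things to note in the base visitor: `setRecordKey` and `setTopicName` are now only invoked for non-null page values, and the new `isIgnoreColumn` helper matches column names by plain, case-sensitive string equality — no globbing or patterns. A self-contained sketch of that matching rule (the list contents here are hypothetical):

```java
// Standalone illustration of the isIgnoreColumn matching rule: exact,
// case-sensitive string equality against the configured ignore_columns list.
import java.util.Arrays;
import java.util.List;

public class IgnoreColumnMatchDemo
{
    static boolean isIgnored(List<String> ignoreColumns, String columnName)
    {
        return ignoreColumns.stream().anyMatch(name -> name.equals(columnName));
    }

    public static void main(String[] args)
    {
        List<String> ignoreColumns = Arrays.asList("time", "_row_id");

        System.out.println(isIgnored(ignoreColumns, "time"));    // true
        System.out.println(isIgnored(ignoreColumns, "Time"));    // false: case-sensitive
        System.out.println(isIgnored(ignoreColumns, "payload")); // false
    }
}
```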
@@ -15,6 +15,7 @@ import org.embulk.config.ConfigSource;
 import org.embulk.config.Task;
 import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
+import org.embulk.spi.ColumnConfig;
 import org.embulk.spi.Exec;
 import org.embulk.spi.OutputPlugin;
 import org.embulk.spi.Page;
@@ -110,11 +111,14 @@ public class KafkaOutputPlugin
         @Config("other_producer_configs")
         @ConfigDefault("{}")
         public Map<String, String> getOtherProducerConfigs();
+
+        @Config("ignore_columns")
+        @ConfigDefault("[]")
+        public List<String> getIgnoreColumns();
     }

     private static ObjectMapper objectMapper = new ObjectMapper();
     private Logger logger = LoggerFactory.getLogger(getClass());
-    private int recordLoggingCount = 1;

     @Override
     public ConfigDiff transaction(ConfigSource config,
@@ -168,6 +172,7 @@ public class KafkaOutputPlugin
         PageReader pageReader = new PageReader(schema);
         PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
         AtomicInteger counter = new AtomicInteger(0);
+        AtomicInteger recordLoggingCount = new AtomicInteger(1);

         return new TransactionalPageOutput() {
             @Override
@@ -194,9 +199,9 @@ public class KafkaOutputPlugin
                 logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());

                 int current = counter.incrementAndGet();
-                if (current >= recordLoggingCount) {
+                if (current >= recordLoggingCount.get()) {
                     logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                    recordLoggingCount = recordLoggingCount * 2;
+                    recordLoggingCount.set(recordLoggingCount.get() * 2);
                 }
             });
         }
@@ -255,6 +260,7 @@ public class KafkaOutputPlugin
         PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();

         AtomicInteger counter = new AtomicInteger(0);
+        AtomicInteger recordLoggingCount = new AtomicInteger(1);

         final org.apache.avro.Schema finalAvroSchema = avroSchema;
         return new TransactionalPageOutput()
@@ -284,9 +290,9 @@ public class KafkaOutputPlugin
                 logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());

                 int current = counter.incrementAndGet();
-                if (current >= recordLoggingCount) {
+                if (current >= recordLoggingCount.get()) {
                     logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                    recordLoggingCount = recordLoggingCount * 2;
+                    recordLoggingCount.set(recordLoggingCount.get() * 2);
                 }
             });
         }
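The `recordLoggingCount` change above is driven by Java's capture rules: the send callback is a lambda, and a lambda can only capture effectively-final locals, so the mutable threshold moves from a shared instance field into a per-output `AtomicInteger`. Doubling the threshold on each log line yields exponentially spaced progress messages (1, 2, 4, 8, ...). A standalone sketch of the idiom:

```java
// Standalone sketch of the logging idiom — not the plugin's code.
import java.util.concurrent.atomic.AtomicInteger;

public class ExponentialLoggingDemo
{
    public static void main(String[] args)
    {
        AtomicInteger counter = new AtomicInteger(0);
        // A lambda can only capture effectively-final locals, so the mutable
        // threshold is boxed in an AtomicInteger instead of a plain int.
        AtomicInteger recordLoggingCount = new AtomicInteger(1);

        Runnable onSendCompleted = () -> {
            int current = counter.incrementAndGet();
            if (current >= recordLoggingCount.get()) {
                System.out.printf("Producer sent %d records%n", current);
                recordLoggingCount.set(recordLoggingCount.get() * 2); // log at 1, 2, 4, 8, ...
            }
        };

        for (int i = 0; i < 20; i++) {
            onSendCompleted.run();
        }
    }
}
```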
@@ -24,3 +24,5 @@ out:
     - "localhost:9092"
   other_producer_configs:
     buffer.memory: "67108864"
+  ignore_columns:
+    - time
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - joker1007
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-09-12 00:00:00.000000000 Z
+date: 2019-09-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -54,7 +54,7 @@ files:
 - classpath/common-config-5.3.0.jar
 - classpath/common-utils-5.3.0.jar
 - classpath/commons-compress-1.18.jar
-- classpath/embulk-output-kafka-0.1.1.jar
+- classpath/embulk-output-kafka-0.1.2.jar
 - classpath/jackson-annotations-2.9.0.jar
 - classpath/jackson-core-2.9.9.jar
 - classpath/jackson-databind-2.9.9.jar
@@ -73,6 +73,7 @@ files:
 - classpath/zstd-jni-1.4.0-1.jar
 - config/checkstyle/checkstyle.xml
 - config/checkstyle/default.xml
+- docker-compose.yml
 - gradle/wrapper/gradle-wrapper.jar
 - gradle/wrapper/gradle-wrapper.properties
 - gradlew