embulk-output-kafka 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 55e78aa9c5aec3c15768aed48208cd4b2023c1cd
-  data.tar.gz: fbdac9757f935083ab553d44fd7bfc82a59e84a2
+  metadata.gz: feda9642eebe84a68dc4ff9a3f3e2b297e2e94f0
+  data.tar.gz: 4cdb1d310d6e1ae9f4caa5102465311c963f2656
 SHA512:
-  metadata.gz: 87a3889c7e9bcd881301585269086656c479ea8c6c8dc60a03a79f4a13a4d18d186a1f7e0eca5f0a9de002e0adfb20262306f0782a478f8d888642f25a12e33d
-  data.tar.gz: a6e44be4d25da3fdf8bd5e836d6ce4e64effaf07dc5d1f860999a1aad661d6edbf0dcc66faa69666b66e988c3c57541cf8d4c8d10396b364641f7bd2591bd265
+  metadata.gz: ab050ce4361ef325f2dcb9a07dce6135c3735d9b65c3820ea84385bbec6f8d3f7ea810f155df4477af43f56bf75b0bb5dd82d90ef5cd911da1932ab5a9637328
+  data.tar.gz: a661ff3eb73bb171f3d04a5699aaf390fa7b0ea8b2dad5feb9cf5420237ae6f5b692011d993bb002d77981544abff890f37a7fc40308a3708e1cbfd0d57b1a1e
data/README.md CHANGED
@@ -9,14 +9,15 @@
 
 ## Configuration
 
-- **broker**: kafka broker host and port (array<string>, required)
+- **broker**: kafka broker host and port (array(string), required)
 - **topic**: target topic name (string, required)
 - **topic_column**: use column value as target topic (string, default: `null`)
 - **schema_registry_url**: Schema Registry URL that is needed for avro format (string, default: `null`)
 - **serialize_format**: serialization format of output records (enum, required, `json` or `avro_with_schema_registry`)
 - **avsc_file**: avro schema file path (string, default: `null`)
 - **avsc**: inline avro schema config (json, default: `null`)
-- **key_column_name**: use column value as record key (string, default: `null`; if null, a random number is used as the record key)
+- **ignore_columns**: remove columns from output (array(string), default: `[]`)
+- **key_column_name**: use column value as record key (string, default: `null`; if null, a random number is used as the record key; the key column may also be listed in `ignore_columns`, as the sketch below illustrates)
 - **record_batch_size**: kafka producer record batch size (integer, default: `1000`)
 - **acks**: kafka producer required acks (string, default: `"1"`)
 - **retries**: kafka producer max retry count (integer, default: `1`)
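A note on the interaction mentioned in `key_column_name`: in the visitor changes below, the `super` call that extracts the record key runs before the ignore check, so a column listed in `ignore_columns` can still supply the key while being dropped from the payload. A minimal sketch of that ordering, with hypothetical `keyColumnName`, `ignoreColumns`, and `payload` names standing in for the plugin's internals:

```java
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Hypothetical model of the visitor ordering: key extraction first,
// ignore filtering second, so an ignored column can still be the key.
public class KeyBeforeIgnoreSketch {
    private final String keyColumnName = "time";                      // assumed config
    private final List<String> ignoreColumns = Arrays.asList("time"); // assumed config
    private final Map<String, Object> payload = new HashMap<>();
    private Object recordKey;

    void visit(String columnName, Object value) {
        if (columnName.equals(keyColumnName)) {
            recordKey = value;          // key captured before the filter
        }
        if (ignoreColumns.contains(columnName)) {
            return;                     // column dropped from the payload only
        }
        payload.put(columnName, value);
    }

    public static void main(String[] args) {
        KeyBeforeIgnoreSketch sketch = new KeyBeforeIgnoreSketch();
        sketch.visit("time", 1568332800L);
        sketch.visit("message", "hello");
        System.out.println(sketch.recordKey); // 1568332800
        System.out.println(sketch.payload);   // {message=hello}
    }
}
```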
data/build.gradle CHANGED
@@ -17,7 +17,7 @@ configurations {
     provided
 }
 
-version = "0.1.1"
+version = "0.1.2"
 
 sourceCompatibility = 1.8
 targetCompatibility = 1.8
data/docker-compose.yml ADDED
@@ -0,0 +1,41 @@
+version: "3"
+services:
+  zookeeper:
+    image: confluentinc/cp-zookeeper:5.3.0
+    hostname: zookeeper
+    container_name: zookeeper
+    ports:
+      - "2181:2181"
+    environment:
+      ZOOKEEPER_CLIENT_PORT: 2181
+      ZOOKEEPER_TICK_TIME: 2000
+
+  broker:
+    image: confluentinc/cp-kafka:5.3.0
+    hostname: broker
+    container_name: broker
+    depends_on:
+      - zookeeper
+    ports:
+      - "29092:29092"
+      - "9092:9092"
+    environment:
+      KAFKA_BROKER_ID: 1
+      KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
+      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
+      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://broker:9092
+      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
+
+  schema-registry:
+    image: confluentinc/cp-schema-registry:5.3.0
+    hostname: schema-registry
+    container_name: schema-registry
+    depends_on:
+      - zookeeper
+      - broker
+    ports:
+      - "48081:8081"
+    environment:
+      SCHEMA_REGISTRY_HOST_NAME: schema-registry
+      SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: 'zookeeper:2181'
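Not part of the package, but a quick way to verify the compose stack above: a minimal producer against the published `9092` port. Note that the broker advertises `PLAINTEXT_HOST://broker:9092`, so a client on the host needs the `broker` hostname to resolve (e.g. an `/etc/hosts` entry); the topic name here is made up.

```java
import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class ComposeSmokeTest {
    public static void main(String[] args) {
        Properties props = new Properties();
        // Port published by the compose file; after bootstrap the client
        // reconnects to the advertised "broker:9092", which must resolve.
        props.put("bootstrap.servers", "localhost:9092");
        props.put("key.serializer",
                "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer",
                "org.apache.kafka.common.serialization.StringSerializer");
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            // "smoke-test" is a hypothetical topic name
            producer.send(new ProducerRecord<>("smoke-test", "k", "v"));
            producer.flush();
        }
    }
}
```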
@@ -29,6 +29,10 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
@@ -40,42 +44,61 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void longColumn(Column column)
     {
+        super.longColumn(column);
+
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
         }
 
         genericRecord.put(column.getName(), pageReader.getLong(column));
-        super.longColumn(column);
     }
 
     @Override
     public void doubleColumn(Column column)
     {
+        super.doubleColumn(column);
+
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
         }
 
         genericRecord.put(column.getName(), pageReader.getDouble(column));
-        super.doubleColumn(column);
     }
 
     @Override
     public void stringColumn(Column column)
     {
+        super.stringColumn(column);
+
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
         }
 
         genericRecord.put(column.getName(), pageReader.getString(column));
-        super.stringColumn(column);
     }
 
     @Override
     public void timestampColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
@@ -87,6 +110,10 @@ public class AvroFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void jsonColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             genericRecord.put(column.getName(), null);
             return;
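The Avro visitor writes `null` for null cells, which only serializes if each field's schema is a union containing `"null"`. A minimal self-contained sketch of such a record, using a hand-written schema rather than the plugin's `avsc` config:

```java
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class NullableUnionSketch {
    public static void main(String[] args) {
        // Each field is a ["null", type] union, so put(name, null) is a
        // legal value at serialization time.
        String avsc = "{\"type\":\"record\",\"name\":\"Example\",\"fields\":["
                + "{\"name\":\"id\",\"type\":[\"null\",\"long\"],\"default\":null},"
                + "{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null}]}";
        Schema schema = new Schema.Parser().parse(avsc);
        GenericRecord record = new GenericData.Record(schema);
        record.put("id", 42L);
        record.put("name", null); // valid: "null" is a branch of the union
        System.out.println(record); // {"id": 42, "name": null}
    }
}
```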
@@ -27,6 +27,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void booleanColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -38,6 +42,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void longColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -50,6 +58,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void doubleColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -62,6 +74,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void stringColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -74,6 +90,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void timestampColumn(Column column)
    {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
@@ -85,6 +105,10 @@ public class JsonFormatColumnVisitor extends KafkaOutputColumnVisitor
     @Override
     public void jsonColumn(Column column)
     {
+        if (isIgnoreColumn(column)) {
+            return;
+        }
+
         if (pageReader.isNull(column)) {
             jsonNode.putNull(column.getName());
             return;
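For the JSON path the same pattern is simpler: an ignored column is never added to the `ObjectNode`, while a null cell becomes an explicit JSON null via `putNull`. A self-contained Jackson sketch (not the plugin's code):

```java
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;

public class JsonNullSketch {
    public static void main(String[] args) {
        ObjectNode jsonNode = new ObjectMapper().createObjectNode();
        jsonNode.put("id", 42L);      // regular cell
        jsonNode.putNull("name");     // null cell becomes an explicit JSON null
        // an ignored column is simply never put into the node
        System.out.println(jsonNode); // {"id":42,"name":null}
    }
}
```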
@@ -32,22 +32,33 @@ public abstract class KafkaOutputColumnVisitor implements ColumnVisitor
         }
     }
 
+    boolean isIgnoreColumn(Column column)
+    {
+        return task.getIgnoreColumns().stream().anyMatch(name -> name.equals(column.getName()));
+    }
+
     @Override
     public void longColumn(Column column)
     {
-        setRecordKey(column, pageReader.getLong(column));
+        if (!pageReader.isNull(column)) {
+            setRecordKey(column, pageReader.getLong(column));
+        }
     }
 
     @Override
     public void doubleColumn(Column column)
     {
-        setRecordKey(column, pageReader.getDouble(column));
+        if (!pageReader.isNull(column)) {
+            setRecordKey(column, pageReader.getDouble(column));
+        }
     }
 
     @Override
     public void stringColumn(Column column)
     {
-        setRecordKey(column, pageReader.getString(column));
-        setTopicName(column, pageReader.getString(column));
+        if (!pageReader.isNull(column)) {
+            setRecordKey(column, pageReader.getString(column));
+            setTopicName(column, pageReader.getString(column));
+        }
     }
 }
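A side note on the membership test above: the stream `anyMatch` predicate is equivalent to `List.contains`, which also compares with `equals`. A trivial demonstration with made-up column names:

```java
import java.util.Arrays;
import java.util.List;

public class ContainsSketch {
    public static void main(String[] args) {
        // Arrays.asList keeps this Java 8-compatible, matching the build's
        // sourceCompatibility = 1.8.
        List<String> ignoreColumns = Arrays.asList("time", "internal_id");
        System.out.println(ignoreColumns.contains("time"));    // true
        System.out.println(ignoreColumns.contains("payload")); // false
    }
}
```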
@@ -15,6 +15,7 @@ import org.embulk.config.ConfigSource;
 import org.embulk.config.Task;
 import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
+import org.embulk.spi.ColumnConfig;
 import org.embulk.spi.Exec;
 import org.embulk.spi.OutputPlugin;
 import org.embulk.spi.Page;
@@ -110,11 +111,14 @@ public class KafkaOutputPlugin
         @Config("other_producer_configs")
         @ConfigDefault("{}")
         public Map<String, String> getOtherProducerConfigs();
+
+        @Config("ignore_columns")
+        @ConfigDefault("[]")
+        public List<String> getIgnoreColumns();
     }
 
     private static ObjectMapper objectMapper = new ObjectMapper();
     private Logger logger = LoggerFactory.getLogger(getClass());
-    private int recordLoggingCount = 1;
 
     @Override
     public ConfigDiff transaction(ConfigSource config,
@@ -168,6 +172,7 @@ public class KafkaOutputPlugin
         PageReader pageReader = new PageReader(schema);
         PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
         AtomicInteger counter = new AtomicInteger(0);
+        AtomicInteger recordLoggingCount = new AtomicInteger(1);
 
         return new TransactionalPageOutput() {
             @Override
@@ -194,9 +199,9 @@ public class KafkaOutputPlugin
                     logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());
 
                     int current = counter.incrementAndGet();
-                    if (current >= recordLoggingCount) {
+                    if (current >= recordLoggingCount.get()) {
                         logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                        recordLoggingCount = recordLoggingCount * 2;
+                        recordLoggingCount.set(recordLoggingCount.get() * 2);
                     }
                 });
             }
@@ -255,6 +260,7 @@ public class KafkaOutputPlugin
         PrimitiveIterator.OfLong randomLong = new Random().longs(1, Long.MAX_VALUE).iterator();
 
         AtomicInteger counter = new AtomicInteger(0);
+        AtomicInteger recordLoggingCount = new AtomicInteger(1);
 
         final org.apache.avro.Schema finalAvroSchema = avroSchema;
         return new TransactionalPageOutput()
@@ -284,9 +290,9 @@ public class KafkaOutputPlugin
                     logger.debug("sent record: {key: {}, value: {}}", producerRecord.key(), producerRecord.value());
 
                     int current = counter.incrementAndGet();
-                    if (current >= recordLoggingCount) {
+                    if (current >= recordLoggingCount.get()) {
                         logger.info("[task-{}] Producer sent {} records", String.format("%04d", taskIndex), current);
-                        recordLoggingCount = recordLoggingCount * 2;
+                        recordLoggingCount.set(recordLoggingCount.get() * 2);
                     }
                 });
             }
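The `recordLoggingCount` change moves the log threshold from a plugin-level field, shared by every task and mutated from the producer's callback thread, to a per-output `AtomicInteger`, which is both thread-safe and capturable by the anonymous `TransactionalPageOutput`. A minimal standalone sketch of the resulting doubling threshold, which logs at 1, 2, 4, 8, ... records so long runs stay quiet:

```java
import java.util.concurrent.atomic.AtomicInteger;

public class DoublingLogSketch {
    public static void main(String[] args) {
        AtomicInteger counter = new AtomicInteger(0);
        AtomicInteger recordLoggingCount = new AtomicInteger(1);
        for (int i = 0; i < 20; i++) {
            int current = counter.incrementAndGet();
            if (current >= recordLoggingCount.get()) {
                System.out.printf("Producer sent %d records%n", current);
                recordLoggingCount.set(recordLoggingCount.get() * 2);
            }
        }
        // prints at 1, 2, 4, 8, and 16 records sent
    }
}
```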
@@ -24,3 +24,5 @@ out:
     - "localhost:9092"
   other_producer_configs:
     buffer.memory: "67108864"
+  ignore_columns:
+    - time
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - joker1007
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-09-12 00:00:00.000000000 Z
+date: 2019-09-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -54,7 +54,7 @@ files:
 - classpath/common-config-5.3.0.jar
 - classpath/common-utils-5.3.0.jar
 - classpath/commons-compress-1.18.jar
-- classpath/embulk-output-kafka-0.1.1.jar
+- classpath/embulk-output-kafka-0.1.2.jar
 - classpath/jackson-annotations-2.9.0.jar
 - classpath/jackson-core-2.9.9.jar
 - classpath/jackson-databind-2.9.9.jar
@@ -73,6 +73,7 @@ files:
 - classpath/zstd-jni-1.4.0-1.jar
 - config/checkstyle/checkstyle.xml
 - config/checkstyle/default.xml
+- docker-compose.yml
 - gradle/wrapper/gradle-wrapper.jar
 - gradle/wrapper/gradle-wrapper.properties
 - gradlew