ruby-kafka 1.1.0 → 1.2.0
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/CHANGELOG.md +5 -0
- data/README.md +20 -0
- data/lib/kafka/async_producer.rb +2 -2
- data/lib/kafka/client.rb +22 -6
- data/lib/kafka/consumer.rb +8 -3
- data/lib/kafka/datadog.rb +2 -2
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/partitioner.rb +1 -1
- data/lib/kafka/producer.rb +9 -4
- data/lib/kafka/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0f8391cc7b1989cb5f669796bc4ad647b77d882e6506fae42bab18acb8a6bcc6
+  data.tar.gz: 012baaff5d2cc9eb17e3a7b7342f49f7c905a5f91d26078fa0ecf2f0fa81a2ad
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7f4e9302ca0ab41a6fded75f95ed866d959a2a027b90051ed7f3d7fba573aa63e57be7692d004dbeb3bacd99fe44b24188f81b0f3bed50f68e1da5189262271f
+  data.tar.gz: 7266bdd50e66a7ab9b3c71025468af0a4bea778fd312c41d4c699e2609420c11c28accb3246407ce98330bf521c109691bd1d4943dd532a44ee6732f1a410922
data/.ruby-version
CHANGED
@@ -1 +1 @@
-2.
+2.7.1
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,11 @@ Changes and additions to the library will be listed here.
 
 ## Unreleased
 
+## 1.2.0
+
+- Add producer consumer interceptors (#837).
+- Add support for configuring the client partitioner (#848).
+
 ## 1.1.0
 
 - Extra sanity checking when marking offsets as processed (#824).
data/README.md
CHANGED
@@ -349,6 +349,26 @@ partition = PartitioningScheme.assign(partitions, event)
 producer.produce(event, topic: "events", partition: partition)
 ```
 
+Another option is to configure a custom client partitioner that implements `call(partition_count, message)` and uses the same schema as the other client. For example:
+
+```ruby
+class CustomPartitioner
+  def call(partition_count, message)
+    ...
+  end
+end
+
+partitioner = CustomPartitioner.new
+Kafka.new(partitioner: partitioner, ...)
+```
+
+Or, simply create a Proc handling the partitioning logic instead of having to add a new class. For example:
+
+```ruby
+partitioner = -> (partition_count, message) { ... }
+Kafka.new(partitioner: partitioner, ...)
+```
+
 
 #### Buffering and Error Handling
 
 The producer is designed for resilience in the face of temporary network errors, Kafka broker failovers, and other issues that prevent the client from writing messages to the destination topics. It does this by employing local, in-memory buffers. Only when messages are acknowledged by a Kafka broker will they be removed from the buffer.
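The `CustomPartitioner` body in the README addition above is elided (`...`). Purely for illustration, a key-hash implementation of the new `call(partition_count, message)` interface could look like the sketch below; the `Zlib.crc32` hashing, the random fallback, and the broker address are assumptions, not part of the diff.

```ruby
require "kafka"
require "zlib"

# Illustrative sketch: hash the partition key (falling back to the message
# key) onto one of the topic's partitions.
class CustomPartitioner
  def call(partition_count, message)
    key = message.partition_key || message.key
    return rand(partition_count) if key.nil? # keyless messages: spread randomly

    Zlib.crc32(key) % partition_count
  end
end

kafka = Kafka.new(["kafka1:9092"], client_id: "my-app", partitioner: CustomPartitioner.new)
```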
data/lib/kafka/async_producer.rb
CHANGED
@@ -246,10 +246,10 @@ module Kafka
 
     private
 
-    def produce(
+    def produce(value, **kwargs)
       retries = 0
       begin
-        @producer.produce(
+        @producer.produce(value, **kwargs)
       rescue BufferOverflow => e
         deliver_messages
         if @max_retries == -1
data/lib/kafka/client.rb
CHANGED
@@ -62,10 +62,15 @@ module Kafka
     #
     # @param sasl_over_ssl [Boolean] whether to enforce SSL with SASL
     #
+    # @param ssl_ca_certs_from_system [Boolean] whether to use the CA certs from the
+    #   system's default certificate store.
+    #
+    # @param partitioner [Partitioner, nil] the partitioner that should be used by the client.
+    #
     # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
     #   implements method token. See {Sasl::OAuth#initialize}
     #
-    # @param
+    # @param ssl_verify_hostname [Boolean, true] whether to verify that the host serving
     #   the SSL certificate and the signing chain of the certificate have the correct domains
     #   based on the CA certificate
     #
@@ -75,7 +80,7 @@ module Kafka
                    ssl_client_cert_key_password: nil, ssl_client_cert_chain: nil, sasl_gssapi_principal: nil,
                    sasl_gssapi_keytab: nil, sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
                    sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
-                   sasl_over_ssl: true, ssl_ca_certs_from_system: false, sasl_oauth_token_provider: nil, ssl_verify_hostname: true)
+                   sasl_over_ssl: true, ssl_ca_certs_from_system: false, partitioner: nil, sasl_oauth_token_provider: nil, ssl_verify_hostname: true)
       @logger = TaggedLogger.new(logger)
       @instrumenter = Instrumenter.new(client_id: client_id)
       @seed_brokers = normalize_seed_brokers(seed_brokers)
@@ -119,6 +124,7 @@ module Kafka
       )
 
       @cluster = initialize_cluster
+      @partitioner = partitioner || Partitioner.new
     end
 
     # Delivers a single message to the Kafka cluster.
@@ -157,7 +163,7 @@ module Kafka
 
       if partition.nil?
         partition_count = @cluster.partitions_for(topic).count
-        partition =
+        partition = @partitioner.call(partition_count, message)
       end
 
       buffer = MessageBuffer.new
@@ -248,6 +254,9 @@ module Kafka
     #   be in a message set before it should be compressed. Note that message sets
     #   are per-partition rather than per-topic or per-producer.
     #
+    # @param interceptors [Array<Object>] a list of producer interceptors the implement
+    #   `call(Kafka::PendingMessage)`.
+    #
     # @return [Kafka::Producer] the Kafka producer.
     def producer(
       compression_codec: nil,
@@ -261,7 +270,8 @@ module Kafka
       idempotent: false,
       transactional: false,
       transactional_id: nil,
-      transactional_timeout: 60
+      transactional_timeout: 60,
+      interceptors: []
     )
       cluster = initialize_cluster
       compressor = Compressor.new(
@@ -291,6 +301,8 @@ module Kafka
         retry_backoff: retry_backoff,
         max_buffer_size: max_buffer_size,
         max_buffer_bytesize: max_buffer_bytesize,
+        partitioner: @partitioner,
+        interceptors: interceptors
       )
     end
 
@@ -343,6 +355,8 @@ module Kafka
     # @param refresh_topic_interval [Integer] interval of refreshing the topic list.
     #   If it is 0, the topic list won't be refreshed (default)
     #   If it is n (n > 0), the topic list will be refreshed every n seconds
+    # @param interceptors [Array<Object>] a list of consumer interceptors that implement
+    #   `call(Kafka::FetchedBatch)`.
     # @return [Consumer]
     def consumer(
       group_id:,
@@ -353,7 +367,8 @@ module Kafka
       heartbeat_interval: 10,
       offset_retention_time: nil,
       fetcher_max_queue_size: 100,
-      refresh_topic_interval: 0
+      refresh_topic_interval: 0,
+      interceptors: []
     )
       cluster = initialize_cluster
 
@@ -407,7 +422,8 @@ module Kafka
         fetcher: fetcher,
         session_timeout: session_timeout,
         heartbeat: heartbeat,
-        refresh_topic_interval: refresh_topic_interval
+        refresh_topic_interval: refresh_topic_interval,
+        interceptors: interceptors
       )
     end
 
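Taken together, the new `interceptors:` keywords on `Client#producer` and `Client#consumer` accept any objects that respond to `call`. A minimal usage sketch, assuming placeholder class names, broker address, and group id:

```ruby
require "kafka"

# Placeholder producer interceptor: receives each Kafka::PendingMessage at
# produce time and must return a message for the next interceptor / the buffer.
class PassThroughProducerInterceptor
  def call(pending_message)
    pending_message
  end
end

# Placeholder consumer interceptor: receives each Kafka::FetchedBatch before
# its messages are yielded and must return a batch.
class LoggingConsumerInterceptor
  def call(batch)
    puts "fetched #{batch.messages.count} messages from #{batch.topic}/#{batch.partition}"
    batch
  end
end

kafka = Kafka.new(["kafka1:9092"], client_id: "my-app")
producer = kafka.producer(interceptors: [PassThroughProducerInterceptor.new])
consumer = kafka.consumer(group_id: "my-group", interceptors: [LoggingConsumerInterceptor.new])
```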
data/lib/kafka/consumer.rb
CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "kafka/consumer_group"
+require "kafka/interceptors"
 require "kafka/offset_manager"
 require "kafka/fetcher"
 require "kafka/pause"
@@ -44,7 +45,8 @@ module Kafka
   #
   class Consumer
 
-    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
+    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
+                   session_timeout:, heartbeat:, refresh_topic_interval: 0, interceptors: [])
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @instrumenter = instrumenter
@@ -54,6 +56,7 @@ module Kafka
       @fetcher = fetcher
       @heartbeat = heartbeat
       @refresh_topic_interval = refresh_topic_interval
+      @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
 
       @pauses = Hash.new {|h, k|
         h[k] = Hash.new {|h2, k2|
@@ -76,7 +79,7 @@ module Kafka
       @current_offsets = Hash.new { |h, k| h[k] = {} }
 
       # Map storing subscribed topics with their configuration
-      @subscribed_topics =
+      @subscribed_topics = Hash.new
 
       # Set storing topics that matched topics in @subscribed_topics
       @matched_topics = Set.new
@@ -220,6 +223,7 @@ module Kafka
         batches = fetch_batches
 
         batches.each do |batch|
+          batch = @interceptors.call(batch)
           batch.messages.each do |message|
             notification = {
               topic: message.topic,
@@ -311,12 +315,13 @@ module Kafka
           unless batch.empty?
             raw_messages = batch.messages
             batch.messages = raw_messages.reject(&:is_control_record)
+            batch = @interceptors.call(batch)
 
             notification = {
               topic: batch.topic,
               partition: batch.partition,
               last_offset: batch.last_offset,
-              last_create_time: batch.messages.last.
+              last_create_time: batch.messages.last && batch.messages.last.create_time,
               offset_lag: batch.offset_lag,
               highwater_mark_offset: batch.highwater_mark_offset,
               message_count: batch.messages.count,
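The interceptor chain runs in both the `each_message` and `each_batch` paths, after control records have been filtered out. Since `FetchedBatch#messages=` is already used by the consumer itself (see the hunk above), an interceptor can prune a batch before it is yielded. A sketch, where the tombstone-filtering logic is an assumption rather than anything in the diff:

```ruby
require "kafka"

# Sketch: drop tombstone (nil-value) records so they never reach the
# application's each_message / each_batch blocks.
class DropTombstonesInterceptor
  def call(batch)
    batch.messages = batch.messages.reject { |message| message.value.nil? }
    batch
  end
end

kafka = Kafka.new(["kafka1:9092"], client_id: "my-app")
consumer = kafka.consumer(group_id: "my-group", interceptors: [DropTombstonesInterceptor.new])
```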
data/lib/kafka/interceptors.rb
ADDED
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Kafka
+  # Holds a list of interceptors that implement `call`
+  # and wraps calls to a chain of custom interceptors.
+  class Interceptors
+    def initialize(interceptors:, logger:)
+      @interceptors = interceptors || []
+      @logger = TaggedLogger.new(logger)
+    end
+
+    # This method is called when the client produces a message or once the batches are fetched.
+    # The message returned from the first call is passed to the second interceptor call, and so on in an
+    # interceptor chain. This method does not throw exceptions.
+    #
+    # @param intercepted [Kafka::PendingMessage || Kafka::FetchedBatch] the produced message or
+    #   fetched batch.
+    #
+    # @return [Kafka::PendingMessage || Kafka::FetchedBatch] the intercepted message or batch
+    #   returned by the last interceptor.
+    def call(intercepted)
+      @interceptors.each do |interceptor|
+        begin
+          intercepted = interceptor.call(intercepted)
+        rescue Exception => e
+          @logger.warn "Error executing interceptor for topic: #{intercepted.topic} partition: #{intercepted.partition}: #{e.message}\n#{e.backtrace.join("\n")}"
+        end
+      end
+
+      intercepted
+    end
+  end
+end
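Two properties of this class stand out: each interceptor receives whatever the previous one returned, and an interceptor that raises is logged as a warning and skipped rather than aborting the chain. A small sketch exercising the class directly, with a stand-in batch object so it runs without a broker (real invocations pass `Kafka::PendingMessage` or `Kafka::FetchedBatch` instances; the stub and lambdas are purely illustrative):

```ruby
require "kafka"
require "kafka/interceptors"
require "logger"

# Stand-in for a fetched batch; only topic/partition/messages are needed here.
StubBatch = Struct.new(:topic, :partition, :messages)

doubler = ->(batch) { StubBatch.new(batch.topic, batch.partition, batch.messages * 2) }
flaky   = ->(_batch) { raise "boom" } # rescued, logged as a warning, chain continues

chain = Kafka::Interceptors.new(interceptors: [doubler, flaky], logger: Logger.new($stdout))
result = chain.call(StubBatch.new("events", 0, [:msg]))
result.messages # => [:msg, :msg], the failing interceptor did not abort the call
```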
data/lib/kafka/partitioner.rb
CHANGED
@@ -19,7 +19,7 @@ module Kafka
     # @param message [Kafka::PendingMessage] the message that should be assigned
     #   a partition.
     # @return [Integer] the partition number.
-    def
+    def call(partition_count, message)
       raise ArgumentError if partition_count == 0
 
       # If no explicit partition key is specified we use the message key instead.
data/lib/kafka/producer.rb
CHANGED
@@ -7,6 +7,7 @@ require "kafka/produce_operation"
 require "kafka/pending_message_queue"
 require "kafka/pending_message"
 require "kafka/compressor"
+require "kafka/interceptors"
 
 module Kafka
   # Allows sending messages to a Kafka cluster.
@@ -129,7 +130,9 @@ module Kafka
   class Producer
     class AbortTransaction < StandardError; end
 
-    def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
+    def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
+                   required_acks:, max_retries:, retry_backoff:, max_buffer_size:,
+                   max_buffer_bytesize:, partitioner:, interceptors: [])
       @cluster = cluster
       @transaction_manager = transaction_manager
       @logger = TaggedLogger.new(logger)
@@ -141,6 +144,8 @@ module Kafka
       @max_buffer_size = max_buffer_size
       @max_buffer_bytesize = max_buffer_bytesize
       @compressor = compressor
+      @partitioner = partitioner
+      @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
 
       # The set of topics that are produced to.
       @target_topics = Set.new
@@ -191,7 +196,7 @@ module Kafka
       # We want to fail fast if `topic` isn't a String
       topic = topic.to_str
 
-      message = PendingMessage.new(
+      message = @interceptors.call(PendingMessage.new(
         value: value && value.to_s,
         key: key && key.to_s,
         headers: headers,
@@ -199,7 +204,7 @@ module Kafka
         partition: partition && Integer(partition),
         partition_key: partition_key && partition_key.to_s,
         create_time: create_time
-      )
+      ))
 
       if buffer_size >= @max_buffer_size
         buffer_overflow topic,
@@ -455,7 +460,7 @@ module Kafka
 
           if partition.nil?
             partition_count = @cluster.partitions_for(message.topic).count
-            partition =
+            partition = @partitioner.call(partition_count, message)
           end
 
           @buffer.write(
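Because the interceptor chain wraps `PendingMessage.new` inside `#produce`, producer interceptors see each message before it is buffered or assigned a partition. A sketch of a header-stamping interceptor follows; it assumes `Kafka::PendingMessage` exposes readers for all of its constructor arguments, which the hunks above suggest but do not show in full:

```ruby
require "kafka"

# Sketch: return a new pending message with an extra header stamped on at
# produce() time. The reader methods and the "produced-by" header name are
# assumptions for illustration.
class HeaderStampInterceptor
  def call(message)
    Kafka::PendingMessage.new(
      value: message.value,
      key: message.key,
      headers: message.headers.merge("produced-by" => "my-app"),
      topic: message.topic,
      partition: message.partition,
      partition_key: message.partition_key,
      create_time: message.create_time
    )
  end
end

kafka = Kafka.new(["kafka1:9092"], client_id: "my-app")
producer = kafka.producer(interceptors: [HeaderStampInterceptor.new])
producer.produce("hello", topic: "greetings")
producer.deliver_messages
```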
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 1.1.0
+  version: 1.2.0
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-
+date: 2020-08-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: digest-crc
@@ -386,6 +386,7 @@ files:
 - lib/kafka/gzip_codec.rb
 - lib/kafka/heartbeat.rb
 - lib/kafka/instrumenter.rb
+- lib/kafka/interceptors.rb
 - lib/kafka/lz4_codec.rb
 - lib/kafka/message_buffer.rb
 - lib/kafka/offset_manager.rb