ruby-kafka 1.0.0 → 1.2.0
- checksums.yaml +4 -4
- data/.circleci/config.yml +33 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +15 -0
- data/README.md +29 -0
- data/lib/kafka/async_producer.rb +2 -2
- data/lib/kafka/client.rb +37 -4
- data/lib/kafka/consumer.rb +61 -11
- data/lib/kafka/consumer_group.rb +6 -0
- data/lib/kafka/datadog.rb +18 -3
- data/lib/kafka/fetcher.rb +1 -1
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/offset_manager.rb +12 -1
- data/lib/kafka/partitioner.rb +1 -1
- data/lib/kafka/producer.rb +9 -4
- data/lib/kafka/protocol/metadata_response.rb +1 -1
- data/lib/kafka/ssl_context.rb +4 -3
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +1 -1
- metadata +4 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0f8391cc7b1989cb5f669796bc4ad647b77d882e6506fae42bab18acb8a6bcc6
+  data.tar.gz: 012baaff5d2cc9eb17e3a7b7342f49f7c905a5f91d26078fa0ecf2f0fa81a2ad
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7f4e9302ca0ab41a6fded75f95ed866d959a2a027b90051ed7f3d7fba573aa63e57be7692d004dbeb3bacd99fe44b24188f81b0f3bed50f68e1da5189262271f
+  data.tar.gz: 7266bdd50e66a7ab9b3c71025468af0a4bea778fd312c41d4c699e2609420c11c28accb3246407ce98330bf521c109691bd1d4943dd532a44ee6732f1a410922
data/.circleci/config.yml CHANGED
@@ -267,6 +267,38 @@ jobs:
       - run: bundle install --path vendor/bundle
       - run: bundle exec rspec --profile --tag functional spec/functional

+  kafka-2.5:
+    docker:
+      - image: circleci/ruby:2.5.1-node
+        environment:
+          LOG_LEVEL: DEBUG
+      - image: wurstmeister/zookeeper
+      - image: wurstmeister/kafka:2.12-2.5.0
+        environment:
+          KAFKA_ADVERTISED_HOST_NAME: localhost
+          KAFKA_ADVERTISED_PORT: 9092
+          KAFKA_PORT: 9092
+          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
+          KAFKA_DELETE_TOPIC_ENABLE: true
+      - image: wurstmeister/kafka:2.12-2.5.0
+        environment:
+          KAFKA_ADVERTISED_HOST_NAME: localhost
+          KAFKA_ADVERTISED_PORT: 9093
+          KAFKA_PORT: 9093
+          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
+          KAFKA_DELETE_TOPIC_ENABLE: true
+      - image: wurstmeister/kafka:2.12-2.5.0
+        environment:
+          KAFKA_ADVERTISED_HOST_NAME: localhost
+          KAFKA_ADVERTISED_PORT: 9094
+          KAFKA_PORT: 9094
+          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
+          KAFKA_DELETE_TOPIC_ENABLE: true
+    steps:
+      - checkout
+      - run: bundle install --path vendor/bundle
+      - run: bundle exec rspec --profile --tag functional spec/functional
+
 workflows:
   version: 2
   test:
@@ -280,3 +312,4 @@ workflows:
       - kafka-2.2
       - kafka-2.3
       - kafka-2.4
+      - kafka-2.5
data/.ruby-version CHANGED
@@ -1 +1 @@
-2.
+2.7.1
data/CHANGELOG.md CHANGED
@@ -4,6 +4,21 @@ Changes and additions to the library will be listed here.

 ## Unreleased

+## 1.2.0
+
+- Add producer and consumer interceptors (#837).
+- Add support for configuring the client partitioner (#848).
+
+## 1.1.0
+
+- Extra sanity checking when marking offsets as processed (#824).
+- Make `verify_hostname` settable for SSL contexts (#828).
+- Instrument `create_time` from last message in batch (#811).
+- Add client function for fetching topic replica count (#822).
+- Allow consumers to refresh the topic lists (#818).
+- Disconnect after leaving a group (#817).
+- Use `max_wait_time` as the sleep instead of hardcoded 2 seconds (#825).
+
 ## 1.0.0

 - Add client methods to manage configs (#759)
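Both 1.2.0 additions surface through `Kafka.new` and the producer/consumer factories. A minimal sketch of wiring them together, assuming a broker at `localhost:9092` (both lambdas are illustrative; any object responding to `call` works):

```ruby
require "kafka"

# An interceptor receives a Kafka::PendingMessage (producer side) or a
# Kafka::FetchedBatch (consumer side) and must return it, since each
# interceptor's return value feeds the next one in the chain.
logging_interceptor = ->(intercepted) { puts intercepted.topic; intercepted }

# A partitioner maps (partition_count, message) to a partition number.
key_partitioner = ->(partition_count, message) { message.partition_key.to_s.sum % partition_count }

kafka = Kafka.new(["localhost:9092"], client_id: "example", partitioner: key_partitioner)
producer = kafka.producer(interceptors: [logging_interceptor])
consumer = kafka.consumer(group_id: "example-group", interceptors: [logging_interceptor])
```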
data/README.md CHANGED
@@ -118,10 +118,16 @@ Or install it yourself as:
     <td>Limited support</td>
     <td>Limited support</td>
   </tr>
+  <tr>
     <th>Kafka 2.4</th>
     <td>Limited support</td>
     <td>Limited support</td>
   </tr>
+  <tr>
+    <th>Kafka 2.5</th>
+    <td>Limited support</td>
+    <td>Limited support</td>
+  </tr>
 </table>

 This library is targeting Kafka 0.9 with the v0.4.x series and Kafka 0.10 with the v0.5.x series. There's limited support for Kafka 0.8, and things should work with Kafka 0.11, although there may be performance issues due to changes in the protocol.
@@ -136,6 +142,7 @@ This library is targeting Kafka 0.9 with the v0.4.x series and Kafka 0.10 with t
 - **Kafka 2.2:** Everything that works with Kafka 2.1 should still work, but so far no features specific to Kafka 2.2 have been added.
 - **Kafka 2.3:** Everything that works with Kafka 2.2 should still work, but so far no features specific to Kafka 2.3 have been added.
 - **Kafka 2.4:** Everything that works with Kafka 2.3 should still work, but so far no features specific to Kafka 2.4 have been added.
+- **Kafka 2.5:** Everything that works with Kafka 2.4 should still work, but so far no features specific to Kafka 2.5 have been added.

 This library requires Ruby 2.1 or higher.

@@ -342,6 +349,26 @@ partition = PartitioningScheme.assign(partitions, event)
 producer.produce(event, topic: "events", partition: partition)
 ```

+Another option is to configure a custom client partitioner that implements `call(partition_count, message)` and uses the same schema as the other client. For example:
+
+```ruby
+class CustomPartitioner
+  def call(partition_count, message)
+    ...
+  end
+end
+
+partitioner = CustomPartitioner.new
+Kafka.new(partitioner: partitioner, ...)
+```
+
+Or, simply create a Proc handling the partitioning logic instead of having to add a new class. For example:
+
+```ruby
+partitioner = -> (partition_count, message) { ... }
+Kafka.new(partitioner: partitioner, ...)
+```
+
 #### Buffering and Error Handling

 The producer is designed for resilience in the face of temporary network errors, Kafka broker failovers, and other issues that prevent the client from writing messages to the destination topics. It does this by employing local, in-memory buffers. Only when messages are acknowledged by a Kafka broker will they be removed from the buffer.
@@ -945,6 +972,8 @@ This configures the store to look up CA certificates from the system default cer

 In order to authenticate the client to the cluster, you need to pass in a certificate and key created for the client and trusted by the brokers.

+**NOTE**: You can disable hostname validation by passing `verify_hostname: false`.
+
 ```ruby
 kafka = Kafka.new(
   ["kafka1:9092"],
data/lib/kafka/async_producer.rb CHANGED
@@ -246,10 +246,10 @@ module Kafka

     private

-    def produce(
+    def produce(value, **kwargs)
       retries = 0
       begin
-        @producer.produce(
+        @producer.produce(value, **kwargs)
       rescue BufferOverflow => e
         deliver_messages
         if @max_retries == -1
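The rewritten delegation forwards keyword arguments explicitly, which keeps `AsyncProducer#produce` working under Ruby 2.7's stricter separation of positional and keyword arguments. A brief sketch of the unchanged public call:

```ruby
producer = kafka.async_producer(delivery_interval: 10)

# `topic:` and the other options now reach the underlying sync producer as
# real keyword arguments rather than a trailing options hash.
producer.produce("hello", topic: "greetings", partition_key: "user-1")
producer.shutdown
```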
data/lib/kafka/client.rb CHANGED
@@ -62,16 +62,25 @@ module Kafka
   #
   # @param sasl_over_ssl [Boolean] whether to enforce SSL with SASL
   #
+  # @param ssl_ca_certs_from_system [Boolean] whether to use the CA certs from the
+  #   system's default certificate store.
+  #
+  # @param partitioner [Partitioner, nil] the partitioner that should be used by the client.
+  #
   # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
   #   implements method token. See {Sasl::OAuth#initialize}
   #
+  # @param ssl_verify_hostname [Boolean, true] whether to verify that the host serving
+  #   the SSL certificate and the signing chain of the certificate have the correct domains
+  #   based on the CA certificate
+  #
   # @return [Client]
   def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
                  ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
                  ssl_client_cert_key_password: nil, ssl_client_cert_chain: nil, sasl_gssapi_principal: nil,
                  sasl_gssapi_keytab: nil, sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
                  sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
-                 sasl_over_ssl: true, ssl_ca_certs_from_system: false, sasl_oauth_token_provider: nil, ssl_verify_hostname: true)
+                 sasl_over_ssl: true, ssl_ca_certs_from_system: false, partitioner: nil, sasl_oauth_token_provider: nil, ssl_verify_hostname: true)
     @logger = TaggedLogger.new(logger)
     @instrumenter = Instrumenter.new(client_id: client_id)
     @seed_brokers = normalize_seed_brokers(seed_brokers)
@@ -115,6 +124,7 @@ module Kafka
     )

     @cluster = initialize_cluster
+    @partitioner = partitioner || Partitioner.new
   end

   # Delivers a single message to the Kafka cluster.
@@ -153,7 +163,7 @@ module Kafka

     if partition.nil?
       partition_count = @cluster.partitions_for(topic).count
-      partition =
+      partition = @partitioner.call(partition_count, message)
     end

     buffer = MessageBuffer.new
@@ -244,6 +254,9 @@ module Kafka
   #   be in a message set before it should be compressed. Note that message sets
   #   are per-partition rather than per-topic or per-producer.
   #
+  # @param interceptors [Array<Object>] a list of producer interceptors that implement
+  #   `call(Kafka::PendingMessage)`.
+  #
   # @return [Kafka::Producer] the Kafka producer.
   def producer(
     compression_codec: nil,
@@ -257,7 +270,8 @@ module Kafka
     idempotent: false,
     transactional: false,
     transactional_id: nil,
-    transactional_timeout: 60
+    transactional_timeout: 60,
+    interceptors: []
   )
     cluster = initialize_cluster
     compressor = Compressor.new(
@@ -287,6 +301,8 @@ module Kafka
       retry_backoff: retry_backoff,
       max_buffer_size: max_buffer_size,
       max_buffer_bytesize: max_buffer_bytesize,
+      partitioner: @partitioner,
+      interceptors: interceptors
     )
   end

@@ -336,6 +352,11 @@ module Kafka
   # @param fetcher_max_queue_size [Integer] max number of items in the fetch queue that
   #   are stored for further processing. Note, that each item in the queue represents a
   #   response from a single broker.
+  # @param refresh_topic_interval [Integer] interval of refreshing the topic list.
+  #   If it is 0, the topic list won't be refreshed (default).
+  #   If it is n (n > 0), the topic list will be refreshed every n seconds.
+  # @param interceptors [Array<Object>] a list of consumer interceptors that implement
+  #   `call(Kafka::FetchedBatch)`.
   # @return [Consumer]
   def consumer(
     group_id:,
@@ -345,7 +366,9 @@ module Kafka
     offset_commit_threshold: 0,
     heartbeat_interval: 10,
     offset_retention_time: nil,
-    fetcher_max_queue_size: 100
+    fetcher_max_queue_size: 100,
+    refresh_topic_interval: 0,
+    interceptors: []
   )
     cluster = initialize_cluster

@@ -399,6 +422,8 @@ module Kafka
       fetcher: fetcher,
       session_timeout: session_timeout,
       heartbeat: heartbeat,
+      refresh_topic_interval: refresh_topic_interval,
+      interceptors: interceptors
     )
   end

@@ -694,6 +719,14 @@ module Kafka
     @cluster.partitions_for(topic).count
   end

+  # Counts the number of replicas for a topic's partition
+  #
+  # @param topic [String]
+  # @return [Integer] the number of replica nodes for the topic's partition
+  def replica_count_for(topic)
+    @cluster.partitions_for(topic).first.replicas.count
+  end
+
   # Retrieve the offset of the last message in a partition. If there are no
   # messages in the partition -1 is returned.
   #
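A short sketch of the new client surface, assuming a `greetings` topic exists on the cluster:

```ruby
kafka = Kafka.new(["localhost:9092"], client_id: "example")

# 1.1.0: replication factor of the topic, read from the first partition's metadata.
kafka.replica_count_for("greetings") #=> 3 on a cluster with replication factor 3

# 1.1.0/1.2.0: the consumer factory now accepts the refresh interval and interceptors.
consumer = kafka.consumer(group_id: "example-group", refresh_topic_interval: 30, interceptors: [])
```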
data/lib/kafka/consumer.rb CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true

 require "kafka/consumer_group"
+require "kafka/interceptors"
 require "kafka/offset_manager"
 require "kafka/fetcher"
 require "kafka/pause"
@@ -44,7 +45,8 @@ module Kafka
   #
   class Consumer

-    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
+    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
+                   session_timeout:, heartbeat:, refresh_topic_interval: 0, interceptors: [])
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @instrumenter = instrumenter
@@ -53,6 +55,8 @@ module Kafka
       @session_timeout = session_timeout
       @fetcher = fetcher
       @heartbeat = heartbeat
+      @refresh_topic_interval = refresh_topic_interval
+      @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)

       @pauses = Hash.new {|h, k|
         h[k] = Hash.new {|h2, k2|
@@ -73,6 +77,15 @@ module Kafka
       # when user commits message other than last in a batch, this would make ruby-kafka refetch
       # some already consumed messages
       @current_offsets = Hash.new { |h, k| h[k] = {} }
+
+      # Map storing subscribed topics with their configuration
+      @subscribed_topics = Hash.new
+
+      # Set storing topics that matched topics in @subscribed_topics
+      @matched_topics = Set.new
+
+      # Whether join_group must be executed again because new topics are added
+      @join_group_for_new_topics = false
     end

     # Subscribes the consumer to a topic.
@@ -97,13 +110,12 @@ module Kafka
     def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
       default_offset ||= start_from_beginning ? :earliest : :latest

-      if topic_or_regex.is_a?(Regexp)
-        cluster_topics.select { |topic| topic =~ topic_or_regex }.each do |topic|
-          subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
-        end
-      else
-        subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
-      end
+      @subscribed_topics[topic_or_regex] = {
+        default_offset: default_offset,
+        start_from_beginning: start_from_beginning,
+        max_bytes_per_partition: max_bytes_per_partition
+      }
+      scan_for_subscribing

       nil
     end
@@ -116,7 +128,6 @@ module Kafka
     def stop
       @running = false
       @fetcher.stop
-      @cluster.disconnect
     end

     # Pause processing of a specific topic partition.
@@ -212,6 +223,7 @@ module Kafka
       batches = fetch_batches

       batches.each do |batch|
+        batch = @interceptors.call(batch)
         batch.messages.each do |message|
           notification = {
             topic: message.topic,
@@ -303,11 +315,13 @@ module Kafka
       unless batch.empty?
         raw_messages = batch.messages
         batch.messages = raw_messages.reject(&:is_control_record)
+        batch = @interceptors.call(batch)

         notification = {
           topic: batch.topic,
           partition: batch.partition,
           last_offset: batch.last_offset,
+          last_create_time: batch.messages.last && batch.messages.last.create_time,
           offset_lag: batch.offset_lag,
           highwater_mark_offset: batch.highwater_mark_offset,
           message_count: batch.messages.count,
@@ -401,6 +415,7 @@ module Kafka
       while running?
         begin
           @instrumenter.instrument("loop.consumer") do
+            refresh_topic_list_if_enabled
             yield
           end
         rescue HeartbeatError
@@ -432,6 +447,7 @@ module Kafka
       # important that members explicitly tell Kafka when they're leaving.
       make_final_offsets_commit!
       @group.leave rescue nil
+      @cluster.disconnect
       @running = false
       @logger.pop_tags
     end
@@ -452,6 +468,8 @@ module Kafka
     end

     def join_group
+      @join_group_for_new_topics = false
+
       old_generation_id = @group.generation_id

       @group.join
@@ -513,11 +531,19 @@ module Kafka
       end
     end

+    def refresh_topic_list_if_enabled
+      return if @refresh_topic_interval <= 0
+      return if @refreshed_at && @refreshed_at + @refresh_topic_interval > Time.now
+
+      scan_for_subscribing
+      @refreshed_at = Time.now
+    end
+
     def fetch_batches
       # Return early if the consumer has been stopped.
       return [] if shutting_down?

-      join_group
+      join_group if !@group.member? || @join_group_for_new_topics

       trigger_heartbeat

@@ -525,7 +551,7 @@ module Kafka

       if !@fetcher.data?
         @logger.debug "No batches to process"
-        sleep 2
+        sleep(@fetcher.max_wait_time || 2)
         []
       else
         tag, message = @fetcher.poll
@@ -571,10 +597,34 @@ module Kafka
       end
     end

+    def scan_for_subscribing
+      @subscribed_topics.each do |topic_or_regex, config|
+        default_offset = config.fetch(:default_offset)
+        start_from_beginning = config.fetch(:start_from_beginning)
+        max_bytes_per_partition = config.fetch(:max_bytes_per_partition)
+        if topic_or_regex.is_a?(Regexp)
+          subscribe_to_regex(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        else
+          subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        end
+      end
+    end
+
+    def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+      cluster_topics.select { |topic| topic =~ topic_regex }.each do |topic|
+        subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      end
+    end
+
     def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      return if @matched_topics.include?(topic)
+      @matched_topics.add(topic)
+      @join_group_for_new_topics = true
+
       @group.subscribe(topic)
       @offset_manager.set_default_offset(topic, default_offset)
       @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
+      @cluster.mark_as_stale!
     end

     def cluster_topics
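A sketch of how the refresh behaves at runtime, assuming topics matching `events-.*` are created while the consumer is running:

```ruby
consumer = kafka.consumer(group_id: "example-group", refresh_topic_interval: 60)

# The regex is re-evaluated against the cluster's topic list every 60 seconds;
# a newly created "events-2020" topic is picked up and triggers a group rejoin.
consumer.subscribe(/events-.*/, start_from_beginning: false)

consumer.each_message do |message|
  puts "#{message.topic}/#{message.partition}@#{message.offset}"
end
```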
data/lib/kafka/consumer_group.rb CHANGED
data/lib/kafka/datadog.rb CHANGED
@@ -31,7 +31,7 @@ module Kafka

     class << self
       def statsd
-        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
       end

       def statsd=(statsd)
@@ -57,6 +57,15 @@ module Kafka
         clear
       end

+      def socket_path
+        @socket_path
+      end
+
+      def socket_path=(socket_path)
+        @socket_path = socket_path
+        clear
+      end
+
       def namespace
         @namespace ||= STATSD_NAMESPACE
       end
@@ -87,8 +96,8 @@ module Kafka
       private

       %w[increment histogram count timing gauge].each do |type|
-        define_method(type) do |*args|
-          emit(type, *args)
+        define_method(type) do |*args, **kwargs|
+          emit(type, *args, **kwargs)
         end
       end

@@ -160,6 +169,8 @@ module Kafka
       def process_batch(event)
         offset = event.payload.fetch(:last_offset)
         messages = event.payload.fetch(:message_count)
+        create_time = event.payload.fetch(:last_create_time)
+        time_lag = create_time && ((Time.now - create_time) * 1000).to_i

         tags = {
           client: event.payload.fetch(:client_id),
@@ -176,6 +187,10 @@ module Kafka
         end

         gauge("consumer.offset", offset, tags: tags)
+
+        if time_lag
+          gauge("consumer.time_lag", time_lag, tags: tags)
+        end
       end

       def fetch_batch(event)
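With the new setter, metrics can be shipped over the DogStatsD Unix domain socket instead of UDP, and the batch instrumentation now reports a `consumer.time_lag` gauge derived from the last message's `create_time`. A minimal sketch, assuming the agent listens on the conventional socket path:

```ruby
require "kafka/datadog"

# dogstatsd-ruby ignores host/port when a socket path is configured.
Kafka::Datadog.socket_path = "/var/run/datadog/dsd.socket"
```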
data/lib/kafka/fetcher.rb CHANGED
data/lib/kafka/interceptors.rb ADDED
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Kafka
+  # Holds a list of interceptors that implement `call`
+  # and wraps calls to a chain of custom interceptors.
+  class Interceptors
+    def initialize(interceptors:, logger:)
+      @interceptors = interceptors || []
+      @logger = TaggedLogger.new(logger)
+    end
+
+    # This method is called when the client produces a message or once the batches are fetched.
+    # The message returned from the first call is passed to the second interceptor call, and so on in an
+    # interceptor chain. This method does not throw exceptions.
+    #
+    # @param intercepted [Kafka::PendingMessage || Kafka::FetchedBatch] the produced message or
+    #   fetched batch.
+    #
+    # @return [Kafka::PendingMessage || Kafka::FetchedBatch] the intercepted message or batch
+    #   returned by the last interceptor.
+    def call(intercepted)
+      @interceptors.each do |interceptor|
+        begin
+          intercepted = interceptor.call(intercepted)
+        rescue Exception => e
+          @logger.warn "Error executing interceptor for topic: #{intercepted.topic} partition: #{intercepted.partition}: #{e.message}\n#{e.backtrace.join("\n")}"
+        end
+      end
+
+      intercepted
+    end
+  end
+end
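Any object with a matching `call` can participate in the chain. The sketch below (class name illustrative) stamps a header on every produced message; returning the message is required, since each interceptor's return value feeds the next:

```ruby
require "time"

class TimestampInterceptor
  # Producer side: receives a Kafka::PendingMessage and must return one.
  def call(pending_message)
    pending_message.headers["produced_at"] = Time.now.utc.iso8601
    pending_message
  end
end

producer = kafka.producer(interceptors: [TimestampInterceptor.new])
```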
data/lib/kafka/offset_manager.rb CHANGED
@@ -50,9 +50,20 @@ module Kafka
     # @param offset [Integer] the offset of the message that should be marked as processed.
     # @return [nil]
     def mark_as_processed(topic, partition, offset)
-      @uncommitted_offsets += 1
+      unless @group.assigned_to?(topic, partition)
+        @logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
+        return
+      end
       @processed_offsets[topic] ||= {}

+      last_processed_offset = @processed_offsets[topic][partition] || -1
+      if last_processed_offset > offset + 1
+        @logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
+        return
+      end
+
+      @uncommitted_offsets += 1
+
       # The committed offset should always be the offset of the next message that the
       # application will read, thus adding one to the last message processed.
       @processed_offsets[topic][partition] = offset + 1
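Seen from the consumer's public API, the new checks make manual offset marking safe around rebalances; a sketch (`process` is a placeholder for application code):

```ruby
consumer.each_batch(automatically_mark_as_processed: false) do |batch|
  batch.messages.each do |message|
    process(message)
    # Marks are now ignored for partitions this consumer no longer owns
    # (e.g. after a rebalance), and older marks cannot clobber newer ones.
    consumer.mark_message_as_processed(message)
  end
end
```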
data/lib/kafka/partitioner.rb CHANGED
@@ -19,7 +19,7 @@ module Kafka
     # @param message [Kafka::PendingMessage] the message that should be assigned
     #   a partition.
     # @return [Integer] the partition number.
-    def
+    def call(partition_count, message)
       raise ArgumentError if partition_count == 0

       # If no explicit partition key is specified we use the message key instead.
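Because the default partitioner is now an instance exposing `call`, it is interchangeable with any user-supplied partitioner; a quick sketch of the shared contract:

```ruby
# Same call the client makes internally when no explicit partition is given;
# `message` is a Kafka::PendingMessage, `partition_count` an Integer.
partition = Kafka::Partitioner.new.call(partition_count, message)
```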
data/lib/kafka/producer.rb CHANGED
@@ -7,6 +7,7 @@ require "kafka/produce_operation"
 require "kafka/pending_message_queue"
 require "kafka/pending_message"
 require "kafka/compressor"
+require "kafka/interceptors"

 module Kafka
   # Allows sending messages to a Kafka cluster.
@@ -129,7 +130,9 @@ module Kafka
   class Producer
     class AbortTransaction < StandardError; end

-    def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
+    def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
+                   required_acks:, max_retries:, retry_backoff:, max_buffer_size:,
+                   max_buffer_bytesize:, partitioner:, interceptors: [])
       @cluster = cluster
       @transaction_manager = transaction_manager
       @logger = TaggedLogger.new(logger)
@@ -141,6 +144,8 @@ module Kafka
       @max_buffer_size = max_buffer_size
       @max_buffer_bytesize = max_buffer_bytesize
       @compressor = compressor
+      @partitioner = partitioner
+      @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)

       # The set of topics that are produced to.
       @target_topics = Set.new
@@ -191,7 +196,7 @@ module Kafka
       # We want to fail fast if `topic` isn't a String
       topic = topic.to_str

-      message = PendingMessage.new(
+      message = @interceptors.call(PendingMessage.new(
         value: value && value.to_s,
         key: key && key.to_s,
         headers: headers,
@@ -199,7 +204,7 @@ module Kafka
         partition: partition && Integer(partition),
         partition_key: partition_key && partition_key.to_s,
         create_time: create_time
-      )
+      ))

       if buffer_size >= @max_buffer_size
         buffer_overflow topic,
@@ -455,7 +460,7 @@ module Kafka

       if partition.nil?
         partition_count = @cluster.partitions_for(message.topic).count
-        partition =
+        partition = @partitioner.call(partition_count, message)
       end

       @buffer.write(
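Interception happens inside `produce`, before the message is buffered, so a producer-side interceptor sees each `Kafka::PendingMessage` exactly once per call; a brief sketch (the lambda is illustrative):

```ruby
audit = ->(msg) { puts "#{msg.topic}/#{msg.key}"; msg }
producer = kafka.producer(interceptors: [audit])

producer.produce("hi", topic: "greetings", key: "k1") # interceptor runs here
producer.deliver_messages                             # not here
```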
data/lib/kafka/ssl_context.rb CHANGED
@@ -54,11 +54,12 @@ module Kafka
         store.set_default_paths
       end
       ssl_context.cert_store = store
-      ssl_context.verify_mode = OpenSSL::SSL::VERIFY_PEER
-      # Verify certificate hostname if supported (ruby >= 2.4.0)
-      ssl_context.verify_hostname = verify_hostname if ssl_context.respond_to?(:verify_hostname=)
     end

+    ssl_context.verify_mode = OpenSSL::SSL::VERIFY_PEER
+    # Verify certificate hostname if supported (ruby >= 2.4.0)
+    ssl_context.verify_hostname = verify_hostname if ssl_context.respond_to?(:verify_hostname=)
+
     ssl_context
   end
 end
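Peer verification now applies whether or not a CA store was configured, and hostname checking can be disabled per client; a sketch, assuming self-signed broker certificates in a test environment:

```ruby
kafka = Kafka.new(
  ["kafka1:9092"],
  ssl_ca_cert: File.read("ca-cert.pem"),
  # Skips hostname matching only; certificate chain verification still runs.
  ssl_verify_hostname: false
)
```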
data/lib/kafka/version.rb CHANGED
data/ruby-kafka.gemspec CHANGED
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.2.0
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-
+date: 2020-08-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: digest-crc
@@ -386,6 +386,7 @@ files:
 - lib/kafka/gzip_codec.rb
 - lib/kafka/heartbeat.rb
 - lib/kafka/instrumenter.rb
+- lib/kafka/interceptors.rb
 - lib/kafka/lz4_codec.rb
 - lib/kafka/message_buffer.rb
 - lib/kafka/offset_manager.rb
@@ -476,7 +477,7 @@ files:
 - ruby-kafka.gemspec
 homepage: https://github.com/zendesk/ruby-kafka
 licenses:
-- Apache
+- Apache-2.0
 metadata: {}
 post_install_message:
 rdoc_options: []