ruby-kafka 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +33 -0
- data/CHANGELOG.md +10 -0
- data/README.md +9 -0
- data/lib/kafka/client.rb +18 -1
- data/lib/kafka/consumer.rb +56 -11
- data/lib/kafka/consumer_group.rb +6 -0
- data/lib/kafka/datadog.rb +16 -1
- data/lib/kafka/fetcher.rb +1 -1
- data/lib/kafka/offset_manager.rb +12 -1
- data/lib/kafka/protocol/metadata_response.rb +1 -1
- data/lib/kafka/ssl_context.rb +4 -3
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 22b958f3d452f35c3c6084a5b8b790f370b96069e87ef5974838e5ea4b6945e1
|
4
|
+
data.tar.gz: eb6d704dbaace9c13a99bc93924d8a6b84ee5eb686c9c863a81d085ffeb7e92d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 22c1d59bcdd42849849122b559f0d161653a5cfa492ffb36f28a875b109444d670a76bc62ba0856ae2ebce95fd10abce9e16834928fc7fe6eb2ee006827d307c
|
7
|
+
data.tar.gz: 3a51df6b1d40e1edbd96c06f3473319f0ac1b072b040b676a8e1d980fbaad114ad0248a7670cb7ae0a51a2c79ac390f2f4cfe8f65eedb5dd55be020c6011bc18
|
data/.circleci/config.yml
CHANGED
@@ -267,6 +267,38 @@ jobs:
|
|
267
267
|
- run: bundle install --path vendor/bundle
|
268
268
|
- run: bundle exec rspec --profile --tag functional spec/functional
|
269
269
|
|
270
|
+
kafka-2.5:
|
271
|
+
docker:
|
272
|
+
- image: circleci/ruby:2.5.1-node
|
273
|
+
environment:
|
274
|
+
LOG_LEVEL: DEBUG
|
275
|
+
- image: wurstmeister/zookeeper
|
276
|
+
- image: wurstmeister/kafka:2.12-2.5.0
|
277
|
+
environment:
|
278
|
+
KAFKA_ADVERTISED_HOST_NAME: localhost
|
279
|
+
KAFKA_ADVERTISED_PORT: 9092
|
280
|
+
KAFKA_PORT: 9092
|
281
|
+
KAFKA_ZOOKEEPER_CONNECT: localhost:2181
|
282
|
+
KAFKA_DELETE_TOPIC_ENABLE: true
|
283
|
+
- image: wurstmeister/kafka:2.12-2.5.0
|
284
|
+
environment:
|
285
|
+
KAFKA_ADVERTISED_HOST_NAME: localhost
|
286
|
+
KAFKA_ADVERTISED_PORT: 9093
|
287
|
+
KAFKA_PORT: 9093
|
288
|
+
KAFKA_ZOOKEEPER_CONNECT: localhost:2181
|
289
|
+
KAFKA_DELETE_TOPIC_ENABLE: true
|
290
|
+
- image: wurstmeister/kafka:2.12-2.5.0
|
291
|
+
environment:
|
292
|
+
KAFKA_ADVERTISED_HOST_NAME: localhost
|
293
|
+
KAFKA_ADVERTISED_PORT: 9094
|
294
|
+
KAFKA_PORT: 9094
|
295
|
+
KAFKA_ZOOKEEPER_CONNECT: localhost:2181
|
296
|
+
KAFKA_DELETE_TOPIC_ENABLE: true
|
297
|
+
steps:
|
298
|
+
- checkout
|
299
|
+
- run: bundle install --path vendor/bundle
|
300
|
+
- run: bundle exec rspec --profile --tag functional spec/functional
|
301
|
+
|
270
302
|
workflows:
|
271
303
|
version: 2
|
272
304
|
test:
|
@@ -280,3 +312,4 @@ workflows:
|
|
280
312
|
- kafka-2.2
|
281
313
|
- kafka-2.3
|
282
314
|
- kafka-2.4
|
315
|
+
- kafka-2.5
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,16 @@ Changes and additions to the library will be listed here.
|
|
4
4
|
|
5
5
|
## Unreleased
|
6
6
|
|
7
|
+
## 1.1.0
|
8
|
+
|
9
|
+
- Extra sanity checking when marking offsets as processed (#824).
|
10
|
+
- Make `verify_hostname` settable for SSL contexts (#828).
|
11
|
+
- Instrument `create_time` from last message in batch (#811).
|
12
|
+
- Add client function for fetching topic replica count (#822).
|
13
|
+
- Allow consumers to refresh the topic lists (#818).
|
14
|
+
- Disconnect after leaving a group (#817).
|
15
|
+
- Use `max_wait_time` as the sleep instead of hardcoded 2 seconds (#825).
|
16
|
+
|
7
17
|
## 1.0.0
|
8
18
|
|
9
19
|
- Add client methods to manage configs (#759)
|
data/README.md
CHANGED
@@ -118,10 +118,16 @@ Or install it yourself as:
|
|
118
118
|
<td>Limited support</td>
|
119
119
|
<td>Limited support</td>
|
120
120
|
</tr>
|
121
|
+
<tr>
|
121
122
|
<th>Kafka 2.4</th>
|
122
123
|
<td>Limited support</td>
|
123
124
|
<td>Limited support</td>
|
124
125
|
</tr>
|
126
|
+
<tr>
|
127
|
+
<th>Kafka 2.5</th>
|
128
|
+
<td>Limited support</td>
|
129
|
+
<td>Limited support</td>
|
130
|
+
</tr>
|
125
131
|
</table>
|
126
132
|
|
127
133
|
This library is targeting Kafka 0.9 with the v0.4.x series and Kafka 0.10 with the v0.5.x series. There's limited support for Kafka 0.8, and things should work with Kafka 0.11, although there may be performance issues due to changes in the protocol.
|
@@ -136,6 +142,7 @@ This library is targeting Kafka 0.9 with the v0.4.x series and Kafka 0.10 with t
|
|
136
142
|
- **Kafka 2.2:** Everything that works with Kafka 2.1 should still work, but so far no features specific to Kafka 2.2 have been added.
|
137
143
|
- **Kafka 2.3:** Everything that works with Kafka 2.2 should still work, but so far no features specific to Kafka 2.3 have been added.
|
138
144
|
- **Kafka 2.4:** Everything that works with Kafka 2.3 should still work, but so far no features specific to Kafka 2.4 have been added.
|
145
|
+
- **Kafka 2.5:** Everything that works with Kafka 2.4 should still work, but so far no features specific to Kafka 2.5 have been added.
|
139
146
|
|
140
147
|
This library requires Ruby 2.1 or higher.
|
141
148
|
|
@@ -945,6 +952,8 @@ This configures the store to look up CA certificates from the system default cer
|
|
945
952
|
|
946
953
|
In order to authenticate the client to the cluster, you need to pass in a certificate and key created for the client and trusted by the brokers.
|
947
954
|
|
955
|
+
**NOTE**: You can disable hostname validation by passing `verify_hostname: false`.
|
956
|
+
|
948
957
|
```ruby
|
949
958
|
kafka = Kafka.new(
|
950
959
|
["kafka1:9092"],
|
data/lib/kafka/client.rb
CHANGED
@@ -65,6 +65,10 @@ module Kafka
|
|
65
65
|
# @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
|
66
66
|
# implements method token. See {Sasl::OAuth#initialize}
|
67
67
|
#
|
68
|
+
# @param verify_hostname [Boolean, true] whether to verify that the host serving
|
69
|
+
# the SSL certificate and the signing chain of the certificate have the correct domains
|
70
|
+
# based on the CA certificate
|
71
|
+
#
|
68
72
|
# @return [Client]
|
69
73
|
def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
|
70
74
|
ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
|
@@ -336,6 +340,9 @@ module Kafka
|
|
336
340
|
# @param fetcher_max_queue_size [Integer] max number of items in the fetch queue that
|
337
341
|
# are stored for further processing. Note, that each item in the queue represents a
|
338
342
|
# response from a single broker.
|
343
|
+
# @param refresh_topic_interval [Integer] interval of refreshing the topic list.
|
344
|
+
# If it is 0, the topic list won't be refreshed (default)
|
345
|
+
# If it is n (n > 0), the topic list will be refreshed every n seconds
|
339
346
|
# @return [Consumer]
|
340
347
|
def consumer(
|
341
348
|
group_id:,
|
@@ -345,7 +352,8 @@ module Kafka
|
|
345
352
|
offset_commit_threshold: 0,
|
346
353
|
heartbeat_interval: 10,
|
347
354
|
offset_retention_time: nil,
|
348
|
-
fetcher_max_queue_size: 100
|
355
|
+
fetcher_max_queue_size: 100,
|
356
|
+
refresh_topic_interval: 0
|
349
357
|
)
|
350
358
|
cluster = initialize_cluster
|
351
359
|
|
@@ -399,6 +407,7 @@ module Kafka
|
|
399
407
|
fetcher: fetcher,
|
400
408
|
session_timeout: session_timeout,
|
401
409
|
heartbeat: heartbeat,
|
410
|
+
refresh_topic_interval: refresh_topic_interval
|
402
411
|
)
|
403
412
|
end
|
404
413
|
|
@@ -694,6 +703,14 @@ module Kafka
|
|
694
703
|
@cluster.partitions_for(topic).count
|
695
704
|
end
|
696
705
|
|
706
|
+
# Counts the number of replicas for a topic's partition
|
707
|
+
#
|
708
|
+
# @param topic [String]
|
709
|
+
# @return [Integer] the number of replica nodes for the topic's partition
|
710
|
+
def replica_count_for(topic)
|
711
|
+
@cluster.partitions_for(topic).first.replicas.count
|
712
|
+
end
|
713
|
+
|
697
714
|
# Retrieve the offset of the last message in a partition. If there are no
|
698
715
|
# messages in the partition -1 is returned.
|
699
716
|
#
|
data/lib/kafka/consumer.rb
CHANGED
@@ -44,7 +44,7 @@ module Kafka
|
|
44
44
|
#
|
45
45
|
class Consumer
|
46
46
|
|
47
|
-
def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
|
47
|
+
def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:, refresh_topic_interval: 0)
|
48
48
|
@cluster = cluster
|
49
49
|
@logger = TaggedLogger.new(logger)
|
50
50
|
@instrumenter = instrumenter
|
@@ -53,6 +53,7 @@ module Kafka
|
|
53
53
|
@session_timeout = session_timeout
|
54
54
|
@fetcher = fetcher
|
55
55
|
@heartbeat = heartbeat
|
56
|
+
@refresh_topic_interval = refresh_topic_interval
|
56
57
|
|
57
58
|
@pauses = Hash.new {|h, k|
|
58
59
|
h[k] = Hash.new {|h2, k2|
|
@@ -73,6 +74,15 @@ module Kafka
|
|
73
74
|
# when user commits message other than last in a batch, this would make ruby-kafka refetch
|
74
75
|
# some already consumed messages
|
75
76
|
@current_offsets = Hash.new { |h, k| h[k] = {} }
|
77
|
+
|
78
|
+
# Map storing subscribed topics with their configuration
|
79
|
+
@subscribed_topics = Concurrent::Map.new
|
80
|
+
|
81
|
+
# Set storing topics that matched topics in @subscribed_topics
|
82
|
+
@matched_topics = Set.new
|
83
|
+
|
84
|
+
# Whether join_group must be executed again because new topics are added
|
85
|
+
@join_group_for_new_topics = false
|
76
86
|
end
|
77
87
|
|
78
88
|
# Subscribes the consumer to a topic.
|
@@ -97,13 +107,12 @@ module Kafka
|
|
97
107
|
def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
|
98
108
|
default_offset ||= start_from_beginning ? :earliest : :latest
|
99
109
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
end
|
110
|
+
@subscribed_topics[topic_or_regex] = {
|
111
|
+
default_offset: default_offset,
|
112
|
+
start_from_beginning: start_from_beginning,
|
113
|
+
max_bytes_per_partition: max_bytes_per_partition
|
114
|
+
}
|
115
|
+
scan_for_subscribing
|
107
116
|
|
108
117
|
nil
|
109
118
|
end
|
@@ -116,7 +125,6 @@ module Kafka
|
|
116
125
|
def stop
|
117
126
|
@running = false
|
118
127
|
@fetcher.stop
|
119
|
-
@cluster.disconnect
|
120
128
|
end
|
121
129
|
|
122
130
|
# Pause processing of a specific topic partition.
|
@@ -308,6 +316,7 @@ module Kafka
|
|
308
316
|
topic: batch.topic,
|
309
317
|
partition: batch.partition,
|
310
318
|
last_offset: batch.last_offset,
|
319
|
+
last_create_time: batch.messages.last.try(:create_time),
|
311
320
|
offset_lag: batch.offset_lag,
|
312
321
|
highwater_mark_offset: batch.highwater_mark_offset,
|
313
322
|
message_count: batch.messages.count,
|
@@ -401,6 +410,7 @@ module Kafka
|
|
401
410
|
while running?
|
402
411
|
begin
|
403
412
|
@instrumenter.instrument("loop.consumer") do
|
413
|
+
refresh_topic_list_if_enabled
|
404
414
|
yield
|
405
415
|
end
|
406
416
|
rescue HeartbeatError
|
@@ -432,6 +442,7 @@ module Kafka
|
|
432
442
|
# important that members explicitly tell Kafka when they're leaving.
|
433
443
|
make_final_offsets_commit!
|
434
444
|
@group.leave rescue nil
|
445
|
+
@cluster.disconnect
|
435
446
|
@running = false
|
436
447
|
@logger.pop_tags
|
437
448
|
end
|
@@ -452,6 +463,8 @@ module Kafka
|
|
452
463
|
end
|
453
464
|
|
454
465
|
def join_group
|
466
|
+
@join_group_for_new_topics = false
|
467
|
+
|
455
468
|
old_generation_id = @group.generation_id
|
456
469
|
|
457
470
|
@group.join
|
@@ -513,11 +526,19 @@ module Kafka
|
|
513
526
|
end
|
514
527
|
end
|
515
528
|
|
529
|
+
def refresh_topic_list_if_enabled
|
530
|
+
return if @refresh_topic_interval <= 0
|
531
|
+
return if @refreshed_at && @refreshed_at + @refresh_topic_interval > Time.now
|
532
|
+
|
533
|
+
scan_for_subscribing
|
534
|
+
@refreshed_at = Time.now
|
535
|
+
end
|
536
|
+
|
516
537
|
def fetch_batches
|
517
538
|
# Return early if the consumer has been stopped.
|
518
539
|
return [] if shutting_down?
|
519
540
|
|
520
|
-
join_group
|
541
|
+
join_group if !@group.member? || @join_group_for_new_topics
|
521
542
|
|
522
543
|
trigger_heartbeat
|
523
544
|
|
@@ -525,7 +546,7 @@ module Kafka
|
|
525
546
|
|
526
547
|
if !@fetcher.data?
|
527
548
|
@logger.debug "No batches to process"
|
528
|
-
sleep 2
|
549
|
+
sleep(@fetcher.max_wait_time || 2)
|
529
550
|
[]
|
530
551
|
else
|
531
552
|
tag, message = @fetcher.poll
|
@@ -571,10 +592,34 @@ module Kafka
|
|
571
592
|
end
|
572
593
|
end
|
573
594
|
|
595
|
+
def scan_for_subscribing
|
596
|
+
@subscribed_topics.each do |topic_or_regex, config|
|
597
|
+
default_offset = config.fetch(:default_offset)
|
598
|
+
start_from_beginning = config.fetch(:start_from_beginning)
|
599
|
+
max_bytes_per_partition = config.fetch(:max_bytes_per_partition)
|
600
|
+
if topic_or_regex.is_a?(Regexp)
|
601
|
+
subscribe_to_regex(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
|
602
|
+
else
|
603
|
+
subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
|
604
|
+
end
|
605
|
+
end
|
606
|
+
end
|
607
|
+
|
608
|
+
def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
|
609
|
+
cluster_topics.select { |topic| topic =~ topic_regex }.each do |topic|
|
610
|
+
subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
|
611
|
+
end
|
612
|
+
end
|
613
|
+
|
574
614
|
def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
|
615
|
+
return if @matched_topics.include?(topic)
|
616
|
+
@matched_topics.add(topic)
|
617
|
+
@join_group_for_new_topics = true
|
618
|
+
|
575
619
|
@group.subscribe(topic)
|
576
620
|
@offset_manager.set_default_offset(topic, default_offset)
|
577
621
|
@fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
|
622
|
+
@cluster.mark_as_stale!
|
578
623
|
end
|
579
624
|
|
580
625
|
def cluster_topics
|
data/lib/kafka/consumer_group.rb
CHANGED
data/lib/kafka/datadog.rb
CHANGED
@@ -31,7 +31,7 @@ module Kafka
|
|
31
31
|
|
32
32
|
class << self
|
33
33
|
def statsd
|
34
|
-
@statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
|
34
|
+
@statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
|
35
35
|
end
|
36
36
|
|
37
37
|
def statsd=(statsd)
|
@@ -57,6 +57,15 @@ module Kafka
|
|
57
57
|
clear
|
58
58
|
end
|
59
59
|
|
60
|
+
def socket_path
|
61
|
+
@socket_path
|
62
|
+
end
|
63
|
+
|
64
|
+
def socket_path=(socket_path)
|
65
|
+
@socket_path = socket_path
|
66
|
+
clear
|
67
|
+
end
|
68
|
+
|
60
69
|
def namespace
|
61
70
|
@namespace ||= STATSD_NAMESPACE
|
62
71
|
end
|
@@ -160,6 +169,8 @@ module Kafka
|
|
160
169
|
def process_batch(event)
|
161
170
|
offset = event.payload.fetch(:last_offset)
|
162
171
|
messages = event.payload.fetch(:message_count)
|
172
|
+
create_time = event.payload.fetch(:last_create_time)
|
173
|
+
time_lag = create_time && ((Time.now - create_time) * 1000).to_i
|
163
174
|
|
164
175
|
tags = {
|
165
176
|
client: event.payload.fetch(:client_id),
|
@@ -176,6 +187,10 @@ module Kafka
|
|
176
187
|
end
|
177
188
|
|
178
189
|
gauge("consumer.offset", offset, tags: tags)
|
190
|
+
|
191
|
+
if time_lag
|
192
|
+
gauge("consumer.time_lag", time_lag, tags: tags)
|
193
|
+
end
|
179
194
|
end
|
180
195
|
|
181
196
|
def fetch_batch(event)
|
data/lib/kafka/fetcher.rb
CHANGED
data/lib/kafka/offset_manager.rb
CHANGED
@@ -50,9 +50,20 @@ module Kafka
|
|
50
50
|
# @param offset [Integer] the offset of the message that should be marked as processed.
|
51
51
|
# @return [nil]
|
52
52
|
def mark_as_processed(topic, partition, offset)
|
53
|
-
@uncommitted_offsets += 1
|
53
|
+
unless @group.assigned_to?(topic, partition)
|
54
|
+
@logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
|
55
|
+
return
|
56
|
+
end
|
54
57
|
@processed_offsets[topic] ||= {}
|
55
58
|
|
59
|
+
last_processed_offset = @processed_offsets[topic][partition] || -1
|
60
|
+
if last_processed_offset > offset + 1
|
61
|
+
@logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
|
62
|
+
return
|
63
|
+
end
|
64
|
+
|
65
|
+
@uncommitted_offsets += 1
|
66
|
+
|
56
67
|
# The committed offset should always be the offset of the next message that the
|
57
68
|
# application will read, thus adding one to the last message processed.
|
58
69
|
@processed_offsets[topic][partition] = offset + 1
|
data/lib/kafka/ssl_context.rb
CHANGED
@@ -54,11 +54,12 @@ module Kafka
|
|
54
54
|
store.set_default_paths
|
55
55
|
end
|
56
56
|
ssl_context.cert_store = store
|
57
|
-
ssl_context.verify_mode = OpenSSL::SSL::VERIFY_PEER
|
58
|
-
# Verify certificate hostname if supported (ruby >= 2.4.0)
|
59
|
-
ssl_context.verify_hostname = verify_hostname if ssl_context.respond_to?(:verify_hostname=)
|
60
57
|
end
|
61
58
|
|
59
|
+
ssl_context.verify_mode = OpenSSL::SSL::VERIFY_PEER
|
60
|
+
# Verify certificate hostname if supported (ruby >= 2.4.0)
|
61
|
+
ssl_context.verify_hostname = verify_hostname if ssl_context.respond_to?(:verify_hostname=)
|
62
|
+
|
62
63
|
ssl_context
|
63
64
|
end
|
64
65
|
end
|
data/lib/kafka/version.rb
CHANGED
data/ruby-kafka.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-kafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Schierbeck
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: digest-crc
|
@@ -476,7 +476,7 @@ files:
|
|
476
476
|
- ruby-kafka.gemspec
|
477
477
|
homepage: https://github.com/zendesk/ruby-kafka
|
478
478
|
licenses:
|
479
|
-
- Apache
|
479
|
+
- Apache-2.0
|
480
480
|
metadata: {}
|
481
481
|
post_install_message:
|
482
482
|
rdoc_options: []
|