ruby-kafka 1.0.0 → 1.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8ebe10cb2c93b4387a14d069c4291751808abed924cbee6f4ff2f00770aaa1c2
-  data.tar.gz: 1e392b4f2b5137cd83bd191b5c629fd98b41697d389fb5c68c3100d03c3ff218
+  metadata.gz: 22b958f3d452f35c3c6084a5b8b790f370b96069e87ef5974838e5ea4b6945e1
+  data.tar.gz: eb6d704dbaace9c13a99bc93924d8a6b84ee5eb686c9c863a81d085ffeb7e92d
 SHA512:
-  metadata.gz: 037a8fe1a7495f5e3d723e047392f68a1373e91f8a00337beb7ae6e053d031d6ab1b8597767a1b8b43399850e23493295180a24250e0c2f09b4f626316cd6915
-  data.tar.gz: 108d7160593e452f27c1e828438f4cd8c2c6c009f081a0ff949ca330f9487b948aad73cf23ba39e12adb0ed9f773c667cfeebaad1625b33770057b7b69b3dcb2
+  metadata.gz: 22c1d59bcdd42849849122b559f0d161653a5cfa492ffb36f28a875b109444d670a76bc62ba0856ae2ebce95fd10abce9e16834928fc7fe6eb2ee006827d307c
+  data.tar.gz: 3a51df6b1d40e1edbd96c06f3473319f0ac1b072b040b676a8e1d980fbaad114ad0248a7670cb7ae0a51a2c79ac390f2f4cfe8f65eedb5dd55be020c6011bc18
@@ -267,6 +267,38 @@ jobs:
       - run: bundle install --path vendor/bundle
       - run: bundle exec rspec --profile --tag functional spec/functional
 
+  kafka-2.5:
+    docker:
+      - image: circleci/ruby:2.5.1-node
+        environment:
+          LOG_LEVEL: DEBUG
+      - image: wurstmeister/zookeeper
+      - image: wurstmeister/kafka:2.12-2.5.0
+        environment:
+          KAFKA_ADVERTISED_HOST_NAME: localhost
+          KAFKA_ADVERTISED_PORT: 9092
+          KAFKA_PORT: 9092
+          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
+          KAFKA_DELETE_TOPIC_ENABLE: true
+      - image: wurstmeister/kafka:2.12-2.5.0
+        environment:
+          KAFKA_ADVERTISED_HOST_NAME: localhost
+          KAFKA_ADVERTISED_PORT: 9093
+          KAFKA_PORT: 9093
+          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
+          KAFKA_DELETE_TOPIC_ENABLE: true
+      - image: wurstmeister/kafka:2.12-2.5.0
+        environment:
+          KAFKA_ADVERTISED_HOST_NAME: localhost
+          KAFKA_ADVERTISED_PORT: 9094
+          KAFKA_PORT: 9094
+          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
+          KAFKA_DELETE_TOPIC_ENABLE: true
+    steps:
+      - checkout
+      - run: bundle install --path vendor/bundle
+      - run: bundle exec rspec --profile --tag functional spec/functional
+
 workflows:
   version: 2
   test:
@@ -280,3 +312,4 @@ workflows:
       - kafka-2.2
       - kafka-2.3
       - kafka-2.4
+      - kafka-2.5
@@ -4,6 +4,16 @@ Changes and additions to the library will be listed here.
 
 ## Unreleased
 
+## 1.1.0
+
+- Extra sanity checking when marking offsets as processed (#824).
+- Make `verify_hostname` settable for SSL contexts (#828).
+- Instrument `create_time` from last message in batch (#811).
+- Add client function for fetching topic replica count (#822).
+- Allow consumers to refresh the topic lists (#818).
+- Disconnect after leaving a group (#817).
+- Use `max_wait_time` as the sleep instead of hardcoded 2 seconds (#825).
+
 ## 1.0.0
 
 - Add client methods to manage configs (#759)
data/README.md CHANGED
@@ -118,10 +118,16 @@ Or install it yourself as:
     <td>Limited support</td>
     <td>Limited support</td>
   </tr>
+  <tr>
     <th>Kafka 2.4</th>
     <td>Limited support</td>
     <td>Limited support</td>
   </tr>
+  <tr>
+    <th>Kafka 2.5</th>
+    <td>Limited support</td>
+    <td>Limited support</td>
+  </tr>
 </table>
 
 This library is targeting Kafka 0.9 with the v0.4.x series and Kafka 0.10 with the v0.5.x series. There's limited support for Kafka 0.8, and things should work with Kafka 0.11, although there may be performance issues due to changes in the protocol.
@@ -136,6 +142,7 @@ This library is targeting Kafka 0.9 with the v0.4.x series and Kafka 0.10 with t
 - **Kafka 2.2:** Everything that works with Kafka 2.1 should still work, but so far no features specific to Kafka 2.2 have been added.
 - **Kafka 2.3:** Everything that works with Kafka 2.2 should still work, but so far no features specific to Kafka 2.3 have been added.
 - **Kafka 2.4:** Everything that works with Kafka 2.3 should still work, but so far no features specific to Kafka 2.4 have been added.
+- **Kafka 2.5:** Everything that works with Kafka 2.4 should still work, but so far no features specific to Kafka 2.5 have been added.
 
 This library requires Ruby 2.1 or higher.
 
@@ -945,6 +952,8 @@ This configures the store to look up CA certificates from the system default cer
 
 In order to authenticate the client to the cluster, you need to pass in a certificate and key created for the client and trusted by the brokers.
 
+**NOTE**: You can disable hostname validation by passing `verify_hostname: false`.
+
 ```ruby
 kafka = Kafka.new(
   ["kafka1:9092"],
@@ -65,6 +65,10 @@ module Kafka
     # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
     #   implements method token. See {Sasl::OAuth#initialize}
     #
+    # @param verify_hostname [Boolean, true] whether to verify that the host serving
+    #   the SSL certificate and the signing chain of the certificate have the correct domains
+    #   based on the CA certificate
+    #
     # @return [Client]
     def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
                    ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
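
The README note and the doc comment above describe disabling hostname validation. Below is a minimal sketch of the client side; the `Kafka.new` keyword is assumed to be `ssl_verify_hostname` (matching the `ssl_`-prefixed naming of the other TLS options), while the underlying SSL-context flag is the `verify_hostname` documented above. Broker address and certificate paths are placeholders.

```ruby
require "kafka"

# Placeholder broker and certificate paths; ssl_verify_hostname is assumed to be
# the Kafka.new keyword that feeds the verify_hostname flag of the SSL context.
kafka = Kafka.new(
  ["kafka1:9092"],
  ssl_ca_cert: File.read("my_ca_cert.pem"),
  ssl_client_cert: File.read("my_client_cert.pem"),
  ssl_client_cert_key: File.read("my_client_cert_key.pem"),
  ssl_verify_hostname: false # skip hostname validation of the broker certificate (#828)
)
```
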
@@ -336,6 +340,9 @@ module Kafka
     # @param fetcher_max_queue_size [Integer] max number of items in the fetch queue that
     #   are stored for further processing. Note, that each item in the queue represents a
     #   response from a single broker.
+    # @param refresh_topic_interval [Integer] interval of refreshing the topic list.
+    #   If it is 0, the topic list won't be refreshed (default)
+    #   If it is n (n > 0), the topic list will be refreshed every n seconds
     # @return [Consumer]
     def consumer(
         group_id:,
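
A hedged usage sketch for the new option (broker, group, and topic names are placeholders): the consumer below re-scans the cluster's topic list every 30 seconds, so topics created later that match the subscribed regex are picked up without restarting the process.

```ruby
require "kafka"

kafka = Kafka.new(["kafka1:9092"], client_id: "my-app") # placeholder brokers

consumer = kafka.consumer(
  group_id: "my-group",        # placeholder group id
  refresh_topic_interval: 30   # re-scan the topic list every 30 seconds
)

# With a Regexp subscription, newly created topics matching the pattern are
# added on the next refresh and trigger a re-join of the consumer group.
consumer.subscribe(/^events\./)

consumer.each_message do |message|
  puts "#{message.topic}/#{message.partition} @ #{message.offset}"
end
```
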
@@ -345,7 +352,8 @@ module Kafka
       offset_commit_threshold: 0,
       heartbeat_interval: 10,
       offset_retention_time: nil,
-      fetcher_max_queue_size: 100
+      fetcher_max_queue_size: 100,
+      refresh_topic_interval: 0
     )
       cluster = initialize_cluster
 
@@ -399,6 +407,7 @@ module Kafka
         fetcher: fetcher,
         session_timeout: session_timeout,
         heartbeat: heartbeat,
+        refresh_topic_interval: refresh_topic_interval
       )
     end
 
@@ -694,6 +703,14 @@ module Kafka
       @cluster.partitions_for(topic).count
     end
 
+    # Counts the number of replicas for a topic's partition
+    #
+    # @param topic [String]
+    # @return [Integer] the number of replica nodes for the topic's partition
+    def replica_count_for(topic)
+      @cluster.partitions_for(topic).first.replicas.count
+    end
+
     # Retrieve the offset of the last message in a partition. If there are no
     # messages in the partition -1 is returned.
     #
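
A small usage sketch for the new client method (broker address and topic name are placeholders):

```ruby
kafka = Kafka.new(["kafka1:9092"]) # placeholder broker

# Number of replica nodes configured for the topic (read from its first partition).
replicas = kafka.replica_count_for("my-topic")
puts "my-topic is replicated across #{replicas} broker(s)"
```
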
@@ -44,7 +44,7 @@ module Kafka
   #
   class Consumer
 
-    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
+    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:, refresh_topic_interval: 0)
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @instrumenter = instrumenter
@@ -53,6 +53,7 @@ module Kafka
       @session_timeout = session_timeout
       @fetcher = fetcher
       @heartbeat = heartbeat
+      @refresh_topic_interval = refresh_topic_interval
 
       @pauses = Hash.new {|h, k|
         h[k] = Hash.new {|h2, k2|
@@ -73,6 +74,15 @@ module Kafka
       # when user commits message other than last in a batch, this would make ruby-kafka refetch
       # some already consumed messages
       @current_offsets = Hash.new { |h, k| h[k] = {} }
+
+      # Map storing subscribed topics with their configuration
+      @subscribed_topics = Concurrent::Map.new
+
+      # Set storing topics that matched topics in @subscribed_topics
+      @matched_topics = Set.new
+
+      # Whether join_group must be executed again because new topics are added
+      @join_group_for_new_topics = false
     end
 
     # Subscribes the consumer to a topic.
@@ -97,13 +107,12 @@ module Kafka
     def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
       default_offset ||= start_from_beginning ? :earliest : :latest
 
-      if topic_or_regex.is_a?(Regexp)
-        cluster_topics.select { |topic| topic =~ topic_or_regex }.each do |topic|
-          subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
-        end
-      else
-        subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
-      end
+      @subscribed_topics[topic_or_regex] = {
+        default_offset: default_offset,
+        start_from_beginning: start_from_beginning,
+        max_bytes_per_partition: max_bytes_per_partition
+      }
+      scan_for_subscribing
 
       nil
     end
@@ -116,7 +125,6 @@ module Kafka
     def stop
       @running = false
       @fetcher.stop
-      @cluster.disconnect
     end
 
     # Pause processing of a specific topic partition.
@@ -308,6 +316,7 @@ module Kafka
             topic: batch.topic,
             partition: batch.partition,
             last_offset: batch.last_offset,
+            last_create_time: batch.messages.last.try(:create_time),
             offset_lag: batch.offset_lag,
             highwater_mark_offset: batch.highwater_mark_offset,
             message_count: batch.messages.count,
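
Because `last_create_time` now travels in the `process_batch` payload, a custom subscriber can compute its own consumer time lag the same way the Datadog reporter does further down. A hedged sketch, assuming ActiveSupport::Notifications is loaded so ruby-kafka routes its events through it, and that the event is published under ruby-kafka's usual `process_batch.consumer.kafka` name:

```ruby
require "active_support/notifications"

ActiveSupport::Notifications.subscribe("process_batch.consumer.kafka") do |_name, _start, _finish, _id, payload|
  create_time = payload[:last_create_time]
  next unless create_time

  # Milliseconds between now and the newest message in the batch.
  time_lag_ms = ((Time.now - create_time) * 1000).to_i
  puts "#{payload[:topic]}/#{payload[:partition]} lag: #{time_lag_ms}ms"
end
```
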
@@ -401,6 +410,7 @@ module Kafka
       while running?
         begin
           @instrumenter.instrument("loop.consumer") do
+            refresh_topic_list_if_enabled
             yield
           end
         rescue HeartbeatError
@@ -432,6 +442,7 @@ module Kafka
       # important that members explicitly tell Kafka when they're leaving.
       make_final_offsets_commit!
       @group.leave rescue nil
+      @cluster.disconnect
       @running = false
       @logger.pop_tags
     end
@@ -452,6 +463,8 @@ module Kafka
     end
 
     def join_group
+      @join_group_for_new_topics = false
+
       old_generation_id = @group.generation_id
 
       @group.join
@@ -513,11 +526,19 @@ module Kafka
       end
     end
 
+    def refresh_topic_list_if_enabled
+      return if @refresh_topic_interval <= 0
+      return if @refreshed_at && @refreshed_at + @refresh_topic_interval > Time.now
+
+      scan_for_subscribing
+      @refreshed_at = Time.now
+    end
+
     def fetch_batches
       # Return early if the consumer has been stopped.
       return [] if shutting_down?
 
-      join_group unless @group.member?
+      join_group if !@group.member? || @join_group_for_new_topics
 
       trigger_heartbeat
 
@@ -525,7 +546,7 @@ module Kafka
 
       if !@fetcher.data?
         @logger.debug "No batches to process"
-        sleep 2
+        sleep(@fetcher.max_wait_time || 2)
         []
       else
         tag, message = @fetcher.poll
@@ -571,10 +592,34 @@ module Kafka
       end
     end
 
+    def scan_for_subscribing
+      @subscribed_topics.each do |topic_or_regex, config|
+        default_offset = config.fetch(:default_offset)
+        start_from_beginning = config.fetch(:start_from_beginning)
+        max_bytes_per_partition = config.fetch(:max_bytes_per_partition)
+        if topic_or_regex.is_a?(Regexp)
+          subscribe_to_regex(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        else
+          subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        end
+      end
+    end
+
+    def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+      cluster_topics.select { |topic| topic =~ topic_regex }.each do |topic|
+        subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      end
+    end
+
     def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      return if @matched_topics.include?(topic)
+      @matched_topics.add(topic)
+      @join_group_for_new_topics = true
+
       @group.subscribe(topic)
       @offset_manager.set_default_offset(topic, default_offset)
       @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
+      @cluster.mark_as_stale!
     end
 
     def cluster_topics
@@ -161,6 +161,12 @@ module Kafka
       @member_id = ""
       sleep 1
 
+      retry
+    rescue CoordinatorLoadInProgress
+      @logger.error "Coordinator broker still loading, retrying in 1s..."
+
+      sleep 1
+
       retry
     end
 
@@ -31,7 +31,7 @@ module Kafka
 
     class << self
       def statsd
-        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
       end
 
       def statsd=(statsd)
@@ -57,6 +57,15 @@ module Kafka
         clear
       end
 
+      def socket_path
+        @socket_path
+      end
+
+      def socket_path=(socket_path)
+        @socket_path = socket_path
+        clear
+      end
+
       def namespace
         @namespace ||= STATSD_NAMESPACE
       end
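
With the new writer, the Datadog reporter can be pointed at a dogstatsd Unix domain socket instead of a host/port pair. A hedged sketch (the socket path is a placeholder, and the installed dogstatsd-ruby version is assumed to support the `socket_path` option):

```ruby
require "kafka/datadog"

# Report metrics over a Unix domain socket instead of the default UDP host/port.
Kafka::Datadog.socket_path = "/var/run/datadog/dsd.socket" # placeholder path
```

Setting the writer calls `clear`, so the memoized `::Datadog::Statsd` instance is rebuilt and the next metric is emitted through the socket.
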
@@ -160,6 +169,8 @@ module Kafka
       def process_batch(event)
         offset = event.payload.fetch(:last_offset)
         messages = event.payload.fetch(:message_count)
+        create_time = event.payload.fetch(:last_create_time)
+        time_lag = create_time && ((Time.now - create_time) * 1000).to_i
 
         tags = {
           client: event.payload.fetch(:client_id),
@@ -176,6 +187,10 @@ module Kafka
         end
 
         gauge("consumer.offset", offset, tags: tags)
+
+        if time_lag
+          gauge("consumer.time_lag", time_lag, tags: tags)
+        end
       end
 
       def fetch_batch(event)
@@ -4,7 +4,7 @@ require "kafka/fetch_operation"
 
 module Kafka
   class Fetcher
-    attr_reader :queue
+    attr_reader :queue, :max_wait_time
 
     def initialize(cluster:, logger:, instrumenter:, max_queue_size:, group:)
       @cluster = cluster
@@ -50,9 +50,20 @@ module Kafka
     # @param offset [Integer] the offset of the message that should be marked as processed.
     # @return [nil]
     def mark_as_processed(topic, partition, offset)
-      @uncommitted_offsets += 1
+      unless @group.assigned_to?(topic, partition)
+        @logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
+        return
+      end
       @processed_offsets[topic] ||= {}
 
+      last_processed_offset = @processed_offsets[topic][partition] || -1
+      if last_processed_offset > offset + 1
+        @logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
+        return
+      end
+
+      @uncommitted_offsets += 1
+
       # The committed offset should always be the offset of the next message that the
       # application will read, thus adding one to the last message processed.
       @processed_offsets[topic][partition] = offset + 1
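
These guards matter mostly when offsets are marked manually. A hedged sketch of the calling side (consumer setup elided; `handle` is a placeholder processing method): with `automatically_mark_as_processed: false`, stale or out-of-order calls to `mark_message_as_processed` are now ignored instead of rewinding the committed position.

```ruby
consumer.each_batch(automatically_mark_as_processed: false) do |batch|
  batch.messages.each do |message|
    handle(message) # placeholder processing method

    # Marks go through OffsetManager#mark_as_processed, which now skips
    # partitions this consumer no longer owns and offsets older than the
    # last one already marked.
    consumer.mark_message_as_processed(message)
  end
end
```
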
@@ -34,7 +34,7 @@ module Kafka
 
   class MetadataResponse
     class PartitionMetadata
-      attr_reader :partition_id, :leader
+      attr_reader :partition_id, :leader, :replicas
 
       attr_reader :partition_error_code
 
@@ -54,11 +54,12 @@ module Kafka
           store.set_default_paths
         end
         ssl_context.cert_store = store
-        ssl_context.verify_mode = OpenSSL::SSL::VERIFY_PEER
-        # Verify certificate hostname if supported (ruby >= 2.4.0)
-        ssl_context.verify_hostname = verify_hostname if ssl_context.respond_to?(:verify_hostname=)
       end
 
+      ssl_context.verify_mode = OpenSSL::SSL::VERIFY_PEER
+      # Verify certificate hostname if supported (ruby >= 2.4.0)
+      ssl_context.verify_hostname = verify_hostname if ssl_context.respond_to?(:verify_hostname=)
+
       ssl_context
     end
   end
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Kafka
-  VERSION = "1.0.0"
+  VERSION = "1.1.0"
 end
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
   DESC
 
   spec.homepage = "https://github.com/zendesk/ruby-kafka"
-  spec.license = "Apache License Version 2.0"
+  spec.license = "Apache-2.0"
 
   spec.required_ruby_version = '>= 2.1.0'
 
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.1.0
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-02-25 00:00:00.000000000 Z
+date: 2020-06-09 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: digest-crc
@@ -476,7 +476,7 @@ files:
 - ruby-kafka.gemspec
 homepage: https://github.com/zendesk/ruby-kafka
 licenses:
-- Apache License Version 2.0
+- Apache-2.0
 metadata: {}
 post_install_message:
 rdoc_options: []