ruby-kafka 1.1.0 → 1.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 22b958f3d452f35c3c6084a5b8b790f370b96069e87ef5974838e5ea4b6945e1
-   data.tar.gz: eb6d704dbaace9c13a99bc93924d8a6b84ee5eb686c9c863a81d085ffeb7e92d
+   metadata.gz: 0f8391cc7b1989cb5f669796bc4ad647b77d882e6506fae42bab18acb8a6bcc6
+   data.tar.gz: 012baaff5d2cc9eb17e3a7b7342f49f7c905a5f91d26078fa0ecf2f0fa81a2ad
  SHA512:
-   metadata.gz: 22c1d59bcdd42849849122b559f0d161653a5cfa492ffb36f28a875b109444d670a76bc62ba0856ae2ebce95fd10abce9e16834928fc7fe6eb2ee006827d307c
-   data.tar.gz: 3a51df6b1d40e1edbd96c06f3473319f0ac1b072b040b676a8e1d980fbaad114ad0248a7670cb7ae0a51a2c79ac390f2f4cfe8f65eedb5dd55be020c6011bc18
+   metadata.gz: 7f4e9302ca0ab41a6fded75f95ed866d959a2a027b90051ed7f3d7fba573aa63e57be7692d004dbeb3bacd99fe44b24188f81b0f3bed50f68e1da5189262271f
+   data.tar.gz: 7266bdd50e66a7ab9b3c71025468af0a4bea778fd312c41d4c699e2609420c11c28accb3246407ce98330bf521c109691bd1d4943dd532a44ee6732f1a410922
@@ -1 +1 @@
- 2.5.1
+ 2.7.1
data/CHANGELOG.md CHANGED
@@ -4,6 +4,11 @@ Changes and additions to the library will be listed here.
 
  ## Unreleased
 
+ ## 1.2.0
+
+ - Add producer and consumer interceptors (#837).
+ - Add support for configuring the client partitioner (#848).
+
  ## 1.1.0
 
  - Extra sanity checking when marking offsets as processed (#824).
data/README.md CHANGED
@@ -349,6 +349,26 @@ partition = PartitioningScheme.assign(partitions, event)
  producer.produce(event, topic: "events", partition: partition)
  ```
 
+ Another option is to configure a custom client partitioner that implements `call(partition_count, message)` and uses the same scheme as the other client. For example:
+
+ ```ruby
+ class CustomPartitioner
+   def call(partition_count, message)
+     ...
+   end
+ end
+
+ partitioner = CustomPartitioner.new
+ Kafka.new(partitioner: partitioner, ...)
+ ```
+
+ Or, simply create a Proc handling the partitioning logic instead of having to add a new class. For example:
+
+ ```ruby
+ partitioner = -> (partition_count, message) { ... }
+ Kafka.new(partitioner: partitioner, ...)
+ ```
+
  #### Buffering and Error Handling
 
  The producer is designed for resilience in the face of temporary network errors, Kafka broker failovers, and other issues that prevent the client from writing messages to the destination topics. It does this by employing local, in-memory buffers. Only when messages are acknowledged by a Kafka broker will they be removed from the buffer.
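For a concrete illustration of the `call(partition_count, message)` contract documented above, here is a hedged sketch of a partitioner that hashes the partition key (falling back to the message key) with CRC32. The `Crc32Partitioner` name and the CRC32 choice are invented for the example and are not part of ruby-kafka:

```ruby
require "zlib"

# Illustrative only: pick a partition by CRC32 of the partition key, falling
# back to the message key, and a random partition when neither is set.
class Crc32Partitioner
  def call(partition_count, message)
    raise ArgumentError, "no partitions available" if partition_count.zero?

    key = message.partition_key || message.key
    key ? Zlib.crc32(key) % partition_count : rand(partition_count)
  end
end

kafka = Kafka.new(["kafka1:9092"], client_id: "my-application", partitioner: Crc32Partitioner.new)
```

Whatever hashing scheme is used, the only requirement is that `call` returns an Integer in `0...partition_count`.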
@@ -246,10 +246,10 @@ module Kafka
 
    private
 
-   def produce(*args)
+   def produce(value, **kwargs)
      retries = 0
      begin
-       @producer.produce(*args)
+       @producer.produce(value, **kwargs)
      rescue BufferOverflow => e
        deliver_messages
        if @max_retries == -1
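The `produce(*args)` → `produce(value, **kwargs)` change above (like the similar `*args, **kwargs` change to the metrics subscriber further down) follows Ruby 2.7's separation of positional and keyword arguments, which the version bump to 2.7.1 reflects: forwarding keywords through a bare `*args` splat warns on 2.7 and breaks on Ruby 3. A minimal sketch of the delegation pattern, not taken from the gem:

```ruby
# Illustrative only: a delegating method must capture **kwargs explicitly.
# If keywords ride along inside *args they are re-splatted as a positional
# hash, which warns on Ruby 2.7 and raises ArgumentError on Ruby 3.
def produce(value, topic:, key: nil)
  puts "#{topic}: #{key.inspect} => #{value}"
end

def buffered_produce(value, **kwargs)
  produce(value, **kwargs)
end

buffered_produce("hello", topic: "greetings", key: "en")
```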
data/lib/kafka/client.rb CHANGED
@@ -62,10 +62,15 @@ module Kafka
    #
    # @param sasl_over_ssl [Boolean] whether to enforce SSL with SASL
    #
+   # @param ssl_ca_certs_from_system [Boolean] whether to use the CA certs from the
+   #   system's default certificate store.
+   #
+   # @param partitioner [Partitioner, nil] the partitioner that should be used by the client.
+   #
    # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
    #   implements method token. See {Sasl::OAuth#initialize}
    #
-   # @param verify_hostname [Boolean, true] whether to verify that the host serving
+   # @param ssl_verify_hostname [Boolean, true] whether to verify that the host serving
    #   the SSL certificate and the signing chain of the certificate have the correct domains
    #   based on the CA certificate
    #
@@ -75,7 +80,7 @@ module Kafka
      ssl_client_cert_key_password: nil, ssl_client_cert_chain: nil, sasl_gssapi_principal: nil,
      sasl_gssapi_keytab: nil, sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
      sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
-     sasl_over_ssl: true, ssl_ca_certs_from_system: false, sasl_oauth_token_provider: nil, ssl_verify_hostname: true)
+     sasl_over_ssl: true, ssl_ca_certs_from_system: false, partitioner: nil, sasl_oauth_token_provider: nil, ssl_verify_hostname: true)
      @logger = TaggedLogger.new(logger)
      @instrumenter = Instrumenter.new(client_id: client_id)
      @seed_brokers = normalize_seed_brokers(seed_brokers)
@@ -119,6 +124,7 @@ module Kafka
      )
 
      @cluster = initialize_cluster
+     @partitioner = partitioner || Partitioner.new
    end
 
    # Delivers a single message to the Kafka cluster.
@@ -157,7 +163,7 @@ module Kafka
 
      if partition.nil?
        partition_count = @cluster.partitions_for(topic).count
-       partition = Partitioner.partition_for_key(partition_count, message)
+       partition = @partitioner.call(partition_count, message)
      end
 
      buffer = MessageBuffer.new
@@ -248,6 +254,9 @@ module Kafka
    #   be in a message set before it should be compressed. Note that message sets
    #   are per-partition rather than per-topic or per-producer.
    #
+   # @param interceptors [Array<Object>] a list of producer interceptors that implement
+   #   `call(Kafka::PendingMessage)`.
+   #
    # @return [Kafka::Producer] the Kafka producer.
    def producer(
      compression_codec: nil,
@@ -261,7 +270,8 @@ module Kafka
      idempotent: false,
      transactional: false,
      transactional_id: nil,
-     transactional_timeout: 60
+     transactional_timeout: 60,
+     interceptors: []
    )
      cluster = initialize_cluster
      compressor = Compressor.new(
@@ -291,6 +301,8 @@ module Kafka
        retry_backoff: retry_backoff,
        max_buffer_size: max_buffer_size,
        max_buffer_bytesize: max_buffer_bytesize,
+       partitioner: @partitioner,
+       interceptors: interceptors
      )
    end
 
@@ -343,6 +355,8 @@ module Kafka
    # @param refresh_topic_interval [Integer] interval of refreshing the topic list.
    #   If it is 0, the topic list won't be refreshed (default)
    #   If it is n (n > 0), the topic list will be refreshed every n seconds
+   # @param interceptors [Array<Object>] a list of consumer interceptors that implement
+   #   `call(Kafka::FetchedBatch)`.
    # @return [Consumer]
    def consumer(
      group_id:,
@@ -353,7 +367,8 @@ module Kafka
      heartbeat_interval: 10,
      offset_retention_time: nil,
      fetcher_max_queue_size: 100,
-     refresh_topic_interval: 0
+     refresh_topic_interval: 0,
+     interceptors: []
    )
      cluster = initialize_cluster
 
@@ -407,7 +422,8 @@ module Kafka
        fetcher: fetcher,
        session_timeout: session_timeout,
        heartbeat: heartbeat,
-       refresh_topic_interval: refresh_topic_interval
+       refresh_topic_interval: refresh_topic_interval,
+       interceptors: interceptors
      )
    end
 
data/lib/kafka/consumer.rb CHANGED
@@ -1,6 +1,7 @@
  # frozen_string_literal: true
 
  require "kafka/consumer_group"
+ require "kafka/interceptors"
  require "kafka/offset_manager"
  require "kafka/fetcher"
  require "kafka/pause"
@@ -44,7 +45,8 @@ module Kafka
    #
    class Consumer
 
-     def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:, refresh_topic_interval: 0)
+     def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
+                    session_timeout:, heartbeat:, refresh_topic_interval: 0, interceptors: [])
        @cluster = cluster
        @logger = TaggedLogger.new(logger)
        @instrumenter = instrumenter
@@ -54,6 +56,7 @@ module Kafka
        @fetcher = fetcher
        @heartbeat = heartbeat
        @refresh_topic_interval = refresh_topic_interval
+       @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
 
        @pauses = Hash.new {|h, k|
          h[k] = Hash.new {|h2, k2|
@@ -76,7 +79,7 @@ module Kafka
        @current_offsets = Hash.new { |h, k| h[k] = {} }
 
        # Map storing subscribed topics with their configuration
-       @subscribed_topics = Concurrent::Map.new
+       @subscribed_topics = Hash.new
 
        # Set storing topics that matched topics in @subscribed_topics
        @matched_topics = Set.new
@@ -220,6 +223,7 @@ module Kafka
        batches = fetch_batches
 
        batches.each do |batch|
+         batch = @interceptors.call(batch)
          batch.messages.each do |message|
            notification = {
              topic: message.topic,
@@ -311,12 +315,13 @@ module Kafka
        unless batch.empty?
          raw_messages = batch.messages
          batch.messages = raw_messages.reject(&:is_control_record)
+         batch = @interceptors.call(batch)
 
          notification = {
            topic: batch.topic,
            partition: batch.partition,
            last_offset: batch.last_offset,
-           last_create_time: batch.messages.last.try(:create_time),
+           last_create_time: batch.messages.last && batch.messages.last.create_time,
            offset_lag: batch.offset_lag,
            highwater_mark_offset: batch.highwater_mark_offset,
            message_count: batch.messages.count,
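Taken together with the `interceptors:` parameters added to `Kafka::Client#producer` and `#consumer` above, wiring interceptors in only requires objects that respond to `call`, so a Proc works as well as a class. A hedged sketch (the broker address and the no-op interceptor bodies are placeholders):

```ruby
kafka = Kafka.new(["kafka1:9092"], client_id: "my-application")

# Producer interceptors see each Kafka::PendingMessage before it is buffered.
producer = kafka.producer(interceptors: [->(message) { message }])

# Consumer interceptors see each Kafka::FetchedBatch before it is handed to your processing block.
consumer = kafka.consumer(group_id: "my-group", interceptors: [->(batch) { batch }])
```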
@@ -96,8 +96,8 @@ module Kafka
      private
 
      %w[increment histogram count timing gauge].each do |type|
-       define_method(type) do |*args|
-         emit(type, *args)
+       define_method(type) do |*args, **kwargs|
+         emit(type, *args, **kwargs)
        end
      end
 
data/lib/kafka/interceptors.rb ADDED
@@ -0,0 +1,33 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   # Holds a list of interceptors that implement `call`
+   # and wraps calls to a chain of custom interceptors.
+   class Interceptors
+     def initialize(interceptors:, logger:)
+       @interceptors = interceptors || []
+       @logger = TaggedLogger.new(logger)
+     end
+
+     # This method is called when the client produces a message or once the batches are fetched.
+     # The message returned from the first call is passed to the second interceptor call, and so on in an
+     # interceptor chain. This method does not throw exceptions.
+     #
+     # @param intercepted [Kafka::PendingMessage || Kafka::FetchedBatch] the produced message or
+     #   fetched batch.
+     #
+     # @return [Kafka::PendingMessage || Kafka::FetchedBatch] the intercepted message or batch
+     #   returned by the last interceptor.
+     def call(intercepted)
+       @interceptors.each do |interceptor|
+         begin
+           intercepted = interceptor.call(intercepted)
+         rescue Exception => e
+           @logger.warn "Error executing interceptor for topic: #{intercepted.topic} partition: #{intercepted.partition}: #{e.message}\n#{e.backtrace.join("\n")}"
+         end
+       end
+
+       intercepted
+     end
+   end
+ end
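For reference, a hedged sketch of what a conforming interceptor might look like; the class and what it records are invented for illustration, and `kafka` is the client from the earlier example. The contract implied by the chain above is simply to respond to `call` and return the (possibly transformed) message or batch, since the return value is handed to the next interceptor and exceptions are logged rather than propagated:

```ruby
# Hypothetical consumer interceptor: records how many messages each fetched
# batch contained and passes the batch on unchanged.
class BatchSizeInterceptor
  attr_reader :sizes

  def initialize
    @sizes = []
  end

  def call(batch)
    @sizes << batch.messages.size
    batch # must be returned so the next interceptor (and the consumer) receives it
  end
end

consumer = kafka.consumer(group_id: "my-group", interceptors: [BatchSizeInterceptor.new])
```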
data/lib/kafka/partitioner.rb CHANGED
@@ -19,7 +19,7 @@ module Kafka
    # @param message [Kafka::PendingMessage] the message that should be assigned
    #   a partition.
    # @return [Integer] the partition number.
-   def self.partition_for_key(partition_count, message)
+   def call(partition_count, message)
      raise ArgumentError if partition_count == 0
 
      # If no explicit partition key is specified we use the message key instead.
data/lib/kafka/producer.rb CHANGED
@@ -7,6 +7,7 @@ require "kafka/produce_operation"
  require "kafka/pending_message_queue"
  require "kafka/pending_message"
  require "kafka/compressor"
+ require "kafka/interceptors"
 
  module Kafka
    # Allows sending messages to a Kafka cluster.
@@ -129,7 +130,9 @@ module Kafka
    class Producer
      class AbortTransaction < StandardError; end
 
-     def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+     def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
+                    required_acks:, max_retries:, retry_backoff:, max_buffer_size:,
+                    max_buffer_bytesize:, partitioner:, interceptors: [])
        @cluster = cluster
        @transaction_manager = transaction_manager
        @logger = TaggedLogger.new(logger)
@@ -141,6 +144,8 @@ module Kafka
        @max_buffer_size = max_buffer_size
        @max_buffer_bytesize = max_buffer_bytesize
        @compressor = compressor
+       @partitioner = partitioner
+       @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
 
        # The set of topics that are produced to.
        @target_topics = Set.new
@@ -191,7 +196,7 @@ module Kafka
      # We want to fail fast if `topic` isn't a String
      topic = topic.to_str
 
-     message = PendingMessage.new(
+     message = @interceptors.call(PendingMessage.new(
        value: value && value.to_s,
        key: key && key.to_s,
        headers: headers,
@@ -199,7 +204,7 @@ module Kafka
        partition: partition && Integer(partition),
        partition_key: partition_key && partition_key.to_s,
        create_time: create_time
-     )
+     ))
 
      if buffer_size >= @max_buffer_size
        buffer_overflow topic,
@@ -455,7 +460,7 @@ module Kafka
 
      if partition.nil?
        partition_count = @cluster.partitions_for(message.topic).count
-       partition = Partitioner.partition_for_key(partition_count, message)
+       partition = @partitioner.call(partition_count, message)
      end
 
      @buffer.write(
data/lib/kafka/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  module Kafka
-   VERSION = "1.1.0"
+   VERSION = "1.2.0"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: ruby-kafka
  version: !ruby/object:Gem::Version
-   version: 1.1.0
+   version: 1.2.0
  platform: ruby
  authors:
  - Daniel Schierbeck
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2020-06-09 00:00:00.000000000 Z
+ date: 2020-08-03 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: digest-crc
@@ -386,6 +386,7 @@ files:
  - lib/kafka/gzip_codec.rb
  - lib/kafka/heartbeat.rb
  - lib/kafka/instrumenter.rb
+ - lib/kafka/interceptors.rb
  - lib/kafka/lz4_codec.rb
  - lib/kafka/message_buffer.rb
  - lib/kafka/offset_manager.rb