karafka 2.4.7 → 2.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +9 -1
  4. data/Gemfile +1 -1
  5. data/Gemfile.lock +10 -9
  6. data/config/locales/errors.yml +3 -0
  7. data/karafka.gemspec +1 -0
  8. data/lib/karafka/base_consumer.rb +23 -1
  9. data/lib/karafka/connection/client.rb +33 -21
  10. data/lib/karafka/connection/listener.rb +47 -11
  11. data/lib/karafka/connection/messages_buffer.rb +16 -6
  12. data/lib/karafka/connection/raw_messages_buffer.rb +32 -9
  13. data/lib/karafka/contracts/config.rb +1 -0
  14. data/lib/karafka/instrumentation/logger_listener.rb +3 -0
  15. data/lib/karafka/instrumentation/notifications.rb +4 -0
  16. data/lib/karafka/pro/processing/jobs/eofed_non_blocking.rb +32 -0
  17. data/lib/karafka/pro/processing/jobs_builder.rb +12 -0
  18. data/lib/karafka/pro/processing/partitioner.rb +30 -11
  19. data/lib/karafka/pro/processing/schedulers/default.rb +1 -0
  20. data/lib/karafka/processing/coordinator.rb +10 -0
  21. data/lib/karafka/processing/executor.rb +12 -0
  22. data/lib/karafka/processing/jobs/eofed.rb +27 -0
  23. data/lib/karafka/processing/jobs_builder.rb +6 -0
  24. data/lib/karafka/processing/schedulers/default.rb +1 -0
  25. data/lib/karafka/processing/strategies/default.rb +11 -0
  26. data/lib/karafka/routing/features/eofed/config.rb +15 -0
  27. data/lib/karafka/routing/features/eofed/contracts/topic.rb +27 -0
  28. data/lib/karafka/routing/features/eofed/topic.rb +31 -0
  29. data/lib/karafka/routing/features/eofed.rb +14 -0
  30. data/lib/karafka/setup/config.rb +3 -0
  31. data/lib/karafka/version.rb +1 -1
  32. data/lib/karafka.rb +14 -2
  33. data.tar.gz.sig +0 -0
  34. metadata +22 -2
  35. metadata.gz.sig +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6c94c3a4e646233f535e63bcf7a65c88d656f534bfccd21cad87be59b525adb1
4
- data.tar.gz: 63b6ad0491880325d6ac0e5a7367f2a938e8a36245bd1cff48d3de73da6266c6
3
+ metadata.gz: c7e8e51a5c0c4ded0d074965cee2090e64ca77e43443f4f36ab03cc3a21ddfd6
4
+ data.tar.gz: c6e912c518d301f55974a9e4deb491ebe4c3e073e6748fe62ce1003eaea7bed7
5
5
  SHA512:
6
- metadata.gz: 6c04bcd1f7ed17855140e519a82a52c0cae93a3c41126120ab797bbcf172315f7f7720f12e233cc85eed4af2dd437a3364f7f5613f730c9b2e32f8b79225570f
7
- data.tar.gz: 3441c4ac2230c5903dacd210b54c80c05f72c2804490229334d10656672c4ef9d9b3160c12cdd4d22916d55f1687a730980b0b3fc21099a003f36238f58025eb
6
+ metadata.gz: 8769a192c7ebb852250afd96611e115807172bf320d4e9d513bffbcc66f5570ca637aee1b8f3b68e46350b37054935ea905893139bc264a02949f975b39f2041
7
+ data.tar.gz: 13dc8e118850aace7127bae5fe667b6f73118d220b61b1d2b590cc18e71243b3c1467d43df9ae134cd7b26c0274a100e34d8a07db0c6091bc02fd9c95ffb24f0
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,7 +1,15 @@
1
1
  # Karafka Framework Changelog
2
2
 
3
+ ## 2.4.8 (2024-08-09)
4
+ - **[Feature]** Introduce ability to react to `#eof` either from `#consume` or from `#eofed` when EOF without new messages.
5
+ - [Enhancement] Provide `Consumer#eofed?` to indicate reaching EOF.
6
+ - [Enhancement] Always immediately report on `inconsistent_group_protocol` error.
7
+ - [Enhancement] Reduce virtual partitioning to 1 partition when any partitioner execution in a partitioned batch crashes.
8
+ - [Enhancement] Provide `KARAFKA_REQUIRE_RAILS` to disable default Rails `require` to run Karafka without Rails despite having Rails in the Gemfile.
9
+ - [Enhancement] Increase final listener recovery from 1 to 60 seconds to prevent constant rebalancing. This is the last resort recovery and should never happen unless critical errors occur.
10
+
3
11
  ## 2.4.7 (2024-08-01)
4
- - [Enhancement] Introduce `Karafka::Server.mode` to check in what mode Karafka process operates (`standalone`, `swarm`, `supervisor`, `embedded`).
12
+ - [Enhancement] Introduce `Karafka::Server.execution_mode` to check in what mode Karafka process operates (`standalone`, `swarm`, `supervisor`, `embedded`).
5
13
  - [Enhancement] Ensure `max.poll.interval.ms` is always present and populate it with librdkafka default.
6
14
  - [Enhancement] Introduce a shutdown time limit for unsubscription wait.
7
15
  - [Enhancement] Tag with `mode:swarm` each of the running swarm consumers.
data/Gemfile CHANGED
@@ -12,7 +12,7 @@ gemspec
12
12
  group :integrations do
13
13
  gem 'activejob', require: false
14
14
  gem 'karafka-testing', '>= 2.4.0', require: false
15
- gem 'karafka-web', '>= 0.9.0', require: false
15
+ gem 'karafka-web', '>= 0.10.0.beta1', require: false
16
16
  gem 'rspec', require: false
17
17
  end
18
18
 
data/Gemfile.lock CHANGED
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.4.7)
4
+ karafka (2.4.8)
5
5
  base64 (~> 0.2)
6
6
  karafka-core (>= 2.4.3, < 2.5.0)
7
+ karafka-rdkafka (>= 0.17.2)
7
8
  waterdrop (>= 2.7.3, < 3.0.0)
8
9
  zeitwerk (~> 2.3)
9
10
 
@@ -29,7 +30,7 @@ GEM
29
30
  concurrent-ruby (1.3.3)
30
31
  connection_pool (2.4.1)
31
32
  diff-lcs (1.5.1)
32
- docile (1.4.0)
33
+ docile (1.4.1)
33
34
  drb (2.2.1)
34
35
  erubi (1.13.0)
35
36
  factory_bot (6.4.6)
@@ -41,26 +42,26 @@ GEM
41
42
  concurrent-ruby (~> 1.0)
42
43
  karafka-core (2.4.4)
43
44
  karafka-rdkafka (>= 0.15.0, < 0.18.0)
44
- karafka-rdkafka (0.17.1)
45
+ karafka-rdkafka (0.17.3)
45
46
  ffi (~> 1.15)
46
47
  mini_portile2 (~> 2.6)
47
48
  rake (> 12)
48
49
  karafka-testing (2.4.6)
49
50
  karafka (>= 2.4.0, < 2.5.0)
50
51
  waterdrop (>= 2.7.0)
51
- karafka-web (0.9.1)
52
+ karafka-web (0.10.0.rc1)
52
53
  erubi (~> 1.4)
53
- karafka (>= 2.4.0, < 2.5.0)
54
+ karafka (>= 2.4.7, < 2.5.0)
54
55
  karafka-core (>= 2.4.0, < 2.5.0)
55
56
  roda (~> 3.68, >= 3.69)
56
57
  tilt (~> 2.0)
57
58
  mini_portile2 (2.8.7)
58
- minitest (5.24.0)
59
+ minitest (5.24.1)
59
60
  mutex_m (0.2.0)
60
61
  ostruct (0.6.0)
61
- rack (3.1.5)
62
+ rack (3.1.7)
62
63
  rake (13.2.1)
63
- roda (3.81.0)
64
+ roda (3.82.0)
64
65
  rack
65
66
  rspec (3.13.0)
66
67
  rspec-core (~> 3.13.0)
@@ -100,7 +101,7 @@ DEPENDENCIES
100
101
  factory_bot
101
102
  karafka!
102
103
  karafka-testing (>= 2.4.0)
103
- karafka-web (>= 0.9.0)
104
+ karafka-web (>= 0.10.0.beta1)
104
105
  ostruct
105
106
  rspec
106
107
  simplecov
@@ -49,6 +49,7 @@ en:
49
49
 
50
50
  internal.connection.manager_format: needs to be present
51
51
  internal.connection.conductor_format: needs to be present
52
+ internal.connection.reset_backoff_format: needs to be an integer bigger or equal to 1000
52
53
  internal.connection.proxy.query_watermark_offsets.timeout_format: needs to be an integer bigger than 0
53
54
  internal.connection.proxy.query_watermark_offsets.max_attempts_format: needs to be an integer bigger than 0
54
55
  internal.connection.proxy.query_watermark_offsets.wait_time_format: needs to be an integer bigger than 0
@@ -115,6 +116,8 @@ en:
115
116
 
116
117
  active_format: needs to be either true or false
117
118
 
119
+ eofed.active_format: needs to be either true or false
120
+
118
121
  declaratives.partitions_format: needs to be more or equal to 1
119
122
  declaratives.active_format: needs to be true
120
123
  declaratives.replication_factor_format: needs to be more or equal to 1
data/karafka.gemspec CHANGED
@@ -23,6 +23,7 @@ Gem::Specification.new do |spec|
23
23
 
24
24
  spec.add_dependency 'base64', '~> 0.2'
25
25
  spec.add_dependency 'karafka-core', '>= 2.4.3', '< 2.5.0'
26
+ spec.add_dependency 'karafka-rdkafka', '>= 0.17.2'
26
27
  spec.add_dependency 'waterdrop', '>= 2.7.3', '< 3.0.0'
27
28
  spec.add_dependency 'zeitwerk', '~> 2.3'
28
29
 
@@ -9,7 +9,7 @@ module Karafka
9
9
 
10
10
  extend Forwardable
11
11
 
12
- def_delegators :@coordinator, :topic, :partition
12
+ def_delegators :@coordinator, :topic, :partition, :eofed?
13
13
 
14
14
  def_delegators :producer, :produce_async, :produce_sync, :produce_many_async,
15
15
  :produce_many_sync
@@ -100,6 +100,24 @@ module Karafka
100
100
  retry_after_pause
101
101
  end
102
102
 
103
+ # Can be used to run code prior to scheduling of eofed execution
104
+ def on_before_schedule_eofed
105
+ handle_before_schedule_eofed
106
+ end
107
+
108
+ # Trigger method for running on eof without messages
109
+ def on_eofed
110
+ handle_eofed
111
+ rescue StandardError => e
112
+ Karafka.monitor.instrument(
113
+ 'error.occurred',
114
+ error: e,
115
+ caller: self,
116
+ seek_offset: coordinator.seek_offset,
117
+ type: 'consumer.eofed.error'
118
+ )
119
+ end
120
+
103
121
  # Can be used to run code prior to scheduling of idle execution
104
122
  #
105
123
  # @private
@@ -166,6 +184,10 @@ module Karafka
166
184
  raise NotImplementedError, 'Implement this in a subclass'
167
185
  end
168
186
 
187
+ # Method that will be executed when a given topic partition reaches eof without any new
188
+ # incoming messages alongside
189
+ def eofed; end
190
+
169
191
  # Method that will be executed when a given topic partition is revoked. You can use it for
170
192
  # some teardown procedures (closing file handler, etc).
171
193
  def revoked; end
@@ -31,7 +31,21 @@ module Karafka
31
31
  # before we move to a regular unsubscribe.
32
32
  COOP_UNSUBSCRIBE_FACTOR = 0.5
33
33
 
34
- private_constant :MAX_POLL_RETRIES, :COOP_UNSUBSCRIBE_FACTOR
34
+ # Errors upon which we early report that something is off without retrying prior to the
35
+ # report
36
+ EARLY_REPORT_ERRORS = [
37
+ :inconsistent_group_protocol, # 23
38
+ :max_poll_exceeded, # -147
39
+ :network_exception, # 13
40
+ :transport, # -195
41
+ :topic_authorization_failed, # 29
42
+ :group_authorization_failed, # 30
43
+ :cluster_authorization_failed, # 31
44
+ # This can happen for many reasons, including issues with static membership being fenced
45
+ :fatal # -150
46
+ ].freeze
47
+
48
+ private_constant :MAX_POLL_RETRIES, :COOP_UNSUBSCRIBE_FACTOR, :EARLY_REPORT_ERRORS
35
49
 
36
50
  # Creates a new consumer instance.
37
51
  #
@@ -98,8 +112,17 @@ module Karafka
98
112
  # Fetch message within our time boundaries
99
113
  response = poll(time_poll.remaining)
100
114
 
101
- # Put a message to the buffer if there is one
102
- @buffer << response if response && response != :tick_time
115
+ case response
116
+ when :tick_time
117
+ nil
118
+ # We get a hash only in case of eof error
119
+ when Hash
120
+ @buffer.eof(response[:topic], response[:partition])
121
+ when nil
122
+ nil
123
+ else
124
+ @buffer << response
125
+ end
103
126
 
104
127
  # Upon polling rebalance manager might have been updated.
105
128
  # If partition revocation happens, we need to remove messages from revoked partitions
@@ -122,10 +145,11 @@ module Karafka
122
145
  time_poll.checkpoint
123
146
 
124
147
  # Finally once we've (potentially) removed revoked, etc, if no messages were returned
125
- # and it was not an early poll exist, we can break.
148
+ # and it was not an early poll exit, we can break. We also break if we got the eof
149
+ # signaling to propagate it asap
126
150
  # Worth keeping in mind, that the rebalance manager might have been updated despite no
127
151
  # messages being returned during a poll
128
- break unless response
152
+ break if response.nil? || response.is_a?(Hash)
129
153
  end
130
154
 
131
155
  @buffer
@@ -576,20 +600,7 @@ module Karafka
576
600
  # We want to report early on max poll interval exceeding because it may mean that the
577
601
  # underlying processing is taking too much time and it is not LRJ
578
602
  case e.code
579
- when :max_poll_exceeded # -147
580
- early_report = true
581
- when :network_exception # 13
582
- early_report = true
583
- when :transport # -195
584
- early_report = true
585
- when :topic_authorization_failed # 29
586
- early_report = true
587
- when :group_authorization_failed # 30
588
- early_report = true
589
- when :cluster_authorization_failed # 31
590
- early_report = true
591
- # This can happen for many reasons, including issues with static membership being fenced
592
- when :fatal # -150
603
+ when *EARLY_REPORT_ERRORS
593
604
  early_report = true
594
605
  # @see
595
606
  # https://github.com/confluentinc/confluent-kafka-dotnet/issues/1366#issuecomment-821842990
@@ -603,11 +614,12 @@ module Karafka
603
614
  # No sense in retrying when no topic/partition and we're no longer running
604
615
  retryable = false unless Karafka::App.running?
605
616
  # If we detect the end of partition which can happen if `enable.partition.eof` is set to
606
- # true, we can just return nil fast. This will fast yield whatever set of messages we
617
+ # true, we can just return fast. This will fast yield whatever set of messages we
607
618
  # already have instead of waiting. This can be used for better latency control when we do
608
619
  # not expect a lot of lag and want to quickly move to processing.
620
+ # We can also pass the eof notion to the consumers for improved decision making.
609
621
  when :partition_eof
610
- return nil
622
+ return e.details
611
623
  end
612
624
 
613
625
  if early_report || !retryable
@@ -253,7 +253,9 @@ module Karafka
253
253
 
254
254
  reset
255
255
 
256
- sleep(1) && retry
256
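+ # Ruby sleep is in seconds. NOTE(review): reset_backoff is presumably in milliseconds, so the 10_000.0 divisor below sleeps a tenth of it - confirm whether 1_000.0 was intended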
257
+ sleep_time = ::Karafka::App.config.internal.connection.reset_backoff / 10_000.0
258
+ sleep(sleep_time) && retry
257
259
  end
258
260
 
259
261
  # Resumes processing of partitions that were paused due to an error.
@@ -330,28 +332,57 @@ module Karafka
330
332
  # given scheduler. It also handles the idle jobs when filtering API removed all messages
331
333
  # and we need to run house-keeping
332
334
  def build_and_schedule_flow_jobs
333
- return if @messages_buffer.empty?
334
-
335
335
  consume_jobs = []
336
336
  idle_jobs = []
337
+ eofed_jobs = []
338
+
339
+ @messages_buffer.each do |topic, partition, messages, eof|
340
+ # In case we did not receive any new messages without eof we skip.
341
+ # We may yield empty array here in case we have reached eof without new messages but in
342
+ # such cases, we can run an eof job
343
+ next if messages.empty? && !eof
337
344
 
338
- @messages_buffer.each do |topic, partition, messages|
339
345
  coordinator = @coordinators.find_or_create(topic, partition)
340
- # Start work coordination for this topic partition
346
+ coordinator.eofed = eof
347
+
348
+ # If we did not receive any messages and we did receive eof signal, we run the eofed
349
+ # jobs so user can take actions on reaching eof
350
+ if messages.empty? && eof
351
+ # If user wants to run the eofed jobs on eof we do it. Otherwise we just allow it to
352
+ # pass through. This allows to configure if user actually wants to have `#eofed`
353
+ # logic or if he wants to only use fast eof work yield
354
+ if coordinator.topic.eofed?
355
+ @executors.find_all_or_create(topic, partition, coordinator).each do |executor|
356
+ coordinator.increment(:eofed)
357
+ eofed_jobs << @jobs_builder.eofed(executor)
358
+ end
359
+ end
360
+
361
+ next
362
+ end
363
+
341
364
  coordinator.start(messages)
342
365
 
366
+ # If it is not an eof and there are no new messages, we just run house-keeping
367
+ #
343
368
  # We do not increment coordinator for idle job because it's not a user related one
344
369
  # and it will not go through a standard lifecycle. Same applies to revoked and shutdown
345
370
  if messages.empty?
371
+ # Start work coordination for this topic partition
346
372
  coordinator.increment(:idle)
347
373
  executor = @executors.find_or_create(topic, partition, 0, coordinator)
348
374
  idle_jobs << @jobs_builder.idle(executor)
349
- else
350
- @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
351
- coordinator.increment(:consume)
352
- executor = @executors.find_or_create(topic, partition, group_id, coordinator)
353
- consume_jobs << @jobs_builder.consume(executor, partition_messages)
354
- end
375
+
376
+ next
377
+ end
378
+
379
+ # If there are messages, it is irrelevant if eof or not as consumption needs to happen
380
+ #
381
+ # Start work coordination for this topic partition
382
+ @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
383
+ coordinator.increment(:consume)
384
+ executor = @executors.find_or_create(topic, partition, group_id, coordinator)
385
+ consume_jobs << @jobs_builder.consume(executor, partition_messages)
355
386
  end
356
387
  end
357
388
 
@@ -367,6 +398,11 @@ module Karafka
367
398
  consume_jobs.each(&:before_schedule)
368
399
  @scheduler.on_schedule_consumption(consume_jobs)
369
400
  end
401
+
402
+ unless eofed_jobs.empty?
403
+ eofed_jobs.each(&:before_schedule)
404
+ @scheduler.on_schedule_eofed(eofed_jobs)
405
+ end
370
406
  end
371
407
 
372
408
  # Builds and schedules periodic jobs for topics partitions for which no messages were
@@ -23,9 +23,13 @@ module Karafka
23
23
  def initialize(subscription_group)
24
24
  @subscription_group = subscription_group
25
25
  @size = 0
26
+
26
27
  @groups = Hash.new do |topic_groups, topic|
27
28
  topic_groups[topic] = Hash.new do |partition_groups, partition|
28
- partition_groups[partition] = []
29
+ partition_groups[partition] = {
30
+ eof: false,
31
+ messages: []
32
+ }
29
33
  end
30
34
  end
31
35
  end
@@ -33,24 +37,29 @@ module Karafka
33
37
  # Remaps raw messages from the raw messages buffer to Karafka messages
34
38
  # @param raw_messages_buffer [RawMessagesBuffer] buffer with raw messages
35
39
  def remap(raw_messages_buffer)
36
- clear unless @size.zero?
40
+ clear
37
41
 
38
42
  # Since it happens "right after" we've received the messages, it is close enough in time
39
43
  # to be used as the moment we received messages.
40
44
  received_at = Time.now
41
45
 
42
- raw_messages_buffer.each do |topic, partition, messages|
46
+ raw_messages_buffer.each do |topic, partition, messages, eof|
43
47
  @size += messages.count
44
48
 
45
49
  ktopic = @subscription_group.topics.find(topic)
46
50
 
47
- @groups[topic][partition] = messages.map do |message|
51
+ built_messages = messages.map do |message|
48
52
  Messages::Builders::Message.call(
49
53
  message,
50
54
  ktopic,
51
55
  received_at
52
56
  )
53
57
  end
58
+
59
+ @groups[topic][partition] = {
60
+ eof: eof,
61
+ messages: built_messages
62
+ }
54
63
  end
55
64
  end
56
65
 
@@ -59,10 +68,11 @@ module Karafka
59
68
  # @yieldparam [String] topic name
60
69
  # @yieldparam [Integer] partition number
61
70
  # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
71
+ # @yieldparam [Boolean] true if eof, false otherwise
62
72
  def each
63
73
  @groups.each do |topic, partitions|
64
- partitions.each do |partition, messages|
65
- yield(topic, partition, messages)
74
+ partitions.each do |partition, details|
75
+ yield(topic, partition, details[:messages], details[:eof])
66
76
  end
67
77
  end
68
78
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Karafka
4
4
  module Connection
5
- # Buffer for raw librdkafka messages.
5
+ # Buffer for raw librdkafka messages and eof status.
6
6
  #
7
7
  # When message is added to this buffer, it gets assigned to an array with other messages from
8
8
  # the same topic and partition.
@@ -17,9 +17,13 @@ module Karafka
17
17
  # @return [Karafka::Connection::MessagesBuffer] buffer instance
18
18
  def initialize
19
19
  @size = 0
20
+
20
21
  @groups = Hash.new do |topic_groups, topic|
21
22
  topic_groups[topic] = Hash.new do |partition_groups, partition|
22
- partition_groups[partition] = []
23
+ partition_groups[partition] = {
24
+ eof: false,
25
+ messages: []
26
+ }
23
27
  end
24
28
  end
25
29
  end
@@ -30,7 +34,16 @@ module Karafka
30
34
  # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
31
35
  def <<(message)
32
36
  @size += 1
33
- @groups[message.topic][message.partition] << message
37
+ partition_state = @groups[message.topic][message.partition]
38
+ partition_state[:messages] << message
39
+ partition_state[:eof] = false
40
+ end
41
+
42
+ # Marks given topic partition as one that reached eof
43
+ # @param topic [String] topic that reached eof
44
+ # @param partition [Integer] partition that reached eof
45
+ def eof(topic, partition)
46
+ @groups[topic][partition][:eof] = true
34
47
  end
35
48
 
36
49
  # Allows to iterate over all the topics and partitions messages
@@ -38,10 +51,11 @@ module Karafka
38
51
  # @yieldparam [String] topic name
39
52
  # @yieldparam [Integer] partition number
40
53
  # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
54
+ # @yieldparam [Boolean] has polling of this partition reached eof
41
55
  def each
42
56
  @groups.each do |topic, partitions|
43
- partitions.each do |partition, messages|
44
- yield(topic, partition, messages)
57
+ partitions.each do |partition, details|
58
+ yield(topic, partition, details[:messages], details[:eof])
45
59
  end
46
60
  end
47
61
  end
@@ -69,8 +83,8 @@ module Karafka
69
83
  # again and we do want to ensure as few duplications as possible
70
84
  def uniq!
71
85
  @groups.each_value do |partitions|
72
- partitions.each_value do |messages|
73
- messages.uniq!(&:offset)
86
+ partitions.each_value do |details|
87
+ details[:messages].uniq!(&:offset)
74
88
  end
75
89
  end
76
90
 
@@ -83,6 +97,11 @@ module Karafka
83
97
  # we save ourselves some objects allocations. We cannot clear the underlying arrays as they
84
98
  # may be used in other threads for data processing, thus if we would clear it, we could
85
99
  # potentially clear a raw messages array for a job that is in the jobs queue.
100
+ #
101
+ # @note We do not clear the eof assignments because they can span across batch pollings.
102
+ # Since eof is not raised non-stop and is silenced after an eof poll, if we would clean it
103
+ # here we would lose the notion of it. The reset state for it should happen when we do
104
+ # discover new messages for given topic partition.
86
105
  def clear
87
106
  @size = 0
88
107
  @groups.each_value(&:clear)
@@ -92,8 +111,12 @@ module Karafka
92
111
 
93
112
  # Updates the messages count if we performed any operations that could change the state
94
113
  def recount!
95
- @size = @groups.each_value.sum do |partitions|
96
- partitions.each_value.map(&:count).sum
114
+ @size = 0
115
+
116
+ @groups.each_value do |partitions|
117
+ partitions.each_value do |details|
118
+ @size += details[:messages].size
119
+ end
97
120
  end
98
121
  end
99
122
  end
@@ -79,6 +79,7 @@ module Karafka
79
79
  nested(:connection) do
80
80
  required(:manager) { |val| !val.nil? }
81
81
  required(:conductor) { |val| !val.nil? }
82
+ required(:reset_backoff) { |val| val.is_a?(Integer) && val >= 1_000 }
82
83
 
83
84
  nested(:proxy) do
84
85
  nested(:commit) do
@@ -290,6 +290,9 @@ module Karafka
290
290
  when 'consumer.tick.error'
291
291
  error "Consumer on tick failed due to an error: #{error}"
292
292
  error details
293
+ when 'consumer.eofed.error'
294
+ error "Consumer on eofed failed due to an error: #{error}"
295
+ error details
293
296
  when 'consumer.after_consume.error'
294
297
  error "Consumer on after_consume failed due to an error: #{error}"
295
298
  error details
@@ -65,6 +65,10 @@ module Karafka
65
65
  consumer.tick
66
66
  consumer.ticked
67
67
 
68
+ consumer.before_schedule_eofed
69
+ consumer.eof
70
+ consumer.eofed
71
+
68
72
  consumer.before_schedule_shutdown
69
73
  consumer.shutting_down
70
74
  consumer.shutdown
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Jobs
18
+ # Non-Blocking version of the Eofed job
19
+ # We use this version for LRJ topics for cases where saturated resources would not allow
20
+ # to run this job for extended period of time. Under such scenarios, if we would not use
21
+ # a non-blocking one, we would reach max.poll.interval.ms.
22
+ class EofedNonBlocking < ::Karafka::Processing::Jobs::Eofed
23
+ # @param args [Array] any arguments accepted by `::Karafka::Processing::Jobs::Eofed`
24
+ def initialize(*args)
25
+ super
26
+ @non_blocking = true
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -33,6 +33,18 @@ module Karafka
33
33
  end
34
34
  end
35
35
 
36
+ # @param executor [Karafka::Pro::Processing::Executor]
37
+ # @return [Karafka::Processing::Jobs::Eofed] eofed job for non LRJ
38
+ # @return [Karafka::Pro::Processing::Jobs::EofedNonBlocking] eofed job that is
39
+ # non-blocking, so when eofed job is scheduled for LRJ it also will not block
40
+ def eofed(executor)
41
+ if executor.topic.long_running_job?
42
+ Jobs::EofedNonBlocking.new(executor)
43
+ else
44
+ super
45
+ end
46
+ end
47
+
36
48
  # @param executor [Karafka::Pro::Processing::Executor]
37
49
  # @return [Karafka::Processing::Jobs::Revoked] revocation job for non LRJ
38
50
  # @return [Karafka::Processing::Jobs::RevokedNonBlocking] revocation job that is
@@ -31,6 +31,7 @@ module Karafka
31
31
  # - a virtual partitioner
32
32
  # - more than one thread to process the data
33
33
  # - collective is not collapsed via coordinator
34
+ # - none of the partitioner executions raised an error
34
35
  #
35
36
  # With one thread it is not worth partitioning the work as the work itself will be
36
37
  # assigned to one thread (pointless work)
@@ -41,18 +42,36 @@ module Karafka
41
42
  # This is great because it allows us to run things without the parallelization that adds
42
43
  # a bit of uncertainty and allows us to use DLQ and safely skip messages if needed.
43
44
  if vps.active? && vps.max_partitions > 1 && !coordinator.collapsed?
44
- groupings = messages.group_by do |msg|
45
- # We need to reduce it to the max concurrency, so the group_id is not a direct effect
46
- # of the end user action. Otherwise the persistence layer for consumers would cache
47
- # it forever and it would cause memory leaks
48
- #
49
- # This also needs to be consistent because the aggregation here needs to warrant,
50
- # that the same partitioned message will always be assigned to the same virtual
51
- # partition. Otherwise in case of a window aggregation with VP spanning across
52
- # several polls, the data could not be complete.
53
- vps.reducer.call(
54
- vps.partitioner.call(msg)
45
+ # If we cannot virtualize even one message from a given batch due to user errors, we
46
+ # reduce the whole set into one partition and emit error. This should still allow for
47
+ # user flow but should mitigate damages by not virtualizing
48
+ begin
49
+ groupings = messages.group_by do |msg|
50
+ # We need to reduce it to the max concurrency, so the group_id is not a direct
51
+ # effect of the end user action. Otherwise the persistence layer for consumers
52
+ # would cache it forever and it would cause memory leaks
53
+ #
54
+ # This also needs to be consistent because the aggregation here needs to warrant,
55
+ # that the same partitioned message will always be assigned to the same virtual
56
+ # partition. Otherwise in case of a window aggregation with VP spanning across
57
+ # several polls, the data could not be complete.
58
+ vps.reducer.call(
59
+ vps.partitioner.call(msg)
60
+ )
61
+ end
62
+ rescue StandardError => e
63
+ # This should not happen. If you are seeing this it means your partitioner code
64
+ # failed and raised an error. We highly recommend mitigating partitioner level errors
65
+ # on the user side because this type of collapse should be considered a last resort
66
+ Karafka.monitor.instrument(
67
+ 'error.occurred',
68
+ caller: self,
69
+ error: e,
70
+ messages: messages,
71
+ type: 'virtual_partitions.partitioner.error'
55
72
  )
73
+
74
+ groupings = { 0 => messages }
56
75
  end
57
76
 
58
77
  groupings.each do |key, messages_group|
@@ -68,6 +68,7 @@ module Karafka
68
68
  alias on_schedule_shutdown schedule_fifo
69
69
  alias on_schedule_idle schedule_fifo
70
70
  alias on_schedule_periodic schedule_fifo
71
+ alias on_schedule_eofed schedule_fifo
71
72
 
72
73
  # This scheduler does not have anything to manage as it is a pass through and has no
73
74
  # state
@@ -15,6 +15,10 @@ module Karafka
15
15
 
16
16
  attr_reader :pause_tracker, :seek_offset, :topic, :partition
17
17
 
18
+ # This can be set directly on the listener because it can be triggered on first run without
19
+ # any messages
20
+ attr_accessor :eofed
21
+
18
22
  def_delegators :@pause_tracker, :attempt, :paused?
19
23
 
20
24
  # @param topic [Karafka::Routing::Topic]
@@ -32,6 +36,7 @@ module Karafka
32
36
  @mutex = Mutex.new
33
37
  @marked = false
34
38
  @failure = false
39
+ @eofed = false
35
40
  @changed_at = monotonic_now
36
41
  end
37
42
 
@@ -146,6 +151,11 @@ module Karafka
146
151
  @revoked
147
152
  end
148
153
 
154
+ # @return [Boolean] did we reach end of partition when polling data
155
+ def eofed?
156
+ @eofed
157
+ end
158
+
149
159
  # @return [Boolean] was the new seek offset assigned at least once. This is needed because
150
160
  # by default we assign seek offset of a first message ever, however this is insufficient
151
161
  # for DLQ in a scenario where the first message would be broken. We would never move
@@ -104,6 +104,18 @@ module Karafka
104
104
  consumer.on_idle
105
105
  end
106
106
 
107
+ # Runs the code needed before eofed work is scheduled
108
+ def before_schedule_eofed
109
+ consumer.on_before_schedule_eofed
110
+ end
111
+
112
+ # Runs the consumer eofed operation.
113
+ # This may run even when there were no messages received prior. This will however not
114
+ # run when eof is received together with messages as in such case `#consume` will run
115
+ def eofed
116
+ consumer.on_eofed
117
+ end
118
+
107
119
  # Runs code needed before revoked job is scheduled
108
120
  def before_schedule_revoked
109
121
  consumer.on_before_schedule_revoked if @consumer
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ module Jobs
6
+ # Job that runs the eofed operation when we receive eof without messages alongside.
7
+ class Eofed < Base
8
+ # @param executor [Karafka::Processing::Executor] executor that is supposed to run the job
9
+ # @return [Eofed]
10
+ def initialize(executor)
11
+ @executor = executor
12
+ super()
13
+ end
14
+
15
+ # Runs code prior to scheduling this eofed job
16
+ def before_schedule
17
+ executor.before_schedule_eofed
18
+ end
19
+
20
+ # Runs the eofed job via an executor.
21
+ def call
22
+ executor.eofed
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -12,6 +12,12 @@ module Karafka
12
12
  Jobs::Consume.new(executor, messages)
13
13
  end
14
14
 
15
+ # @param executor [Karafka::Processing::Executor]
16
+ # @return [Karafka::Processing::Jobs::Eofed] eofed job
17
+ def eofed(executor)
18
+ Jobs::Eofed.new(executor)
19
+ end
20
+
15
21
  # @param executor [Karafka::Processing::Executor]
16
22
  # @return [Karafka::Processing::Jobs::Revoked] revocation job
17
23
  def revoked(executor)
@@ -24,6 +24,7 @@ module Karafka
24
24
  alias on_schedule_revocation on_schedule_consumption
25
25
  alias on_schedule_shutdown on_schedule_consumption
26
26
  alias on_schedule_idle on_schedule_consumption
27
+ alias on_schedule_eofed on_schedule_consumption
27
28
 
28
29
  # This scheduler does not have anything to manage as it is a pass through and has no state
29
30
  def on_manage
@@ -17,6 +17,7 @@ module Karafka
17
17
  %i[
18
18
  consume
19
19
  idle
20
+ eofed
20
21
  revoked
21
22
  shutdown
22
23
  ].each do |action|
@@ -151,6 +152,16 @@ module Karafka
151
152
  coordinator.decrement(:idle)
152
153
  end
153
154
 
155
+ # Runs the consumer `#eofed` method with reporting
156
+ def handle_eofed
157
+ Karafka.monitor.instrument('consumer.eof', caller: self)
158
+ Karafka.monitor.instrument('consumer.eofed', caller: self) do
159
+ eofed
160
+ end
161
+ ensure
162
+ coordinator.decrement(:eofed)
163
+ end
164
+
154
165
  # We need to always un-pause the processing in case we have lost a given partition.
155
166
  # Otherwise the underlying librdkafka would not know we may want to continue processing and
156
167
  # the pause could in theory last forever
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Routing
5
+ module Features
6
+ class Eofed < Base
7
+ # Config of this feature
8
+ Config = Struct.new(
9
+ :active,
10
+ keyword_init: true
11
+ ) { alias_method :active?, :active }
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Routing
5
+ module Features
6
+ class Eofed < Base
7
+ # Eofed related contracts namespace
8
+ module Contracts
9
+ # Contract for eofed topic setup
10
+ class Topic < Karafka::Contracts::Base
11
+ configure do |config|
12
+ config.error_messages = YAML.safe_load(
13
+ File.read(
14
+ File.join(Karafka.gem_root, 'config', 'locales', 'errors.yml')
15
+ )
16
+ ).fetch('en').fetch('validations').fetch('topic')
17
+ end
18
+
19
+ nested :eofed do
20
+ required(:active) { |val| [true, false].include?(val) }
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Routing
5
+ module Features
6
+ class Eofed < Base
7
+ # Routing topic eofed API
8
+ module Topic
9
+ # @param active [Boolean] should the `#eofed` job run on eof
10
+ def eofed(active = false)
11
+ @eofed ||= Config.new(
12
+ active: active
13
+ )
14
+ end
15
+
16
+ # @return [Boolean] Are `#eofed` jobs active
17
+ def eofed?
18
+ eofed.active?
19
+ end
20
+
21
+ # @return [Hash] topic setup hash
22
+ def to_h
23
+ super.merge(
24
+ eofed: eofed.to_h
25
+ ).freeze
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Routing
5
+ module Features
6
+ # Namespace for feature allowing to enable the `#eofed` jobs.
7
+ # We do not enable it always because users may only be interested in fast eofed yielding
8
+ # without running the `#eofed` operation at all. This saves on empty cycles of running
9
+ # pointless empty jobs.
10
+ class Eofed < Base
11
+ end
12
+ end
13
+ end
14
+ end
@@ -203,6 +203,9 @@ module Karafka
203
203
  setting :manager, default: Connection::Manager.new
204
204
  # Controls frequency of connections management checks
205
205
  setting :conductor, default: Connection::Conductor.new
206
+ # How long should we wait before a critical listener recovery
207
+ # Too short may cause endless rebalance loops
208
+ setting :reset_backoff, default: 60_000
206
209
 
207
210
  # Settings that are altered by our client proxy layer
208
211
  setting :proxy do
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.4.7'
6
+ VERSION = '2.4.8'
7
7
  end
data/lib/karafka.rb CHANGED
@@ -71,11 +71,23 @@ module Karafka
71
71
 
72
72
  # @return [Boolean] Do we run within/with Rails. We use this to initialize Railtie and proxy
73
73
  # the console invocation to Rails
74
+ #
75
+ # @note We allow users to disable Rails require because having Rails in the Gemfile does not
76
+ # always mean user wants to have it required. User may want to run Karafka without Rails
77
+ # even when having both in the same Gemfile.
74
78
  def rails?
75
79
  return @rails if instance_variable_defined?('@rails')
76
80
 
77
- # Do not load Rails again if already loaded
78
- Object.const_defined?('Rails::Railtie') || require('rails')
81
+ @rails = Object.const_defined?('Rails::Railtie')
82
+
83
+ # If Rails exists we set it immediately based on its presence and return
84
+ return @rails if @rails
85
+
86
+ # If rails is not present and user wants us not to force-load it, we return
87
+ return @rails if ENV['KARAFKA_REQUIRE_RAILS'] == 'false'
88
+
89
+ # If we should try to require it, we try and if no error, it means it's there
90
+ require('rails')
79
91
 
80
92
  @rails = true
81
93
  rescue LoadError
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.7
4
+ version: 2.4.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
36
36
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
37
37
  -----END CERTIFICATE-----
38
- date: 2024-08-01 00:00:00.000000000 Z
38
+ date: 2024-08-09 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: base64
@@ -71,6 +71,20 @@ dependencies:
71
71
  - - "<"
72
72
  - !ruby/object:Gem::Version
73
73
  version: 2.5.0
74
+ - !ruby/object:Gem::Dependency
75
+ name: karafka-rdkafka
76
+ requirement: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: 0.17.2
81
+ type: :runtime
82
+ prerelease: false
83
+ version_requirements: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: 0.17.2
74
88
  - !ruby/object:Gem::Dependency
75
89
  name: waterdrop
76
90
  requirement: !ruby/object:Gem::Requirement
@@ -290,6 +304,7 @@ files:
290
304
  - lib/karafka/pro/processing/filters/throttler.rb
291
305
  - lib/karafka/pro/processing/filters/virtual_limiter.rb
292
306
  - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
307
+ - lib/karafka/pro/processing/jobs/eofed_non_blocking.rb
293
308
  - lib/karafka/pro/processing/jobs/periodic.rb
294
309
  - lib/karafka/pro/processing/jobs/periodic_non_blocking.rb
295
310
  - lib/karafka/pro/processing/jobs/revoked_non_blocking.rb
@@ -444,6 +459,7 @@ files:
444
459
  - lib/karafka/processing/inline_insights/tracker.rb
445
460
  - lib/karafka/processing/jobs/base.rb
446
461
  - lib/karafka/processing/jobs/consume.rb
462
+ - lib/karafka/processing/jobs/eofed.rb
447
463
  - lib/karafka/processing/jobs/idle.rb
448
464
  - lib/karafka/processing/jobs/revoked.rb
449
465
  - lib/karafka/processing/jobs/shutdown.rb
@@ -487,6 +503,10 @@ files:
487
503
  - lib/karafka/routing/features/deserializers/config.rb
488
504
  - lib/karafka/routing/features/deserializers/contracts/topic.rb
489
505
  - lib/karafka/routing/features/deserializers/topic.rb
506
+ - lib/karafka/routing/features/eofed.rb
507
+ - lib/karafka/routing/features/eofed/config.rb
508
+ - lib/karafka/routing/features/eofed/contracts/topic.rb
509
+ - lib/karafka/routing/features/eofed/topic.rb
490
510
  - lib/karafka/routing/features/inline_insights.rb
491
511
  - lib/karafka/routing/features/inline_insights/config.rb
492
512
  - lib/karafka/routing/features/inline_insights/contracts/topic.rb
metadata.gz.sig CHANGED
Binary file