karafka 2.4.7 → 2.4.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +9 -1
  4. data/Gemfile +1 -1
  5. data/Gemfile.lock +10 -9
  6. data/config/locales/errors.yml +3 -0
  7. data/karafka.gemspec +1 -0
  8. data/lib/karafka/base_consumer.rb +23 -1
  9. data/lib/karafka/connection/client.rb +33 -21
  10. data/lib/karafka/connection/listener.rb +47 -11
  11. data/lib/karafka/connection/messages_buffer.rb +16 -6
  12. data/lib/karafka/connection/raw_messages_buffer.rb +32 -9
  13. data/lib/karafka/contracts/config.rb +1 -0
  14. data/lib/karafka/instrumentation/logger_listener.rb +3 -0
  15. data/lib/karafka/instrumentation/notifications.rb +4 -0
  16. data/lib/karafka/pro/processing/jobs/eofed_non_blocking.rb +32 -0
  17. data/lib/karafka/pro/processing/jobs_builder.rb +12 -0
  18. data/lib/karafka/pro/processing/partitioner.rb +30 -11
  19. data/lib/karafka/pro/processing/schedulers/default.rb +1 -0
  20. data/lib/karafka/processing/coordinator.rb +10 -0
  21. data/lib/karafka/processing/executor.rb +12 -0
  22. data/lib/karafka/processing/jobs/eofed.rb +27 -0
  23. data/lib/karafka/processing/jobs_builder.rb +6 -0
  24. data/lib/karafka/processing/schedulers/default.rb +1 -0
  25. data/lib/karafka/processing/strategies/default.rb +11 -0
  26. data/lib/karafka/routing/features/eofed/config.rb +15 -0
  27. data/lib/karafka/routing/features/eofed/contracts/topic.rb +27 -0
  28. data/lib/karafka/routing/features/eofed/topic.rb +31 -0
  29. data/lib/karafka/routing/features/eofed.rb +14 -0
  30. data/lib/karafka/setup/config.rb +3 -0
  31. data/lib/karafka/version.rb +1 -1
  32. data/lib/karafka.rb +14 -2
  33. data.tar.gz.sig +0 -0
  34. metadata +22 -2
  35. metadata.gz.sig +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6c94c3a4e646233f535e63bcf7a65c88d656f534bfccd21cad87be59b525adb1
4
- data.tar.gz: 63b6ad0491880325d6ac0e5a7367f2a938e8a36245bd1cff48d3de73da6266c6
3
+ metadata.gz: c7e8e51a5c0c4ded0d074965cee2090e64ca77e43443f4f36ab03cc3a21ddfd6
4
+ data.tar.gz: c6e912c518d301f55974a9e4deb491ebe4c3e073e6748fe62ce1003eaea7bed7
5
5
  SHA512:
6
- metadata.gz: 6c04bcd1f7ed17855140e519a82a52c0cae93a3c41126120ab797bbcf172315f7f7720f12e233cc85eed4af2dd437a3364f7f5613f730c9b2e32f8b79225570f
7
- data.tar.gz: 3441c4ac2230c5903dacd210b54c80c05f72c2804490229334d10656672c4ef9d9b3160c12cdd4d22916d55f1687a730980b0b3fc21099a003f36238f58025eb
6
+ metadata.gz: 8769a192c7ebb852250afd96611e115807172bf320d4e9d513bffbcc66f5570ca637aee1b8f3b68e46350b37054935ea905893139bc264a02949f975b39f2041
7
+ data.tar.gz: 13dc8e118850aace7127bae5fe667b6f73118d220b61b1d2b590cc18e71243b3c1467d43df9ae134cd7b26c0274a100e34d8a07db0c6091bc02fd9c95ffb24f0
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,7 +1,15 @@
1
1
  # Karafka Framework Changelog
2
2
 
3
+ ## 2.4.8 (2024-08-09)
4
+ - **[Feature]** Introduce ability to react to `#eof` either from `#consume` or from `#eofed` when EOF without new messages.
5
+ - [Enhancement] Provide `Consumer#eofed?` to indicate reaching EOF.
6
+ - [Enhancement] Always immediately report on `inconsistent_group_protocol` error.
7
+ - [Enhancement] Reduce virtual partitioning to 1 partition when any partitioner execution in a partitioned batch crashes.
8
+ - [Enhancement] Provide `KARAFKA_REQUIRE_RAILS` to disable default Rails `require` to run Karafka without Rails despite having Rails in the Gemfile.
9
+ - [Enhancement] Increase final listener recovery from 1 to 60 seconds to prevent constant rebalancing. This is the last resort recovery and should never happen unless critical errors occur.
10
+
3
11
  ## 2.4.7 (2024-08-01)
4
- - [Enhancement] Introduce `Karafka::Server.mode` to check in what mode Karafka process operates (`standalone`, `swarm`, `supervisor`, `embedded`).
12
+ - [Enhancement] Introduce `Karafka::Server.execution_mode` to check in what mode Karafka process operates (`standalone`, `swarm`, `supervisor`, `embedded`).
5
13
  - [Enhancement] Ensure `max.poll.interval.ms` is always present and populate it with librdkafka default.
6
14
  - [Enhancement] Introduce a shutdown time limit for unsubscription wait.
7
15
  - [Enhancement] Tag with `mode:swarm` each of the running swarm consumers.
data/Gemfile CHANGED
@@ -12,7 +12,7 @@ gemspec
12
12
  group :integrations do
13
13
  gem 'activejob', require: false
14
14
  gem 'karafka-testing', '>= 2.4.0', require: false
15
- gem 'karafka-web', '>= 0.9.0', require: false
15
+ gem 'karafka-web', '>= 0.10.0.beta1', require: false
16
16
  gem 'rspec', require: false
17
17
  end
18
18
 
data/Gemfile.lock CHANGED
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.4.7)
4
+ karafka (2.4.8)
5
5
  base64 (~> 0.2)
6
6
  karafka-core (>= 2.4.3, < 2.5.0)
7
+ karafka-rdkafka (>= 0.17.2)
7
8
  waterdrop (>= 2.7.3, < 3.0.0)
8
9
  zeitwerk (~> 2.3)
9
10
 
@@ -29,7 +30,7 @@ GEM
29
30
  concurrent-ruby (1.3.3)
30
31
  connection_pool (2.4.1)
31
32
  diff-lcs (1.5.1)
32
- docile (1.4.0)
33
+ docile (1.4.1)
33
34
  drb (2.2.1)
34
35
  erubi (1.13.0)
35
36
  factory_bot (6.4.6)
@@ -41,26 +42,26 @@ GEM
41
42
  concurrent-ruby (~> 1.0)
42
43
  karafka-core (2.4.4)
43
44
  karafka-rdkafka (>= 0.15.0, < 0.18.0)
44
- karafka-rdkafka (0.17.1)
45
+ karafka-rdkafka (0.17.3)
45
46
  ffi (~> 1.15)
46
47
  mini_portile2 (~> 2.6)
47
48
  rake (> 12)
48
49
  karafka-testing (2.4.6)
49
50
  karafka (>= 2.4.0, < 2.5.0)
50
51
  waterdrop (>= 2.7.0)
51
- karafka-web (0.9.1)
52
+ karafka-web (0.10.0.rc1)
52
53
  erubi (~> 1.4)
53
- karafka (>= 2.4.0, < 2.5.0)
54
+ karafka (>= 2.4.7, < 2.5.0)
54
55
  karafka-core (>= 2.4.0, < 2.5.0)
55
56
  roda (~> 3.68, >= 3.69)
56
57
  tilt (~> 2.0)
57
58
  mini_portile2 (2.8.7)
58
- minitest (5.24.0)
59
+ minitest (5.24.1)
59
60
  mutex_m (0.2.0)
60
61
  ostruct (0.6.0)
61
- rack (3.1.5)
62
+ rack (3.1.7)
62
63
  rake (13.2.1)
63
- roda (3.81.0)
64
+ roda (3.82.0)
64
65
  rack
65
66
  rspec (3.13.0)
66
67
  rspec-core (~> 3.13.0)
@@ -100,7 +101,7 @@ DEPENDENCIES
100
101
  factory_bot
101
102
  karafka!
102
103
  karafka-testing (>= 2.4.0)
103
- karafka-web (>= 0.9.0)
104
+ karafka-web (>= 0.10.0.beta1)
104
105
  ostruct
105
106
  rspec
106
107
  simplecov
@@ -49,6 +49,7 @@ en:
49
49
 
50
50
  internal.connection.manager_format: needs to be present
51
51
  internal.connection.conductor_format: needs to be present
52
+ internal.connection.reset_backoff_format: needs to be an integer bigger or equal to 1000
52
53
  internal.connection.proxy.query_watermark_offsets.timeout_format: needs to be an integer bigger than 0
53
54
  internal.connection.proxy.query_watermark_offsets.max_attempts_format: needs to be an integer bigger than 0
54
55
  internal.connection.proxy.query_watermark_offsets.wait_time_format: needs to be an integer bigger than 0
@@ -115,6 +116,8 @@ en:
115
116
 
116
117
  active_format: needs to be either true or false
117
118
 
119
+ eofed.active_format: needs to be either true or false
120
+
118
121
  declaratives.partitions_format: needs to be more or equal to 1
119
122
  declaratives.active_format: needs to be true
120
123
  declaratives.replication_factor_format: needs to be more or equal to 1
data/karafka.gemspec CHANGED
@@ -23,6 +23,7 @@ Gem::Specification.new do |spec|
23
23
 
24
24
  spec.add_dependency 'base64', '~> 0.2'
25
25
  spec.add_dependency 'karafka-core', '>= 2.4.3', '< 2.5.0'
26
+ spec.add_dependency 'karafka-rdkafka', '>= 0.17.2'
26
27
  spec.add_dependency 'waterdrop', '>= 2.7.3', '< 3.0.0'
27
28
  spec.add_dependency 'zeitwerk', '~> 2.3'
28
29
 
@@ -9,7 +9,7 @@ module Karafka
9
9
 
10
10
  extend Forwardable
11
11
 
12
- def_delegators :@coordinator, :topic, :partition
12
+ def_delegators :@coordinator, :topic, :partition, :eofed?
13
13
 
14
14
  def_delegators :producer, :produce_async, :produce_sync, :produce_many_async,
15
15
  :produce_many_sync
@@ -100,6 +100,24 @@ module Karafka
100
100
  retry_after_pause
101
101
  end
102
102
 
103
+ # Can be used to run code prior to scheduling of eofed execution
104
+ def on_before_schedule_eofed
105
+ handle_before_schedule_eofed
106
+ end
107
+
108
+ # Trigger method for running on eof without messages
109
+ def on_eofed
110
+ handle_eofed
111
+ rescue StandardError => e
112
+ Karafka.monitor.instrument(
113
+ 'error.occurred',
114
+ error: e,
115
+ caller: self,
116
+ seek_offset: coordinator.seek_offset,
117
+ type: 'consumer.eofed.error'
118
+ )
119
+ end
120
+
103
121
  # Can be used to run code prior to scheduling of idle execution
104
122
  #
105
123
  # @private
@@ -166,6 +184,10 @@ module Karafka
166
184
  raise NotImplementedError, 'Implement this in a subclass'
167
185
  end
168
186
 
187
+ # Method that will be executed when a given topic partition reaches eof without any new
188
+ # incoming messages alongside
189
+ def eofed; end
190
+
169
191
  # Method that will be executed when a given topic partition is revoked. You can use it for
170
192
  # some teardown procedures (closing file handler, etc).
171
193
  def revoked; end
@@ -31,7 +31,21 @@ module Karafka
31
31
  # before we move to a regular unsubscribe.
32
32
  COOP_UNSUBSCRIBE_FACTOR = 0.5
33
33
 
34
- private_constant :MAX_POLL_RETRIES, :COOP_UNSUBSCRIBE_FACTOR
34
+ # Errors upon which we early report that something is off without retrying prior to the
35
+ # report
36
+ EARLY_REPORT_ERRORS = [
37
+ :inconsistent_group_protocol, # 23
38
+ :max_poll_exceeded, # -147
39
+ :network_exception, # 13
40
+ :transport, # -195
41
+ :topic_authorization_failed, # 29
42
+ :group_authorization_failed, # 30
43
+ :cluster_authorization_failed, # 31
44
+ # This can happen for many reasons, including issues with static membership being fenced
45
+ :fatal # -150
46
+ ].freeze
47
+
48
+ private_constant :MAX_POLL_RETRIES, :COOP_UNSUBSCRIBE_FACTOR, :EARLY_REPORT_ERRORS
35
49
 
36
50
  # Creates a new consumer instance.
37
51
  #
@@ -98,8 +112,17 @@ module Karafka
98
112
  # Fetch message within our time boundaries
99
113
  response = poll(time_poll.remaining)
100
114
 
101
- # Put a message to the buffer if there is one
102
- @buffer << response if response && response != :tick_time
115
+ case response
116
+ when :tick_time
117
+ nil
118
+ # We get a hash only in case of eof error
119
+ when Hash
120
+ @buffer.eof(response[:topic], response[:partition])
121
+ when nil
122
+ nil
123
+ else
124
+ @buffer << response
125
+ end
103
126
 
104
127
  # Upon polling rebalance manager might have been updated.
105
128
  # If partition revocation happens, we need to remove messages from revoked partitions
@@ -122,10 +145,11 @@ module Karafka
122
145
  time_poll.checkpoint
123
146
 
124
147
  # Finally once we've (potentially) removed revoked, etc, if no messages were returned
125
- # and it was not an early poll exist, we can break.
148
+ # and it was not an early poll exit, we can break. We also break if we got the eof
149
+ # signaling to propagate it asap
126
150
  # Worth keeping in mind, that the rebalance manager might have been updated despite no
127
151
  # messages being returned during a poll
128
- break unless response
152
+ break if response.nil? || response.is_a?(Hash)
129
153
  end
130
154
 
131
155
  @buffer
@@ -576,20 +600,7 @@ module Karafka
576
600
  # We want to report early on max poll interval exceeding because it may mean that the
577
601
  # underlying processing is taking too much time and it is not LRJ
578
602
  case e.code
579
- when :max_poll_exceeded # -147
580
- early_report = true
581
- when :network_exception # 13
582
- early_report = true
583
- when :transport # -195
584
- early_report = true
585
- when :topic_authorization_failed # 29
586
- early_report = true
587
- when :group_authorization_failed # 30
588
- early_report = true
589
- when :cluster_authorization_failed # 31
590
- early_report = true
591
- # This can happen for many reasons, including issues with static membership being fenced
592
- when :fatal # -150
603
+ when *EARLY_REPORT_ERRORS
593
604
  early_report = true
594
605
  # @see
595
606
  # https://github.com/confluentinc/confluent-kafka-dotnet/issues/1366#issuecomment-821842990
@@ -603,11 +614,12 @@ module Karafka
603
614
  # No sense in retrying when no topic/partition and we're no longer running
604
615
  retryable = false unless Karafka::App.running?
605
616
  # If we detect the end of partition which can happen if `enable.partition.eof` is set to
606
- # true, we can just return nil fast. This will fast yield whatever set of messages we
617
+ # true, we can just return fast. This will fast yield whatever set of messages we
607
618
  # already have instead of waiting. This can be used for better latency control when we do
608
619
  # not expect a lot of lag and want to quickly move to processing.
620
+ # We can also pass the eof notion to the consumers for improved decision making.
609
621
  when :partition_eof
610
- return nil
622
+ return e.details
611
623
  end
612
624
 
613
625
  if early_report || !retryable
@@ -253,7 +253,9 @@ module Karafka
253
253
 
254
254
  reset
255
255
 
256
- sleep(1) && retry
256
+ # Ruby sleep expects seconds, while reset_backoff is in milliseconds
257
+ sleep_time = ::Karafka::App.config.internal.connection.reset_backoff / 1_000.0
258
+ sleep(sleep_time) && retry
257
259
  end
258
260
 
259
261
  # Resumes processing of partitions that were paused due to an error.
@@ -330,28 +332,57 @@ module Karafka
330
332
  # given scheduler. It also handles the idle jobs when filtering API removed all messages
331
333
  # and we need to run house-keeping
332
334
  def build_and_schedule_flow_jobs
333
- return if @messages_buffer.empty?
334
-
335
335
  consume_jobs = []
336
336
  idle_jobs = []
337
+ eofed_jobs = []
338
+
339
+ @messages_buffer.each do |topic, partition, messages, eof|
340
+ # In case we did not receive any new messages without eof we skip.
341
+ # We may yield empty array here in case we have reached eof without new messages but in
342
+ # such cases, we can run an eof job
343
+ next if messages.empty? && !eof
337
344
 
338
- @messages_buffer.each do |topic, partition, messages|
339
345
  coordinator = @coordinators.find_or_create(topic, partition)
340
- # Start work coordination for this topic partition
346
+ coordinator.eofed = eof
347
+
348
+ # If we did not receive any messages and we did receive eof signal, we run the eofed
349
+ # jobs so user can take actions on reaching eof
350
+ if messages.empty? && eof
351
+ # If user wants to run the eofed jobs on eof we do it. Otherwise we just allow it to
352
+ # pass through. This allows to configure if user actually wants to have `#eofed`
353
+ # logic or if he wants to only use fast eof work yield
354
+ if coordinator.topic.eofed?
355
+ @executors.find_all_or_create(topic, partition, coordinator).each do |executor|
356
+ coordinator.increment(:eofed)
357
+ eofed_jobs << @jobs_builder.eofed(executor)
358
+ end
359
+ end
360
+
361
+ next
362
+ end
363
+
341
364
  coordinator.start(messages)
342
365
 
366
+ # If it is not an eof and there are no new messages, we just run house-keeping
367
+ #
343
368
  # We do not increment coordinator for idle job because it's not a user related one
344
369
  # and it will not go through a standard lifecycle. Same applies to revoked and shutdown
345
370
  if messages.empty?
371
+ # Start work coordination for this topic partition
346
372
  coordinator.increment(:idle)
347
373
  executor = @executors.find_or_create(topic, partition, 0, coordinator)
348
374
  idle_jobs << @jobs_builder.idle(executor)
349
- else
350
- @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
351
- coordinator.increment(:consume)
352
- executor = @executors.find_or_create(topic, partition, group_id, coordinator)
353
- consume_jobs << @jobs_builder.consume(executor, partition_messages)
354
- end
375
+
376
+ next
377
+ end
378
+
379
+ # If there are messages, it is irrelevant if eof or not as consumption needs to happen
380
+ #
381
+ # Start work coordination for this topic partition
382
+ @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
383
+ coordinator.increment(:consume)
384
+ executor = @executors.find_or_create(topic, partition, group_id, coordinator)
385
+ consume_jobs << @jobs_builder.consume(executor, partition_messages)
355
386
  end
356
387
  end
357
388
 
@@ -367,6 +398,11 @@ module Karafka
367
398
  consume_jobs.each(&:before_schedule)
368
399
  @scheduler.on_schedule_consumption(consume_jobs)
369
400
  end
401
+
402
+ unless eofed_jobs.empty?
403
+ eofed_jobs.each(&:before_schedule)
404
+ @scheduler.on_schedule_eofed(eofed_jobs)
405
+ end
370
406
  end
371
407
 
372
408
  # Builds and schedules periodic jobs for topics partitions for which no messages were
@@ -23,9 +23,13 @@ module Karafka
23
23
  def initialize(subscription_group)
24
24
  @subscription_group = subscription_group
25
25
  @size = 0
26
+
26
27
  @groups = Hash.new do |topic_groups, topic|
27
28
  topic_groups[topic] = Hash.new do |partition_groups, partition|
28
- partition_groups[partition] = []
29
+ partition_groups[partition] = {
30
+ eof: false,
31
+ messages: []
32
+ }
29
33
  end
30
34
  end
31
35
  end
@@ -33,24 +37,29 @@ module Karafka
33
37
  # Remaps raw messages from the raw messages buffer to Karafka messages
34
38
  # @param raw_messages_buffer [RawMessagesBuffer] buffer with raw messages
35
39
  def remap(raw_messages_buffer)
36
- clear unless @size.zero?
40
+ clear
37
41
 
38
42
  # Since it happens "right after" we've received the messages, it is close enough it time
39
43
  # to be used as the moment we received messages.
40
44
  received_at = Time.now
41
45
 
42
- raw_messages_buffer.each do |topic, partition, messages|
46
+ raw_messages_buffer.each do |topic, partition, messages, eof|
43
47
  @size += messages.count
44
48
 
45
49
  ktopic = @subscription_group.topics.find(topic)
46
50
 
47
- @groups[topic][partition] = messages.map do |message|
51
+ built_messages = messages.map do |message|
48
52
  Messages::Builders::Message.call(
49
53
  message,
50
54
  ktopic,
51
55
  received_at
52
56
  )
53
57
  end
58
+
59
+ @groups[topic][partition] = {
60
+ eof: eof,
61
+ messages: built_messages
62
+ }
54
63
  end
55
64
  end
56
65
 
@@ -59,10 +68,11 @@ module Karafka
59
68
  # @yieldparam [String] topic name
60
69
  # @yieldparam [Integer] partition number
61
70
  # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
71
+ # @yieldparam [Boolean] true if eof, false otherwise
62
72
  def each
63
73
  @groups.each do |topic, partitions|
64
- partitions.each do |partition, messages|
65
- yield(topic, partition, messages)
74
+ partitions.each do |partition, details|
75
+ yield(topic, partition, details[:messages], details[:eof])
66
76
  end
67
77
  end
68
78
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Karafka
4
4
  module Connection
5
- # Buffer for raw librdkafka messages.
5
+ # Buffer for raw librdkafka messages and eof status.
6
6
  #
7
7
  # When message is added to this buffer, it gets assigned to an array with other messages from
8
8
  # the same topic and partition.
@@ -17,9 +17,13 @@ module Karafka
17
17
  # @return [Karafka::Connection::MessagesBuffer] buffer instance
18
18
  def initialize
19
19
  @size = 0
20
+
20
21
  @groups = Hash.new do |topic_groups, topic|
21
22
  topic_groups[topic] = Hash.new do |partition_groups, partition|
22
- partition_groups[partition] = []
23
+ partition_groups[partition] = {
24
+ eof: false,
25
+ messages: []
26
+ }
23
27
  end
24
28
  end
25
29
  end
@@ -30,7 +34,16 @@ module Karafka
30
34
  # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
31
35
  def <<(message)
32
36
  @size += 1
33
- @groups[message.topic][message.partition] << message
37
+ partition_state = @groups[message.topic][message.partition]
38
+ partition_state[:messages] << message
39
+ partition_state[:eof] = false
40
+ end
41
+
42
+ # Marks given topic partition as one that reached eof
43
+ # @param topic [String] topic that reached eof
44
+ # @param partition [Integer] partition that reached eof
45
+ def eof(topic, partition)
46
+ @groups[topic][partition][:eof] = true
34
47
  end
35
48
 
36
49
  # Allows to iterate over all the topics and partitions messages
@@ -38,10 +51,11 @@ module Karafka
38
51
  # @yieldparam [String] topic name
39
52
  # @yieldparam [Integer] partition number
40
53
  # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
54
+ # @yieldparam [Boolean] whether polling of this partition reached eof
41
55
  def each
42
56
  @groups.each do |topic, partitions|
43
- partitions.each do |partition, messages|
44
- yield(topic, partition, messages)
57
+ partitions.each do |partition, details|
58
+ yield(topic, partition, details[:messages], details[:eof])
45
59
  end
46
60
  end
47
61
  end
@@ -69,8 +83,8 @@ module Karafka
69
83
  # again and we do want to ensure as few duplications as possible
70
84
  def uniq!
71
85
  @groups.each_value do |partitions|
72
- partitions.each_value do |messages|
73
- messages.uniq!(&:offset)
86
+ partitions.each_value do |details|
87
+ details[:messages].uniq!(&:offset)
74
88
  end
75
89
  end
76
90
 
@@ -83,6 +97,11 @@ module Karafka
83
97
  # we save ourselves some objects allocations. We cannot clear the underlying arrays as they
84
98
  # may be used in other threads for data processing, thus if we would clear it, we could
85
99
  # potentially clear a raw messages array for a job that is in the jobs queue.
100
+ #
101
+ # @note We do not clear the eof assignments because they can span across batch pollings.
102
+ # Since eof is not raised non-stop and is silenced after an eof poll, if we would clean it
103
+ # here we would lose the notion of it. The reset state for it should happen when we do
104
+ # discover new messages for given topic partition.
86
105
  def clear
87
106
  @size = 0
88
107
  @groups.each_value(&:clear)
@@ -92,8 +111,12 @@ module Karafka
92
111
 
93
112
  # Updates the messages count if we performed any operations that could change the state
94
113
  def recount!
95
- @size = @groups.each_value.sum do |partitions|
96
- partitions.each_value.map(&:count).sum
114
+ @size = 0
115
+
116
+ @groups.each_value do |partitions|
117
+ partitions.each_value do |details|
118
+ @size += details[:messages].size
119
+ end
97
120
  end
98
121
  end
99
122
  end
@@ -79,6 +79,7 @@ module Karafka
79
79
  nested(:connection) do
80
80
  required(:manager) { |val| !val.nil? }
81
81
  required(:conductor) { |val| !val.nil? }
82
+ required(:reset_backoff) { |val| val.is_a?(Integer) && val >= 1_000 }
82
83
 
83
84
  nested(:proxy) do
84
85
  nested(:commit) do
@@ -290,6 +290,9 @@ module Karafka
290
290
  when 'consumer.tick.error'
291
291
  error "Consumer on tick failed due to an error: #{error}"
292
292
  error details
293
+ when 'consumer.eofed.error'
294
+ error "Consumer on eofed failed due to an error: #{error}"
295
+ error details
293
296
  when 'consumer.after_consume.error'
294
297
  error "Consumer on after_consume failed due to an error: #{error}"
295
298
  error details
@@ -65,6 +65,10 @@ module Karafka
65
65
  consumer.tick
66
66
  consumer.ticked
67
67
 
68
+ consumer.before_schedule_eofed
69
+ consumer.eof
70
+ consumer.eofed
71
+
68
72
  consumer.before_schedule_shutdown
69
73
  consumer.shutting_down
70
74
  consumer.shutdown
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Jobs
18
+ # Non-Blocking version of the Eofed job
19
+ # We use this version for LRJ topics for cases where saturated resources would not allow
20
+ # to run this job for extended period of time. Under such scenarios, if we would not use
21
+ # a non-blocking one, we would reach max.poll.interval.ms.
22
+ class EofedNonBlocking < ::Karafka::Processing::Jobs::Eofed
23
+ # @param args [Array] any arguments accepted by `::Karafka::Processing::Jobs::Eofed`
24
+ def initialize(*args)
25
+ super
26
+ @non_blocking = true
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -33,6 +33,18 @@ module Karafka
33
33
  end
34
34
  end
35
35
 
36
+ # @param executor [Karafka::Pro::Processing::Executor]
37
+ # @return [Karafka::Processing::Jobs::Eofed] eofed job for non LRJ
38
+ # @return [Karafka::Pro::Processing::Jobs::EofedNonBlocking] eofed job that is
39
+ # non-blocking, so when eofed job is scheduled for LRJ it also will not block
40
+ def eofed(executor)
41
+ if executor.topic.long_running_job?
42
+ Jobs::EofedNonBlocking.new(executor)
43
+ else
44
+ super
45
+ end
46
+ end
47
+
36
48
  # @param executor [Karafka::Pro::Processing::Executor]
37
49
  # @return [Karafka::Processing::Jobs::Revoked] revocation job for non LRJ
38
50
  # @return [Karafka::Processing::Jobs::RevokedNonBlocking] revocation job that is
@@ -31,6 +31,7 @@ module Karafka
31
31
  # - a virtual partitioner
32
32
  # - more than one thread to process the data
33
33
  # - collective is not collapsed via coordinator
34
+ # - none of the partitioner executions raised an error
34
35
  #
35
36
  # With one thread it is not worth partitioning the work as the work itself will be
36
37
  # assigned to one thread (pointless work)
@@ -41,18 +42,36 @@ module Karafka
41
42
  # This is great because it allows us to run things without the parallelization that adds
42
43
  # a bit of uncertainty and allows us to use DLQ and safely skip messages if needed.
43
44
  if vps.active? && vps.max_partitions > 1 && !coordinator.collapsed?
44
- groupings = messages.group_by do |msg|
45
- # We need to reduce it to the max concurrency, so the group_id is not a direct effect
46
- # of the end user action. Otherwise the persistence layer for consumers would cache
47
- # it forever and it would cause memory leaks
48
- #
49
- # This also needs to be consistent because the aggregation here needs to warrant,
50
- # that the same partitioned message will always be assigned to the same virtual
51
- # partition. Otherwise in case of a window aggregation with VP spanning across
52
- # several polls, the data could not be complete.
53
- vps.reducer.call(
54
- vps.partitioner.call(msg)
45
+ # If we cannot virtualize even one message from a given batch due to user errors, we
46
+ # reduce the whole set into one partition and emit error. This should still allow for
47
+ # user flow but should mitigate damages by not virtualizing
48
+ begin
49
+ groupings = messages.group_by do |msg|
50
+ # We need to reduce it to the max concurrency, so the group_id is not a direct
51
+ # effect of the end user action. Otherwise the persistence layer for consumers
52
+ # would cache it forever and it would cause memory leaks
53
+ #
54
+ # This also needs to be consistent because the aggregation here needs to warrant,
55
+ # that the same partitioned message will always be assigned to the same virtual
56
+ # partition. Otherwise in case of a window aggregation with VP spanning across
57
+ # several polls, the data could not be complete.
58
+ vps.reducer.call(
59
+ vps.partitioner.call(msg)
60
+ )
61
+ end
62
+ rescue StandardError => e
63
+ # This should not happen. If you are seeing this it means your partitioner code
64
+ # failed and raised an error. We highly recommend mitigating partitioner level errors
65
+ # on the user side because this type of collapse should be considered a last resort
66
+ Karafka.monitor.instrument(
67
+ 'error.occurred',
68
+ caller: self,
69
+ error: e,
70
+ messages: messages,
71
+ type: 'virtual_partitions.partitioner.error'
55
72
  )
73
+
74
+ groupings = { 0 => messages }
56
75
  end
57
76
 
58
77
  groupings.each do |key, messages_group|
@@ -68,6 +68,7 @@ module Karafka
68
68
  alias on_schedule_shutdown schedule_fifo
69
69
  alias on_schedule_idle schedule_fifo
70
70
  alias on_schedule_periodic schedule_fifo
71
+ alias on_schedule_eofed schedule_fifo
71
72
 
72
73
  # This scheduler does not have anything to manage as it is a pass through and has no
73
74
  # state
@@ -15,6 +15,10 @@ module Karafka
15
15
 
16
16
  attr_reader :pause_tracker, :seek_offset, :topic, :partition
17
17
 
18
+ # This can be set directly on the listener because it can be triggered on first run without
19
+ # any messages
20
+ attr_accessor :eofed
21
+
18
22
  def_delegators :@pause_tracker, :attempt, :paused?
19
23
 
20
24
  # @param topic [Karafka::Routing::Topic]
@@ -32,6 +36,7 @@ module Karafka
32
36
  @mutex = Mutex.new
33
37
  @marked = false
34
38
  @failure = false
39
+ @eofed = false
35
40
  @changed_at = monotonic_now
36
41
  end
37
42
 
@@ -146,6 +151,11 @@ module Karafka
146
151
  @revoked
147
152
  end
148
153
 
154
+ # @return [Boolean] did we reach end of partition when polling data
155
+ def eofed?
156
+ @eofed
157
+ end
158
+
149
159
  # @return [Boolean] was the new seek offset assigned at least once. This is needed because
150
160
  # by default we assign seek offset of a first message ever, however this is insufficient
151
161
  # for DLQ in a scenario where the first message would be broken. We would never move
@@ -104,6 +104,18 @@ module Karafka
104
104
  consumer.on_idle
105
105
  end
106
106
 
107
+ # Runs the code needed before eofed work is scheduled
108
+ def before_schedule_eofed
109
+ consumer.on_before_schedule_eofed
110
+ end
111
+
112
+ # Runs the consumer `#eofed` operation.
113
+ # This may run even when there were no messages received prior. This will however not
114
+ # run when eof is received together with messages as in such case `#consume` will run
115
+ def eofed
116
+ consumer.on_eofed
117
+ end
118
+
107
119
  # Runs code needed before revoked job is scheduled
108
120
  def before_schedule_revoked
109
121
  consumer.on_before_schedule_revoked if @consumer
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ module Jobs
6
+ # Job that runs the eofed operation when we receive eof without messages alongside.
7
+ class Eofed < Base
8
+ # @param executor [Karafka::Processing::Executor] executor that is supposed to run the job
9
+ # @return [Eofed]
10
+ def initialize(executor)
11
+ @executor = executor
12
+ super()
13
+ end
14
+
15
+ # Runs code prior to scheduling this eofed job
16
+ def before_schedule
17
+ executor.before_schedule_eofed
18
+ end
19
+
20
+ # Runs the eofed job via an executor.
21
+ def call
22
+ executor.eofed
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -12,6 +12,12 @@ module Karafka
12
12
  Jobs::Consume.new(executor, messages)
13
13
  end
14
14
 
15
+ # @param executor [Karafka::Processing::Executor]
16
+ # @return [Karafka::Processing::Jobs::Eofed] eofed job
17
+ def eofed(executor)
18
+ Jobs::Eofed.new(executor)
19
+ end
20
+
15
21
  # @param executor [Karafka::Processing::Executor]
16
22
  # @return [Karafka::Processing::Jobs::Revoked] revocation job
17
23
  def revoked(executor)
@@ -24,6 +24,7 @@ module Karafka
24
24
  alias on_schedule_revocation on_schedule_consumption
25
25
  alias on_schedule_shutdown on_schedule_consumption
26
26
  alias on_schedule_idle on_schedule_consumption
27
+ alias on_schedule_eofed on_schedule_consumption
27
28
 
28
29
  # This scheduler does not have anything to manage as it is a pass through and has no state
29
30
  def on_manage
@@ -17,6 +17,7 @@ module Karafka
17
17
  %i[
18
18
  consume
19
19
  idle
20
+ eofed
20
21
  revoked
21
22
  shutdown
22
23
  ].each do |action|
@@ -151,6 +152,16 @@ module Karafka
151
152
  coordinator.decrement(:idle)
152
153
  end
153
154
 
155
+ # Runs the consumer `#eofed` method with reporting
156
+ def handle_eofed
157
+ Karafka.monitor.instrument('consumer.eof', caller: self)
158
+ Karafka.monitor.instrument('consumer.eofed', caller: self) do
159
+ eofed
160
+ end
161
+ ensure
162
+ coordinator.decrement(:eofed)
163
+ end
164
+
154
165
  # We need to always un-pause the processing in case we have lost a given partition.
155
166
  # Otherwise the underlying librdkafka would not know we may want to continue processing and
156
167
  # the pause could in theory last forever
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Routing
5
+ module Features
6
+ class Eofed < Base
7
+ # Config of this feature
8
+ Config = Struct.new(
9
+ :active,
10
+ keyword_init: true
11
+ ) { alias_method :active?, :active }
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Routing
5
+ module Features
6
+ class Eofed < Base
7
+ # Eofed related contracts namespace
8
+ module Contracts
9
+ # Contract for eofed topic setup
10
+ class Topic < Karafka::Contracts::Base
11
+ configure do |config|
12
+ config.error_messages = YAML.safe_load(
13
+ File.read(
14
+ File.join(Karafka.gem_root, 'config', 'locales', 'errors.yml')
15
+ )
16
+ ).fetch('en').fetch('validations').fetch('topic')
17
+ end
18
+
19
+ nested :eofed do
20
+ required(:active) { |val| [true, false].include?(val) }
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Routing
5
+ module Features
6
+ class Eofed < Base
7
+ # Routing topic eofed API
8
+ module Topic
9
+ # @param active [Boolean] should the `#eofed` job run on eof
10
+ def eofed(active = false)
11
+ @eofed ||= Config.new(
12
+ active: active
13
+ )
14
+ end
15
+
16
+ # @return [Boolean] Are `#eofed` jobs active
17
+ def eofed?
18
+ eofed.active?
19
+ end
20
+
21
+ # @return [Hash] topic setup hash
22
+ def to_h
23
+ super.merge(
24
+ eofed: eofed.to_h
25
+ ).freeze
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Routing
5
+ module Features
6
+ # Namespace for feature allowing to enable the `#eofed` jobs.
7
+ # We do not enable it always because users may only be interested in fast eofed yielding
8
+ # without running the `#eofed` operation at all. This safes on empty cycles of running
9
+ # pointless empty jobs.
10
+ class Eofed < Base
11
+ end
12
+ end
13
+ end
14
+ end
@@ -203,6 +203,9 @@ module Karafka
203
203
  setting :manager, default: Connection::Manager.new
204
204
  # Controls frequency of connections management checks
205
205
  setting :conductor, default: Connection::Conductor.new
206
+ # How long should we wait before a critical listener recovery
207
+ # Too short may cause endless rebalance loops
208
+ setting :reset_backoff, default: 60_000
206
209
 
207
210
  # Settings that are altered by our client proxy layer
208
211
  setting :proxy do
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.4.7'
6
+ VERSION = '2.4.8'
7
7
  end
data/lib/karafka.rb CHANGED
@@ -71,11 +71,23 @@ module Karafka
71
71
 
72
72
  # @return [Boolean] Do we run within/with Rails. We use this to initialize Railtie and proxy
73
73
  # the console invocation to Rails
74
+ #
75
+ # @note We allow users to disable Rails require because having Rails in the Gemfile does not
76
+ # always mean user wants to have it required. User may want to run Karafka without Rails
77
+ # even when having both in the same Gemfile.
74
78
  def rails?
75
79
  return @rails if instance_variable_defined?('@rails')
76
80
 
77
- # Do not load Rails again if already loaded
78
- Object.const_defined?('Rails::Railtie') || require('rails')
81
+ @rails = Object.const_defined?('Rails::Railtie')
82
+
83
+ # If Rails exists we set it immediately based on its presence and return
84
+ return @rails if @rails
85
+
86
+ # If rails is not present and user wants us not to force-load it, we return
87
+ return @rails if ENV['KARAFKA_REQUIRE_RAILS'] == 'false'
88
+
89
+ # If we should try to require it, we try and if no error, it means its there
90
+ require('rails')
79
91
 
80
92
  @rails = true
81
93
  rescue LoadError
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.7
4
+ version: 2.4.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
36
36
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
37
37
  -----END CERTIFICATE-----
38
- date: 2024-08-01 00:00:00.000000000 Z
38
+ date: 2024-08-09 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: base64
@@ -71,6 +71,20 @@ dependencies:
71
71
  - - "<"
72
72
  - !ruby/object:Gem::Version
73
73
  version: 2.5.0
74
+ - !ruby/object:Gem::Dependency
75
+ name: karafka-rdkafka
76
+ requirement: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: 0.17.2
81
+ type: :runtime
82
+ prerelease: false
83
+ version_requirements: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: 0.17.2
74
88
  - !ruby/object:Gem::Dependency
75
89
  name: waterdrop
76
90
  requirement: !ruby/object:Gem::Requirement
@@ -290,6 +304,7 @@ files:
290
304
  - lib/karafka/pro/processing/filters/throttler.rb
291
305
  - lib/karafka/pro/processing/filters/virtual_limiter.rb
292
306
  - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
307
+ - lib/karafka/pro/processing/jobs/eofed_non_blocking.rb
293
308
  - lib/karafka/pro/processing/jobs/periodic.rb
294
309
  - lib/karafka/pro/processing/jobs/periodic_non_blocking.rb
295
310
  - lib/karafka/pro/processing/jobs/revoked_non_blocking.rb
@@ -444,6 +459,7 @@ files:
444
459
  - lib/karafka/processing/inline_insights/tracker.rb
445
460
  - lib/karafka/processing/jobs/base.rb
446
461
  - lib/karafka/processing/jobs/consume.rb
462
+ - lib/karafka/processing/jobs/eofed.rb
447
463
  - lib/karafka/processing/jobs/idle.rb
448
464
  - lib/karafka/processing/jobs/revoked.rb
449
465
  - lib/karafka/processing/jobs/shutdown.rb
@@ -487,6 +503,10 @@ files:
487
503
  - lib/karafka/routing/features/deserializers/config.rb
488
504
  - lib/karafka/routing/features/deserializers/contracts/topic.rb
489
505
  - lib/karafka/routing/features/deserializers/topic.rb
506
+ - lib/karafka/routing/features/eofed.rb
507
+ - lib/karafka/routing/features/eofed/config.rb
508
+ - lib/karafka/routing/features/eofed/contracts/topic.rb
509
+ - lib/karafka/routing/features/eofed/topic.rb
490
510
  - lib/karafka/routing/features/inline_insights.rb
491
511
  - lib/karafka/routing/features/inline_insights/config.rb
492
512
  - lib/karafka/routing/features/inline_insights/contracts/topic.rb
metadata.gz.sig CHANGED
Binary file