karafka 2.4.6 → 2.4.8

Files changed (45)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +19 -0
  4. data/Gemfile +1 -1
  5. data/Gemfile.lock +12 -11
  6. data/LICENSE +1 -1
  7. data/config/locales/errors.yml +3 -0
  8. data/docker-compose.yml +1 -1
  9. data/karafka.gemspec +2 -1
  10. data/lib/karafka/base_consumer.rb +23 -1
  11. data/lib/karafka/cli/server.rb +1 -0
  12. data/lib/karafka/cli/swarm.rb +1 -0
  13. data/lib/karafka/connection/client.rb +60 -22
  14. data/lib/karafka/connection/listener.rb +47 -11
  15. data/lib/karafka/connection/messages_buffer.rb +16 -6
  16. data/lib/karafka/connection/raw_messages_buffer.rb +32 -9
  17. data/lib/karafka/contracts/config.rb +1 -0
  18. data/lib/karafka/embedded.rb +2 -1
  19. data/lib/karafka/instrumentation/logger_listener.rb +3 -0
  20. data/lib/karafka/instrumentation/notifications.rb +4 -0
  21. data/lib/karafka/pro/processing/jobs/eofed_non_blocking.rb +32 -0
  22. data/lib/karafka/pro/processing/jobs_builder.rb +12 -0
  23. data/lib/karafka/pro/processing/partitioner.rb +30 -11
  24. data/lib/karafka/pro/processing/schedulers/default.rb +1 -0
  25. data/lib/karafka/processing/coordinator.rb +10 -0
  26. data/lib/karafka/processing/executor.rb +12 -0
  27. data/lib/karafka/processing/jobs/eofed.rb +27 -0
  28. data/lib/karafka/processing/jobs_builder.rb +6 -0
  29. data/lib/karafka/processing/schedulers/default.rb +1 -0
  30. data/lib/karafka/processing/strategies/default.rb +11 -0
  31. data/lib/karafka/routing/consumer_group.rb +9 -14
  32. data/lib/karafka/routing/features/eofed/config.rb +15 -0
  33. data/lib/karafka/routing/features/eofed/contracts/topic.rb +27 -0
  34. data/lib/karafka/routing/features/eofed/topic.rb +31 -0
  35. data/lib/karafka/routing/features/eofed.rb +14 -0
  36. data/lib/karafka/routing/subscription_group.rb +8 -0
  37. data/lib/karafka/server.rb +16 -0
  38. data/lib/karafka/setup/config.rb +3 -49
  39. data/lib/karafka/setup/defaults_injector.rb +64 -0
  40. data/lib/karafka/swarm/node.rb +2 -0
  41. data/lib/karafka/version.rb +1 -1
  42. data/lib/karafka.rb +14 -2
  43. data.tar.gz.sig +0 -0
  44. metadata +24 -3
  45. metadata.gz.sig +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: e550fa0b7395eab3b961181319d195809baeca82c02935d2735cf98353f6ef8b
- data.tar.gz: f734562bfb86db59a64f4fbeb596bfe126eacd428a4ade00da90d238d959ab64
+ metadata.gz: c7e8e51a5c0c4ded0d074965cee2090e64ca77e43443f4f36ab03cc3a21ddfd6
+ data.tar.gz: c6e912c518d301f55974a9e4deb491ebe4c3e073e6748fe62ce1003eaea7bed7
  SHA512:
- metadata.gz: f80b521d063653c93caee987358573c992af2488ffec9c6465dbb04f9e51e8334548cf669b2b5f8f5783540be01fa8b52e067042cf72afda35f14181148ec87d
- data.tar.gz: 32fd0f08102a2687f4b646d15cd07599f9aaefac957f828396af1f2d91bc91d5c922119bfec04d94cba63af86724d43cb50852de140d78e289fdbb612892dc78
+ metadata.gz: 8769a192c7ebb852250afd96611e115807172bf320d4e9d513bffbcc66f5570ca637aee1b8f3b68e46350b37054935ea905893139bc264a02949f975b39f2041
+ data.tar.gz: 13dc8e118850aace7127bae5fe667b6f73118d220b61b1d2b590cc18e71243b3c1467d43df9ae134cd7b26c0274a100e34d8a07db0c6091bc02fd9c95ffb24f0
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
  # Karafka Framework Changelog

+ ## 2.4.8 (2024-08-09)
+ - **[Feature]** Introduce the ability to react to `#eof` either from `#consume` or from `#eofed` when EOF arrives without new messages.
+ - [Enhancement] Provide `Consumer#eofed?` to indicate reaching EOF.
+ - [Enhancement] Always immediately report on the `inconsistent_group_protocol` error.
+ - [Enhancement] Reduce virtual partitioning to 1 partition when any partitioner execution in a partitioned batch crashes.
+ - [Enhancement] Provide `KARAFKA_REQUIRE_RAILS` to disable the default Rails `require`, allowing Karafka to run without Rails despite having Rails in the Gemfile.
+ - [Enhancement] Increase the final listener recovery period from 1 to 60 seconds to prevent constant rebalancing. This is a last-resort recovery and should never happen unless critical errors occur.
+
+ ## 2.4.7 (2024-08-01)
+ - [Enhancement] Introduce `Karafka::Server.execution_mode` to check in what mode the Karafka process operates (`standalone`, `swarm`, `supervisor`, `embedded`).
+ - [Enhancement] Ensure `max.poll.interval.ms` is always present and populate it with the librdkafka default.
+ - [Enhancement] Introduce a shutdown time limit for the unsubscription wait.
+ - [Enhancement] Tag each of the running swarm consumers with `mode:swarm`.
+ - [Change] Tag the embedded consumers with `mode:embedded` instead of `embedded`.
+ - [Fix] License identifier `LGPL-3.0` is deprecated for SPDX (#2177).
+ - [Fix] Fix an issue where custom clusters would not have default settings populated the same way as the primary cluster.
+ - [Fix] Fix RSpec warnings about nil mocks.
+ - [Maintenance] Cover `cooperative-sticky` librdkafka issues with an integration spec.
+
  ## 2.4.6 (2024-07-22)
  - [Fix] Mitigate `rd_kafka_cgrp_terminated` and other `librdkafka` shutdown issues by unsubscribing fully prior to shutdown.

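For readers tracking the 2.4.8 EOF feature through the diffs below: it is opt-in on two levels, the librdkafka `enable.partition.eof` setting and the routing `eofed` flag added in this release. A minimal enablement sketch (illustrative, with a hypothetical `EventsConsumer`):

    class KarafkaApp < Karafka::App
      setup do |config|
        config.kafka = {
          'bootstrap.servers': '127.0.0.1:9092',
          # Ask librdkafka to emit partition EOF events during polling
          'enable.partition.eof': true
        }
      end

      routes.draw do
        topic :events do
          consumer EventsConsumer
          # Opt this topic into `#eofed` jobs (routing feature added in 2.4.8)
          eofed(true)
        end
      end
    end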
data/Gemfile CHANGED
@@ -12,7 +12,7 @@ gemspec
  group :integrations do
  gem 'activejob', require: false
  gem 'karafka-testing', '>= 2.4.0', require: false
- gem 'karafka-web', '>= 0.9.0', require: false
+ gem 'karafka-web', '>= 0.10.0.beta1', require: false
  gem 'rspec', require: false
  end

data/Gemfile.lock CHANGED
@@ -1,9 +1,10 @@
  PATH
  remote: .
  specs:
- karafka (2.4.6)
+ karafka (2.4.8)
  base64 (~> 0.2)
  karafka-core (>= 2.4.3, < 2.5.0)
+ karafka-rdkafka (>= 0.17.2)
  waterdrop (>= 2.7.3, < 3.0.0)
  zeitwerk (~> 2.3)

@@ -29,7 +30,7 @@ GEM
  concurrent-ruby (1.3.3)
  connection_pool (2.4.1)
  diff-lcs (1.5.1)
- docile (1.4.0)
+ docile (1.4.1)
  drb (2.2.1)
  erubi (1.13.0)
  factory_bot (6.4.6)
@@ -41,26 +42,26 @@ GEM
  concurrent-ruby (~> 1.0)
  karafka-core (2.4.4)
  karafka-rdkafka (>= 0.15.0, < 0.18.0)
- karafka-rdkafka (0.17.0)
+ karafka-rdkafka (0.17.3)
  ffi (~> 1.15)
  mini_portile2 (~> 2.6)
  rake (> 12)
- karafka-testing (2.4.5)
+ karafka-testing (2.4.6)
  karafka (>= 2.4.0, < 2.5.0)
  waterdrop (>= 2.7.0)
- karafka-web (0.9.1)
+ karafka-web (0.10.0.rc1)
  erubi (~> 1.4)
- karafka (>= 2.4.0, < 2.5.0)
+ karafka (>= 2.4.7, < 2.5.0)
  karafka-core (>= 2.4.0, < 2.5.0)
  roda (~> 3.68, >= 3.69)
  tilt (~> 2.0)
  mini_portile2 (2.8.7)
- minitest (5.24.0)
+ minitest (5.24.1)
  mutex_m (0.2.0)
  ostruct (0.6.0)
- rack (3.1.5)
+ rack (3.1.7)
  rake (13.2.1)
- roda (3.81.0)
+ roda (3.82.0)
  rack
  rspec (3.13.0)
  rspec-core (~> 3.13.0)
@@ -88,7 +89,7 @@ GEM
  karafka-core (>= 2.4.3, < 3.0.0)
  karafka-rdkafka (>= 0.15.1)
  zeitwerk (~> 2.3)
- zeitwerk (2.6.16)
+ zeitwerk (2.6.17)

  PLATFORMS
  ruby
@@ -100,7 +101,7 @@ DEPENDENCIES
  factory_bot
  karafka!
  karafka-testing (>= 2.4.0)
- karafka-web (>= 0.9.0)
+ karafka-web (>= 0.10.0.beta1)
  ostruct
  rspec
  simplecov
data/LICENSE CHANGED
@@ -1,7 +1,7 @@
  Copyright (c) Maciej Mensfeld

  Karafka is an Open Source project licensed under the terms of
- the LGPLv3 license. Please see <https://github.com/karafka/karafka/blob/master/LGPL>
+ the LGPLv3 license. Please see <https://github.com/karafka/karafka/blob/master/LICENSE-LGPL>
  for license text.

  Karafka has also commercial-friendly license, commercial support and commercial components.
data/config/locales/errors.yml CHANGED
@@ -49,6 +49,7 @@ en:

  internal.connection.manager_format: needs to be present
  internal.connection.conductor_format: needs to be present
+ internal.connection.reset_backoff_format: needs to be an integer bigger or equal to 1000
  internal.connection.proxy.query_watermark_offsets.timeout_format: needs to be an integer bigger than 0
  internal.connection.proxy.query_watermark_offsets.max_attempts_format: needs to be an integer bigger than 0
  internal.connection.proxy.query_watermark_offsets.wait_time_format: needs to be an integer bigger than 0
@@ -115,6 +116,8 @@ en:

  active_format: needs to be either true or false

+ eofed.active_format: needs to be either true or false
+
  declaratives.partitions_format: needs to be more or equal to 1
  declaratives.active_format: needs to be true
  declaratives.replication_factor_format: needs to be more or equal to 1
data/docker-compose.yml CHANGED
@@ -3,7 +3,7 @@ version: '2'
  services:
  kafka:
  container_name: kafka
- image: confluentinc/cp-kafka:7.6.2
+ image: confluentinc/cp-kafka:7.7.0

  ports:
  - 9092:9092
data/karafka.gemspec CHANGED
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
  spec.authors = ['Maciej Mensfeld']
  spec.email = %w[contact@karafka.io]
  spec.homepage = 'https://karafka.io'
- spec.licenses = %w[LGPL-3.0 Commercial]
+ spec.licenses = %w[LGPL-3.0-only Commercial]
  spec.summary = 'Karafka is Ruby and Rails efficient Kafka processing framework.'
  spec.description = <<-DESC
  Karafka is Ruby and Rails efficient Kafka processing framework.
@@ -23,6 +23,7 @@ Gem::Specification.new do |spec|

  spec.add_dependency 'base64', '~> 0.2'
  spec.add_dependency 'karafka-core', '>= 2.4.3', '< 2.5.0'
+ spec.add_dependency 'karafka-rdkafka', '>= 0.17.2'
  spec.add_dependency 'waterdrop', '>= 2.7.3', '< 3.0.0'
  spec.add_dependency 'zeitwerk', '~> 2.3'

data/lib/karafka/base_consumer.rb CHANGED
@@ -9,7 +9,7 @@ module Karafka

  extend Forwardable

- def_delegators :@coordinator, :topic, :partition
+ def_delegators :@coordinator, :topic, :partition, :eofed?

  def_delegators :producer, :produce_async, :produce_sync, :produce_many_async,
  :produce_many_sync
@@ -100,6 +100,24 @@ module Karafka
  retry_after_pause
  end

+ # Can be used to run code prior to scheduling of eofed execution
+ def on_before_schedule_eofed
+ handle_before_schedule_eofed
+ end
+
+ # Trigger method for running on eof without messages
+ def on_eofed
+ handle_eofed
+ rescue StandardError => e
+ Karafka.monitor.instrument(
+ 'error.occurred',
+ error: e,
+ caller: self,
+ seek_offset: coordinator.seek_offset,
+ type: 'consumer.eofed.error'
+ )
+ end
+
  # Can be used to run code prior to scheduling of idle execution
  #
  # @private
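As the rescue above shows, an exception raised inside `#eofed` does not crash processing; it is routed through the `error.occurred` notification with the new `consumer.eofed.error` type. A minimal monitoring sketch (`ErrorTracker` is a hypothetical stand-in for your reporting tool):

    Karafka.monitor.subscribe('error.occurred') do |event|
      next unless event[:type] == 'consumer.eofed.error'

      # event[:error] holds the raised exception, event[:caller] the consumer instance
      ErrorTracker.notify(event[:error])
    end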
@@ -166,6 +184,10 @@ module Karafka
  raise NotImplementedError, 'Implement this in a subclass'
  end

+ # Method that will be executed when a given topic partition reaches eof without any new
+ # incoming messages alongside
+ def eofed; end
+
  # Method that will be executed when a given topic partition is revoked. You can use it for
  # some teardown procedures (closing file handler, etc).
  def revoked; end
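A consumer overrides this no-op to react to partition ends, and can also consult `eofed?` (delegated from the coordinator, per the earlier hunk) during regular consumption. A minimal sketch with hypothetical `process` and `flush` helpers:

    class EventsConsumer < Karafka::BaseConsumer
      def consume
        messages.each { |message| process(message.payload) }
        # True when the poll that delivered this batch also reached EOF
        flush if eofed?
      end

      # Runs only when EOF is reached without any new messages
      def eofed
        flush
      end

      private

      def process(payload); end # hypothetical per-message work

      def flush; end # hypothetical flush of accumulated state
    end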
data/lib/karafka/cli/server.rb CHANGED
@@ -89,6 +89,7 @@ module Karafka
  register_inclusions
  register_exclusions

+ Karafka::Server.execution_mode = :standalone
  Karafka::Server.run
  end

data/lib/karafka/cli/swarm.rb CHANGED
@@ -23,6 +23,7 @@ module Karafka
  server.register_inclusions
  server.register_exclusions

+ Karafka::Server.execution_mode = :supervisor
  Karafka::Swarm::Supervisor.new.run
  end
  end
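Together with the `:embedded` and `:swarm` assignments elsewhere in this release, these two assignments let application code branch on how the process was started. A small sketch:

    case Karafka::Server.execution_mode
    when :standalone then Karafka.logger.info('Started via `karafka server`')
    when :supervisor then Karafka.logger.info('Supervising swarm nodes')
    when :swarm      then Karafka.logger.info('Running as a swarm node')
    when :embedded   then Karafka.logger.info('Embedded in another process')
    end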
data/lib/karafka/connection/client.rb CHANGED
@@ -8,6 +8,8 @@ module Karafka
  # It is threadsafe and provides some security measures so we won't end up operating on a
  # closed consumer instance as it causes Ruby VM process to crash.
  class Client
+ include ::Karafka::Core::Helpers::Time
+
  attr_reader :rebalance_manager

  # @return [Karafka::Routing::SubscriptionGroup] subscription group to which this client
@@ -24,7 +26,26 @@ module Karafka
  # How many times should we retry polling in case of a failure
  MAX_POLL_RETRIES = 20

- private_constant :MAX_POLL_RETRIES
+ # How much time of the total shutdown time can we wait for our manual unsubscribe before
+ # attempting to close without unsubscribe. We try to wait for 50% of the shutdown time
+ # before we move to a regular unsubscribe.
+ COOP_UNSUBSCRIBE_FACTOR = 0.5
+
+ # Errors upon which we early report that something is off without retrying prior to the
+ # report
+ EARLY_REPORT_ERRORS = [
+ :inconsistent_group_protocol, # 23
+ :max_poll_exceeded, # -147
+ :network_exception, # 13
+ :transport, # -195
+ :topic_authorization_failed, # 29
+ :group_authorization_failed, # 30
+ :cluster_authorization_failed, # 31
+ # This can happen for many reasons, including issues with static membership being fenced
+ :fatal # -150
+ ].freeze
+
+ private_constant :MAX_POLL_RETRIES, :COOP_UNSUBSCRIBE_FACTOR, :EARLY_REPORT_ERRORS

  # Creates a new consumer instance.
  #
@@ -91,8 +112,17 @@ module Karafka
  # Fetch message within our time boundaries
  response = poll(time_poll.remaining)

- # Put a message to the buffer if there is one
- @buffer << response if response && response != :tick_time
+ case response
+ when :tick_time
+ nil
+ # We get a hash only in case of eof error
+ when Hash
+ @buffer.eof(response[:topic], response[:partition])
+ when nil
+ nil
+ else
+ @buffer << response
+ end

  # Upon polling rebalance manager might have been updated.
  # If partition revocation happens, we need to remove messages from revoked partitions
@@ -115,10 +145,11 @@ module Karafka
  time_poll.checkpoint

  # Finally once we've (potentially) removed revoked, etc, if no messages were returned
- # and it was not an early poll exit, we can break.
+ # and it was not an early poll exit, we can break. We also break if we got the eof
+ # signaling to propagate it asap
  # Worth keeping in mind, that the rebalance manager might have been updated despite no
  # messages being returned during a poll
- break unless response
+ break if response.nil? || response.is_a?(Hash)
  end

  @buffer
@@ -261,14 +292,32 @@ module Karafka
  # an issue that gets back every few versions of librdkafka in a limited scope, for example
  # for cooperative-sticky or in a general scope. This is why we unsubscribe and wait until
  # we no longer have any assignments. That way librdkafka consumer shutdown should never
- # happen with rebalance associated with the given consumer instance
+ # happen with rebalance associated with the given consumer instance. Since we do not want
+ # to wait forever, we also impose a limit on how long should we wait. This prioritizes
+ # shutdown stability over endless wait.
+ #
+ # The `@unsubscribing` ensures that when there would be a direct close attempt, it
+ # won't get into this loop again. This can happen when supervision decides it should close
+ # things faster
  #
  # @see https://github.com/confluentinc/librdkafka/issues/4792
  # @see https://github.com/confluentinc/librdkafka/issues/4527
  if unsubscribe?
+ @unsubscribing = true
+
+ # Give 50% of time for the final close before we reach the forceful
+ max_wait = ::Karafka::App.config.shutdown_timeout * COOP_UNSUBSCRIBE_FACTOR
+ used = 0
+ stopped_at = monotonic_now
+
  unsubscribe

  until assignment.empty?
+ used += monotonic_now - stopped_at
+ stopped_at = monotonic_now
+
+ break if used >= max_wait
+
  sleep(0.1)

  ping
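The budget accounting above uses a monotonic clock (via `Karafka::Core::Helpers::Time#monotonic_now`), so wall-clock adjustments cannot distort the wait. The same pattern in plain Ruby, as an illustration:

    # Wait for a condition, but never longer than max_wait_ms milliseconds
    def bounded_wait(max_wait_ms)
      used = 0
      last = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)

      until yield
        now = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)
        used += now - last
        last = now

        break if used >= max_wait_ms

        sleep(0.1)
      end
    end

    bounded_wait(5_000) { assignment_released? } # hypothetical condition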
@@ -551,20 +600,7 @@ module Karafka
  # We want to report early on max poll interval exceeding because it may mean that the
  # underlying processing is taking too much time and it is not LRJ
  case e.code
- when :max_poll_exceeded # -147
- early_report = true
- when :network_exception # 13
- early_report = true
- when :transport # -195
- early_report = true
- when :topic_authorization_failed # 29
- early_report = true
- when :group_authorization_failed # 30
- early_report = true
- when :cluster_authorization_failed # 31
- early_report = true
- # This can happen for many reasons, including issues with static membership being fenced
- when :fatal # -150
+ when *EARLY_REPORT_ERRORS
  early_report = true
  # @see
  # https://github.com/confluentinc/confluent-kafka-dotnet/issues/1366#issuecomment-821842990
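The collapse into a single branch works because Ruby splats an array into a multi-value `when` clause. A standalone illustration of the pattern:

    CODES = %i[max_poll_exceeded transport fatal].freeze

    def early_report?(code)
      case code
      when *CODES then true # same as `when :max_poll_exceeded, :transport, :fatal`
      else false
      end
    end

    early_report?(:transport) # => true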
@@ -578,11 +614,12 @@ module Karafka
  # No sense in retrying when no topic/partition and we're no longer running
  retryable = false unless Karafka::App.running?
  # If we detect the end of partition which can happen if `enable.partition.eof` is set to
- # true, we can just return nil fast. This will fast yield whatever set of messages we
+ # true, we can just return fast. This will fast yield whatever set of messages we
  # already have instead of waiting. This can be used for better latency control when we do
  # not expect a lot of lag and want to quickly move to processing.
+ # We can also pass the eof notion to the consumers for improved decision making.
  when :partition_eof
- return nil
+ return e.details
  end

  if early_report || !retryable
@@ -709,6 +746,7 @@ module Karafka
  #
  # @return [Boolean] should we unsubscribe prior to shutdown
  def unsubscribe?
+ return false if @unsubscribing
  return false if @subscription_group.kafka.key?(:'group.instance.id')
  return false if @mode != :subscribe
  return false if assignment.empty?
data/lib/karafka/connection/listener.rb CHANGED
@@ -253,7 +253,9 @@ module Karafka

  reset

- sleep(1) && retry
+ # Ruby sleep is in seconds
+ sleep_time = ::Karafka::App.config.internal.connection.reset_backoff / 10_000.0
+ sleep(sleep_time) && retry
  end

  # Resumes processing of partitions that were paused due to an error.
@@ -330,28 +332,57 @@ module Karafka
  # given scheduler. It also handles the idle jobs when filtering API removed all messages
  # and we need to run house-keeping
  def build_and_schedule_flow_jobs
- return if @messages_buffer.empty?
-
  consume_jobs = []
  idle_jobs = []
+ eofed_jobs = []
+
+ @messages_buffer.each do |topic, partition, messages, eof|
+ # In case we did not receive any new messages without eof we skip.
+ # We may yield empty array here in case we have reached eof without new messages but in
+ # such cases, we can run an eof job
+ next if messages.empty? && !eof

- @messages_buffer.each do |topic, partition, messages|
  coordinator = @coordinators.find_or_create(topic, partition)
- # Start work coordination for this topic partition
+ coordinator.eofed = eof
+
+ # If we did not receive any messages and we did receive eof signal, we run the eofed
+ # jobs so user can take actions on reaching eof
+ if messages.empty? && eof
+ # If user wants to run the eofed jobs on eof we do it. Otherwise we just allow it to
+ # pass through. This allows to configure if user actually wants to have `#eofed`
+ # logic or if he wants to only use fast eof work yield
+ if coordinator.topic.eofed?
+ @executors.find_all_or_create(topic, partition, coordinator).each do |executor|
+ coordinator.increment(:eofed)
+ eofed_jobs << @jobs_builder.eofed(executor)
+ end
+ end
+
+ next
+ end
+
  coordinator.start(messages)

+ # If it is not an eof and there are no new messages, we just run house-keeping
+ #
  # We do not increment coordinator for idle job because it's not a user related one
  # and it will not go through a standard lifecycle. Same applies to revoked and shutdown
  if messages.empty?
+ # Start work coordination for this topic partition
  coordinator.increment(:idle)
  executor = @executors.find_or_create(topic, partition, 0, coordinator)
  idle_jobs << @jobs_builder.idle(executor)
- else
- @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
- coordinator.increment(:consume)
- executor = @executors.find_or_create(topic, partition, group_id, coordinator)
- consume_jobs << @jobs_builder.consume(executor, partition_messages)
- end
+
+ next
+ end
+
+ # If there are messages, it is irrelevant if eof or not as consumption needs to happen
+ #
+ # Start work coordination for this topic partition
+ @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
+ coordinator.increment(:consume)
+ executor = @executors.find_or_create(topic, partition, group_id, coordinator)
+ consume_jobs << @jobs_builder.consume(executor, partition_messages)
  end
  end

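Setting the idle house-keeping branch aside, the dispatch above reduces to three outcomes per topic partition. A hypothetical condensation, not part of Karafka:

    def flow_job_kind(messages, eof, topic_eofed_enabled)
      if messages.empty?
        return :none unless eof                     # nothing new, no EOF: skip
        return topic_eofed_enabled ? :eofed : :none # EOF without messages
      end

      :consume # messages present: consume, EOF or not
    end

    flow_job_kind([], true, true)     # => :eofed
    flow_job_kind([:msg], true, true) # => :consume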
@@ -367,6 +398,11 @@ module Karafka
  consume_jobs.each(&:before_schedule)
  @scheduler.on_schedule_consumption(consume_jobs)
  end
+
+ unless eofed_jobs.empty?
+ eofed_jobs.each(&:before_schedule)
+ @scheduler.on_schedule_eofed(eofed_jobs)
+ end
  end

  # Builds and schedules periodic jobs for topics partitions for which no messages were
data/lib/karafka/connection/messages_buffer.rb CHANGED
@@ -23,9 +23,13 @@ module Karafka
  def initialize(subscription_group)
  @subscription_group = subscription_group
  @size = 0
+
  @groups = Hash.new do |topic_groups, topic|
  topic_groups[topic] = Hash.new do |partition_groups, partition|
- partition_groups[partition] = []
+ partition_groups[partition] = {
+ eof: false,
+ messages: []
+ }
  end
  end
  end
@@ -33,24 +37,29 @@ module Karafka
  # Remaps raw messages from the raw messages buffer to Karafka messages
  # @param raw_messages_buffer [RawMessagesBuffer] buffer with raw messages
  def remap(raw_messages_buffer)
- clear unless @size.zero?
+ clear

  # Since it happens "right after" we've received the messages, it is close enough in time
  # to be used as the moment we received messages.
  received_at = Time.now

- raw_messages_buffer.each do |topic, partition, messages|
+ raw_messages_buffer.each do |topic, partition, messages, eof|
  @size += messages.count

  ktopic = @subscription_group.topics.find(topic)

- @groups[topic][partition] = messages.map do |message|
+ built_messages = messages.map do |message|
  Messages::Builders::Message.call(
  message,
  ktopic,
  received_at
  )
  end
+
+ @groups[topic][partition] = {
+ eof: eof,
+ messages: built_messages
+ }
  end
  end

@@ -59,10 +68,11 @@ module Karafka
  # @yieldparam [String] topic name
  # @yieldparam [Integer] partition number
  # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
+ # @yieldparam [Boolean] true if eof, false otherwise
  def each
  @groups.each do |topic, partitions|
- partitions.each do |partition, messages|
- yield(topic, partition, messages)
+ partitions.each do |partition, details|
+ yield(topic, partition, details[:messages], details[:eof])
  end
  end
  end
data/lib/karafka/connection/raw_messages_buffer.rb CHANGED
@@ -2,7 +2,7 @@

  module Karafka
  module Connection
- # Buffer for raw librdkafka messages.
+ # Buffer for raw librdkafka messages and eof status.
  #
  # When message is added to this buffer, it gets assigned to an array with other messages from
  # the same topic and partition.
@@ -17,9 +17,13 @@ module Karafka
  # @return [Karafka::Connection::MessagesBuffer] buffer instance
  def initialize
  @size = 0
+
  @groups = Hash.new do |topic_groups, topic|
  topic_groups[topic] = Hash.new do |partition_groups, partition|
- partition_groups[partition] = []
+ partition_groups[partition] = {
+ eof: false,
+ messages: []
+ }
  end
  end
  end
@@ -30,7 +34,16 @@ module Karafka
  # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
  def <<(message)
  @size += 1
- @groups[message.topic][message.partition] << message
+ partition_state = @groups[message.topic][message.partition]
+ partition_state[:messages] << message
+ partition_state[:eof] = false
+ end
+
+ # Marks given topic partition as one that reached eof
+ # @param topic [String] topic that reached eof
+ # @param partition [Integer] partition that reached eof
+ def eof(topic, partition)
+ @groups[topic][partition][:eof] = true
  end

  # Allows to iterate over all the topics and partitions messages
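Note that `#<<` resets the flag, so a partition only reports `eof: true` when the EOF arrived after its newest message. Schematically (with a hypothetical rdkafka message object):

    buffer = Karafka::Connection::RawMessagesBuffer.new

    buffer.eof('events', 0)
    buffer.each { |topic, partition, messages, eof| } # yields ('events', 0, [], true)

    buffer << message_from_events_0 # appending flips eof back to false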
@@ -38,10 +51,11 @@ module Karafka
  # @yieldparam [String] topic name
  # @yieldparam [Integer] partition number
  # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+ # @yieldparam [Boolean] whether polling of this partition reached eof
  def each
  @groups.each do |topic, partitions|
- partitions.each do |partition, messages|
- yield(topic, partition, messages)
+ partitions.each do |partition, details|
+ yield(topic, partition, details[:messages], details[:eof])
  end
  end
  end
@@ -69,8 +83,8 @@ module Karafka
  # again and we do want to ensure as few duplications as possible
  def uniq!
  @groups.each_value do |partitions|
- partitions.each_value do |messages|
- messages.uniq!(&:offset)
+ partitions.each_value do |details|
+ details[:messages].uniq!(&:offset)
  end
  end

@@ -83,6 +97,11 @@ module Karafka
  # we save ourselves some objects allocations. We cannot clear the underlying arrays as they
  # may be used in other threads for data processing, thus if we would clear it, we could
  # potentially clear a raw messages array for a job that is in the jobs queue.
+ #
+ # @note We do not clear the eof assignments because they can span across batch pollings.
+ # Since eof is not raised non-stop and is silenced after an eof poll, if we would clean it
+ # here we would lose the notion of it. The reset state for it should happen when we do
+ # discover new messages for given topic partition.
  def clear
  @size = 0
  @groups.each_value(&:clear)
@@ -92,8 +111,12 @@ module Karafka
  # Updates the messages count if we performed any operations that could change the state
  def recount!
- @size = @groups.each_value.sum do |partitions|
- partitions.each_value.map(&:count).sum
+ @size = 0
+
+ @groups.each_value do |partitions|
+ partitions.each_value do |details|
+ @size += details[:messages].size
+ end
  end
  end
  end
data/lib/karafka/contracts/config.rb CHANGED
@@ -79,6 +79,7 @@ module Karafka
  nested(:connection) do
  required(:manager) { |val| !val.nil? }
  required(:conductor) { |val| !val.nil? }
+ required(:reset_backoff) { |val| val.is_a?(Integer) && val >= 1_000 }

  nested(:proxy) do
  nested(:commit) do
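The new `reset_backoff` is an internal setting validated to be an integer of at least 1000; judging by the validation message and the listener sleep shown earlier, it appears to be expressed in milliseconds. If it ever needed tuning, a sketch would look like this (internals are subject to change):

    class KarafkaApp < Karafka::App
      setup do |config|
        # Back off longer between client resets after critical listener errors
        config.internal.connection.reset_backoff = 120_000
      end
    end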
data/lib/karafka/embedded.rb CHANGED
@@ -27,7 +27,8 @@ module Karafka
  Thread.new do
  Thread.current.name = 'karafka.embedded'

- Karafka::Process.tags.add(:execution_mode, 'embedded')
+ Karafka::Process.tags.add(:execution_mode, 'mode:embedded')
+ Karafka::Server.execution_mode = :embedded
  Karafka::Server.start
  end
  end
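For context, embedded mode starts Karafka inside another Ruby process; with this change such processes tag themselves `mode:embedded` and report the matching execution mode. Typical usage per the public embedding API, e.g. in a Puma configuration file:

    on_worker_boot do
      ::Karafka::Embedded.start
    end

    on_worker_shutdown do
      ::Karafka::Embedded.stop
    end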
data/lib/karafka/instrumentation/logger_listener.rb CHANGED
@@ -290,6 +290,9 @@ module Karafka
  when 'consumer.tick.error'
  error "Consumer on tick failed due to an error: #{error}"
  error details
+ when 'consumer.eofed.error'
+ error "Consumer on eofed failed due to an error: #{error}"
+ error details
  when 'consumer.after_consume.error'
  error "Consumer on after_consume failed due to an error: #{error}"
  error details