karafka 2.4.6 → 2.4.8
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +19 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +12 -11
- data/LICENSE +1 -1
- data/config/locales/errors.yml +3 -0
- data/docker-compose.yml +1 -1
- data/karafka.gemspec +2 -1
- data/lib/karafka/base_consumer.rb +23 -1
- data/lib/karafka/cli/server.rb +1 -0
- data/lib/karafka/cli/swarm.rb +1 -0
- data/lib/karafka/connection/client.rb +60 -22
- data/lib/karafka/connection/listener.rb +47 -11
- data/lib/karafka/connection/messages_buffer.rb +16 -6
- data/lib/karafka/connection/raw_messages_buffer.rb +32 -9
- data/lib/karafka/contracts/config.rb +1 -0
- data/lib/karafka/embedded.rb +2 -1
- data/lib/karafka/instrumentation/logger_listener.rb +3 -0
- data/lib/karafka/instrumentation/notifications.rb +4 -0
- data/lib/karafka/pro/processing/jobs/eofed_non_blocking.rb +32 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +12 -0
- data/lib/karafka/pro/processing/partitioner.rb +30 -11
- data/lib/karafka/pro/processing/schedulers/default.rb +1 -0
- data/lib/karafka/processing/coordinator.rb +10 -0
- data/lib/karafka/processing/executor.rb +12 -0
- data/lib/karafka/processing/jobs/eofed.rb +27 -0
- data/lib/karafka/processing/jobs_builder.rb +6 -0
- data/lib/karafka/processing/schedulers/default.rb +1 -0
- data/lib/karafka/processing/strategies/default.rb +11 -0
- data/lib/karafka/routing/consumer_group.rb +9 -14
- data/lib/karafka/routing/features/eofed/config.rb +15 -0
- data/lib/karafka/routing/features/eofed/contracts/topic.rb +27 -0
- data/lib/karafka/routing/features/eofed/topic.rb +31 -0
- data/lib/karafka/routing/features/eofed.rb +14 -0
- data/lib/karafka/routing/subscription_group.rb +8 -0
- data/lib/karafka/server.rb +16 -0
- data/lib/karafka/setup/config.rb +3 -49
- data/lib/karafka/setup/defaults_injector.rb +64 -0
- data/lib/karafka/swarm/node.rb +2 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +14 -2
- data.tar.gz.sig +0 -0
- metadata +24 -3
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c7e8e51a5c0c4ded0d074965cee2090e64ca77e43443f4f36ab03cc3a21ddfd6
+  data.tar.gz: c6e912c518d301f55974a9e4deb491ebe4c3e073e6748fe62ce1003eaea7bed7
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8769a192c7ebb852250afd96611e115807172bf320d4e9d513bffbcc66f5570ca637aee1b8f3b68e46350b37054935ea905893139bc264a02949f975b39f2041
+  data.tar.gz: 13dc8e118850aace7127bae5fe667b6f73118d220b61b1d2b590cc18e71243b3c1467d43df9ae134cd7b26c0274a100e34d8a07db0c6091bc02fd9c95ffb24f0
checksums.yaml.gz.sig
CHANGED
Binary file
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,24 @@
 # Karafka Framework Changelog
 
+## 2.4.8 (2024-08-09)
+- **[Feature]** Introduce the ability to react to `#eof` either from `#consume` or from `#eofed` when EOF arrives without new messages.
+- [Enhancement] Provide `Consumer#eofed?` to indicate reaching EOF.
+- [Enhancement] Always immediately report on the `inconsistent_group_protocol` error.
+- [Enhancement] Reduce virtual partitioning to 1 partition when any partitioner execution in a partitioned batch crashes.
+- [Enhancement] Provide `KARAFKA_REQUIRE_RAILS` to disable the default Rails `require`, so Karafka can run without Rails despite Rails being in the Gemfile.
+- [Enhancement] Increase final listener recovery from 1 to 60 seconds to prevent constant rebalancing. This is the last-resort recovery and should never happen unless critical errors occur.
+
+## 2.4.7 (2024-08-01)
+- [Enhancement] Introduce `Karafka::Server.execution_mode` to check in which mode the Karafka process operates (`standalone`, `swarm`, `supervisor`, `embedded`).
+- [Enhancement] Ensure `max.poll.interval.ms` is always present and populate it with the librdkafka default.
+- [Enhancement] Introduce a shutdown time limit for the unsubscription wait.
+- [Enhancement] Tag each of the running swarm consumers with `mode:swarm`.
+- [Change] Tag the embedded consumers with `mode:embedded` instead of `embedded`.
+- [Fix] License identifier `LGPL-3.0` is deprecated for SPDX (#2177).
+- [Fix] Fix an issue where custom clusters would not have default settings populated the same way as the primary cluster.
+- [Fix] Fix RSpec warnings about nil mocks.
+- [Maintenance] Cover `cooperative-sticky` librdkafka issues with an integration spec.
+
 ## 2.4.6 (2024-07-22)
 - [Fix] Mitigate `rd_kafka_cgrp_terminated` and other `librdkafka` shutdown issues by unsubscribing fully prior to shutdown.
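Taken together, the 2.4.8 entries describe an EOF propagation pipeline: librdkafka reports partition EOF, and Karafka exposes it via `#eofed?` inside `#consume` and via a standalone `#eofed` callback when EOF arrives with no new messages. Below is a minimal sketch of how a consumer could use this; it assumes the per-topic `eofed(true)` routing flag added by the `routing/features/eofed` files in the file list above, plus librdkafka's standard `enable.partition.eof` setting.

class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| process(message.payload) }
    # #eofed? is delegated to the coordinator and is true when this
    # batch ended exactly at the current end of the partition
    flush_aggregates if eofed?
  end

  # Runs when EOF is reached without any accompanying new messages
  def eofed
    flush_aggregates
  end

  private

  def process(payload); end   # placeholder user logic
  def flush_aggregates; end   # placeholder user logic
end

class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = {
      'bootstrap.servers': '127.0.0.1:9092',
      # Without this, librdkafka never emits partition EOF events
      'enable.partition.eof': true
    }
  end

  routes.draw do
    topic :events do
      consumer EventsConsumer
      # Opt into scheduling of #eofed jobs (assumed routing API)
      eofed(true)
    end
  end
end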
data/Gemfile
CHANGED
@@ -12,7 +12,7 @@ gemspec
 group :integrations do
   gem 'activejob', require: false
   gem 'karafka-testing', '>= 2.4.0', require: false
-  gem 'karafka-web', '>= 0.
+  gem 'karafka-web', '>= 0.10.0.beta1', require: false
   gem 'rspec', require: false
 end
data/Gemfile.lock
CHANGED
@@ -1,9 +1,10 @@
 PATH
   remote: .
   specs:
-    karafka (2.4.
+    karafka (2.4.8)
       base64 (~> 0.2)
       karafka-core (>= 2.4.3, < 2.5.0)
+      karafka-rdkafka (>= 0.17.2)
       waterdrop (>= 2.7.3, < 3.0.0)
       zeitwerk (~> 2.3)
 
@@ -29,7 +30,7 @@ GEM
    concurrent-ruby (1.3.3)
    connection_pool (2.4.1)
    diff-lcs (1.5.1)
-    docile (1.4.
+    docile (1.4.1)
    drb (2.2.1)
    erubi (1.13.0)
    factory_bot (6.4.6)
@@ -41,26 +42,26 @@ GEM
      concurrent-ruby (~> 1.0)
    karafka-core (2.4.4)
      karafka-rdkafka (>= 0.15.0, < 0.18.0)
-    karafka-rdkafka (0.17.
+    karafka-rdkafka (0.17.3)
      ffi (~> 1.15)
      mini_portile2 (~> 2.6)
      rake (> 12)
-    karafka-testing (2.4.
+    karafka-testing (2.4.6)
      karafka (>= 2.4.0, < 2.5.0)
      waterdrop (>= 2.7.0)
-    karafka-web (0.
+    karafka-web (0.10.0.rc1)
      erubi (~> 1.4)
-      karafka (>= 2.4.
+      karafka (>= 2.4.7, < 2.5.0)
      karafka-core (>= 2.4.0, < 2.5.0)
      roda (~> 3.68, >= 3.69)
      tilt (~> 2.0)
    mini_portile2 (2.8.7)
-    minitest (5.24.
+    minitest (5.24.1)
    mutex_m (0.2.0)
    ostruct (0.6.0)
-    rack (3.1.
+    rack (3.1.7)
    rake (13.2.1)
-    roda (3.
+    roda (3.82.0)
      rack
    rspec (3.13.0)
      rspec-core (~> 3.13.0)
@@ -88,7 +89,7 @@ GEM
      karafka-core (>= 2.4.3, < 3.0.0)
      karafka-rdkafka (>= 0.15.1)
      zeitwerk (~> 2.3)
-  zeitwerk (2.6.
+  zeitwerk (2.6.17)
 
 PLATFORMS
   ruby
@@ -100,7 +101,7 @@ DEPENDENCIES
  factory_bot
  karafka!
  karafka-testing (>= 2.4.0)
-  karafka-web (>= 0.
+  karafka-web (>= 0.10.0.beta1)
  ostruct
  rspec
  simplecov
data/LICENSE
CHANGED
@@ -1,7 +1,7 @@
 Copyright (c) Maciej Mensfeld
 
 Karafka is an Open Source project licensed under the terms of
-the LGPLv3 license. Please see <https://github.com/karafka/karafka/blob/master/LGPL>
+the LGPLv3 license. Please see <https://github.com/karafka/karafka/blob/master/LICENSE-LGPL>
 for license text.
 
 Karafka has also commercial-friendly license, commercial support and commercial components.
data/config/locales/errors.yml
CHANGED
@@ -49,6 +49,7 @@ en:
 
    internal.connection.manager_format: needs to be present
    internal.connection.conductor_format: needs to be present
+    internal.connection.reset_backoff_format: needs to be an integer bigger or equal to 1000
    internal.connection.proxy.query_watermark_offsets.timeout_format: needs to be an integer bigger than 0
    internal.connection.proxy.query_watermark_offsets.max_attempts_format: needs to be an integer bigger than 0
    internal.connection.proxy.query_watermark_offsets.wait_time_format: needs to be an integer bigger than 0
@@ -115,6 +116,8 @@ en:
 
    active_format: needs to be either true or false
 
+    eofed.active_format: needs to be either true or false
+
    declaratives.partitions_format: needs to be more or equal to 1
    declaratives.active_format: needs to be true
    declaratives.replication_factor_format: needs to be more or equal to 1
data/docker-compose.yml
CHANGED
data/karafka.gemspec
CHANGED
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
   spec.authors = ['Maciej Mensfeld']
   spec.email = %w[contact@karafka.io]
   spec.homepage = 'https://karafka.io'
-  spec.licenses = %w[LGPL-3.0 Commercial]
+  spec.licenses = %w[LGPL-3.0-only Commercial]
   spec.summary = 'Karafka is Ruby and Rails efficient Kafka processing framework.'
   spec.description = <<-DESC
     Karafka is Ruby and Rails efficient Kafka processing framework.
@@ -23,6 +23,7 @@ Gem::Specification.new do |spec|
 
   spec.add_dependency 'base64', '~> 0.2'
   spec.add_dependency 'karafka-core', '>= 2.4.3', '< 2.5.0'
+  spec.add_dependency 'karafka-rdkafka', '>= 0.17.2'
   spec.add_dependency 'waterdrop', '>= 2.7.3', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'
data/lib/karafka/base_consumer.rb
CHANGED
@@ -9,7 +9,7 @@ module Karafka
 
     extend Forwardable
 
-    def_delegators :@coordinator, :topic, :partition
+    def_delegators :@coordinator, :topic, :partition, :eofed?
 
     def_delegators :producer, :produce_async, :produce_sync, :produce_many_async,
                    :produce_many_sync
@@ -100,6 +100,24 @@ module Karafka
       retry_after_pause
     end
 
+    # Can be used to run code prior to scheduling of eofed execution
+    def on_before_schedule_eofed
+      handle_before_schedule_eofed
+    end
+
+    # Trigger method for running on eof without messages
+    def on_eofed
+      handle_eofed
+    rescue StandardError => e
+      Karafka.monitor.instrument(
+        'error.occurred',
+        error: e,
+        caller: self,
+        seek_offset: coordinator.seek_offset,
+        type: 'consumer.eofed.error'
+      )
+    end
+
     # Can be used to run code prior to scheduling of idle execution
     #
     # @private
@@ -166,6 +184,10 @@ module Karafka
       raise NotImplementedError, 'Implement this in a subclass'
     end
 
+    # Method that will be executed when a given topic partition reaches eof without any new
+    # incoming messages alongside
+    def eofed; end
+
     # Method that will be executed when a given topic partition is revoked. You can use it for
     # some teardown procedures (closing file handler, etc).
     def revoked; end
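Note that `#on_eofed` swallows exceptions from user `#eofed` code and republishes them through the `error.occurred` notification with type `consumer.eofed.error`, mirroring the other consumer hooks. A sketch of listening for such failures; the subscription API is the standard Karafka monitor, while `ErrorTracker` is a hypothetical reporting service:

Karafka.monitor.subscribe('error.occurred') do |event|
  next unless event[:type] == 'consumer.eofed.error'

  # event[:error] carries the exception, event[:caller] the consumer instance
  ErrorTracker.notify(event[:error], consumer: event[:caller].class.name)
end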
data/lib/karafka/cli/server.rb
CHANGED
data/lib/karafka/cli/swarm.rb
CHANGED
data/lib/karafka/connection/client.rb
CHANGED
@@ -8,6 +8,8 @@ module Karafka
   # It is threadsafe and provides some security measures so we won't end up operating on a
   # closed consumer instance as it causes Ruby VM process to crash.
   class Client
+    include ::Karafka::Core::Helpers::Time
+
     attr_reader :rebalance_manager
 
     # @return [Karafka::Routing::SubscriptionGroup] subscription group to which this client
@@ -24,7 +26,26 @@ module Karafka
     # How many times should we retry polling in case of a failure
     MAX_POLL_RETRIES = 20
 
-
+    # How much time of the total shutdown time can we wait for our manual unsubscribe before
+    # attempting to close without unsubscribe. We try to wait for 50% of the shutdown time
+    # before we move to a regular unsubscribe.
+    COOP_UNSUBSCRIBE_FACTOR = 0.5
+
+    # Errors upon which we early report that something is off without retrying prior to the
+    # report
+    EARLY_REPORT_ERRORS = [
+      :inconsistent_group_protocol, # 23
+      :max_poll_exceeded, # -147
+      :network_exception, # 13
+      :transport, # -195
+      :topic_authorization_failed, # 29
+      :group_authorization_failed, # 30
+      :cluster_authorization_failed, # 31
+      # This can happen for many reasons, including issues with static membership being fenced
+      :fatal # -150
+    ].freeze
+
+    private_constant :MAX_POLL_RETRIES, :COOP_UNSUBSCRIBE_FACTOR, :EARLY_REPORT_ERRORS
 
     # Creates a new consumer instance.
     #
@@ -91,8 +112,17 @@
       # Fetch message within our time boundaries
       response = poll(time_poll.remaining)
 
-
-
+      case response
+      when :tick_time
+        nil
+      # We get a hash only in case of eof error
+      when Hash
+        @buffer.eof(response[:topic], response[:partition])
+      when nil
+        nil
+      else
+        @buffer << response
+      end
 
       # Upon polling rebalance manager might have been updated.
       # If partition revocation happens, we need to remove messages from revoked partitions
@@ -115,10 +145,11 @@
       time_poll.checkpoint
 
       # Finally once we've (potentially) removed revoked, etc, if no messages were returned
-      # and it was not an early poll exist, we can break.
+      # and it was not an early poll exit, we can break. We also break if we got the eof
+      # signaling, to propagate it asap.
       # Worth keeping in mind, that the rebalance manager might have been updated despite no
       # messages being returned during a poll
-      break
+      break if response.nil? || response.is_a?(Hash)
     end
 
     @buffer
@@ -261,14 +292,32 @@
     # an issue that gets back every few versions of librdkafka in a limited scope, for example
     # for cooperative-sticky or in a general scope. This is why we unsubscribe and wait until
     # we no longer have any assignments. That way librdkafka consumer shutdown should never
-    # happen with rebalance associated with the given consumer instance
+    # happen with rebalance associated with the given consumer instance. Since we do not want
+    # to wait forever, we also impose a limit on how long we should wait. This prioritizes
+    # shutdown stability over endless wait.
+    #
+    # The `@unsubscribing` flag ensures that when there would be a direct close attempt, it
+    # won't get into this loop again. This can happen when supervision decides it should close
+    # things faster
     #
     # @see https://github.com/confluentinc/librdkafka/issues/4792
     # @see https://github.com/confluentinc/librdkafka/issues/4527
     if unsubscribe?
+      @unsubscribing = true
+
+      # Give 50% of time for the final close before we reach the forceful
+      max_wait = ::Karafka::App.config.shutdown_timeout * COOP_UNSUBSCRIBE_FACTOR
+      used = 0
+      stopped_at = monotonic_now
+
       unsubscribe
 
       until assignment.empty?
+        used += monotonic_now - stopped_at
+        stopped_at = monotonic_now
+
+        break if used >= max_wait
+
         sleep(0.1)
 
         ping
@@ -551,20 +600,7 @@
     # We want to report early on max poll interval exceeding because it may mean that the
     # underlying processing is taking too much time and it is not LRJ
     case e.code
-    when
-      early_report = true
-    when :network_exception # 13
-      early_report = true
-    when :transport # -195
-      early_report = true
-    when :topic_authorization_failed # 29
-      early_report = true
-    when :group_authorization_failed # 30
-      early_report = true
-    when :cluster_authorization_failed # 31
-      early_report = true
-    # This can happen for many reasons, including issues with static membership being fenced
-    when :fatal # -150
+    when *EARLY_REPORT_ERRORS
       early_report = true
     # @see
    # https://github.com/confluentinc/confluent-kafka-dotnet/issues/1366#issuecomment-821842990
@@ -578,11 +614,12 @@
     # No sense in retrying when no topic/partition and we're no longer running
     retryable = false unless Karafka::App.running?
     # If we detect the end of partition which can happen if `enable.partition.eof` is set to
-    # true, we can just return
+    # true, we can just return fast. This will fast yield whatever set of messages we
     # already have instead of waiting. This can be used for better latency control when we do
     # not expect a lot of lag and want to quickly move to processing.
+    # We can also pass the eof notion to the consumers for improved decision making.
     when :partition_eof
-      return
+      return e.details
     end
 
     if early_report || !retryable
@@ -709,6 +746,7 @@
     #
     # @return [Boolean] should we unsubscribe prior to shutdown
     def unsubscribe?
+      return false if @unsubscribing
      return false if @subscription_group.kafka.key?(:'group.instance.id')
      return false if @mode != :subscribe
      return false if assignment.empty?
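The refactor above relies on Ruby's splat-in-`when`: `when *EARLY_REPORT_ERRORS` expands the array into a multi-value `when`, matching if any element `===` the tested code. A standalone illustration of the idiom:

RETRYABLE = %i[timeout transport].freeze

def classify(code)
  case code
  when *RETRYABLE # same as: when :timeout, :transport
    :retry
  else
    :raise
  end
end

classify(:transport) #=> :retry
classify(:fatal)     #=> :raise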
data/lib/karafka/connection/listener.rb
CHANGED
@@ -253,7 +253,9 @@ module Karafka
 
       reset
 
-      sleep
+      # Ruby sleep is in seconds
+      sleep_time = ::Karafka::App.config.internal.connection.reset_backoff / 10_000.0
+      sleep(sleep_time) && retry
     end
 
     # Resumes processing of partitions that were paused due to an error.
@@ -330,28 +332,57 @@
     # given scheduler. It also handles the idle jobs when filtering API removed all messages
     # and we need to run house-keeping
     def build_and_schedule_flow_jobs
-      return if @messages_buffer.empty?
-
       consume_jobs = []
       idle_jobs = []
+      eofed_jobs = []
+
+      @messages_buffer.each do |topic, partition, messages, eof|
+        # In case we did not receive any new messages and there is no eof, we skip.
+        # We may yield an empty array here when we have reached eof without new messages,
+        # but in such cases we can run an eof job
+        next if messages.empty? && !eof
 
-      @messages_buffer.each do |topic, partition, messages|
         coordinator = @coordinators.find_or_create(topic, partition)
-
+        coordinator.eofed = eof
+
+        # If we did not receive any messages but did receive the eof signal, we run the
+        # eofed jobs so the user can take actions on reaching eof
+        if messages.empty? && eof
+          # If the user wants to run the eofed jobs on eof, we do it. Otherwise we just let
+          # it pass through. This allows configuring whether the user actually wants the
+          # `#eofed` logic or only the fast eof work yield
+          if coordinator.topic.eofed?
+            @executors.find_all_or_create(topic, partition, coordinator).each do |executor|
+              coordinator.increment(:eofed)
+              eofed_jobs << @jobs_builder.eofed(executor)
+            end
+          end
+
+          next
+        end
 
         coordinator.start(messages)
 
+        # If it is not an eof and there are no new messages, we just run house-keeping
+        #
         # We do not increment coordinator for idle job because it's not a user related one
        # and it will not go through a standard lifecycle. Same applies to revoked and shutdown
        if messages.empty?
+          # Start work coordination for this topic partition
          coordinator.increment(:idle)
          executor = @executors.find_or_create(topic, partition, 0, coordinator)
          idle_jobs << @jobs_builder.idle(executor)
-
-
-
-
-
-
+
+          next
+        end
+
+        # If there are messages, it is irrelevant whether eof or not, as consumption needs
+        # to happen
+        #
+        # Start work coordination for this topic partition
+        @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
+          coordinator.increment(:consume)
+          executor = @executors.find_or_create(topic, partition, group_id, coordinator)
+          consume_jobs << @jobs_builder.consume(executor, partition_messages)
        end
      end
 
@@ -367,6 +398,11 @@
        consume_jobs.each(&:before_schedule)
        @scheduler.on_schedule_consumption(consume_jobs)
      end
+
+      unless eofed_jobs.empty?
+        eofed_jobs.each(&:before_schedule)
+        @scheduler.on_schedule_eofed(eofed_jobs)
+      end
    end
 
    # Builds and schedules periodic jobs for topics partitions for which no messages were
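The first hunk replaces a fixed post-reset sleep with the configurable `internal.connection.reset_backoff`, validated by the new errors.yml rule (integer bigger or equal to 1000). A sketch of tuning it, assuming the value is expressed in milliseconds like the other connection timeouts:

class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
    # Last-resort listener recovery backoff; the contract rejects values below 1000
    config.internal.connection.reset_backoff = 60_000
  end
end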
data/lib/karafka/connection/messages_buffer.rb
CHANGED
@@ -23,9 +23,13 @@ module Karafka
     def initialize(subscription_group)
       @subscription_group = subscription_group
       @size = 0
+
       @groups = Hash.new do |topic_groups, topic|
         topic_groups[topic] = Hash.new do |partition_groups, partition|
-          partition_groups[partition] =
+          partition_groups[partition] = {
+            eof: false,
+            messages: []
+          }
         end
       end
     end
@@ -33,24 +37,29 @@
     # Remaps raw messages from the raw messages buffer to Karafka messages
     # @param raw_messages_buffer [RawMessagesBuffer] buffer with raw messages
     def remap(raw_messages_buffer)
-      clear
+      clear
 
       # Since it happens "right after" we've received the messages, it is close enough in time
       # to be used as the moment we received messages.
       received_at = Time.now
 
-      raw_messages_buffer.each do |topic, partition, messages|
+      raw_messages_buffer.each do |topic, partition, messages, eof|
         @size += messages.count
 
         ktopic = @subscription_group.topics.find(topic)
 
-
+        built_messages = messages.map do |message|
           Messages::Builders::Message.call(
             message,
             ktopic,
             received_at
           )
         end
+
+        @groups[topic][partition] = {
+          eof: eof,
+          messages: built_messages
+        }
       end
     end
@@ -59,10 +68,11 @@
     # @yieldparam [String] topic name
     # @yieldparam [Integer] partition number
     # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
+    # @yieldparam [Boolean] true if eof, false otherwise
     def each
       @groups.each do |topic, partitions|
-        partitions.each do |partition,
-          yield(topic, partition, messages)
+        partitions.each do |partition, details|
+          yield(topic, partition, details[:messages], details[:eof])
         end
       end
     end
data/lib/karafka/connection/raw_messages_buffer.rb
CHANGED
@@ -2,7 +2,7 @@
 
 module Karafka
   module Connection
-    # Buffer for raw librdkafka messages.
+    # Buffer for raw librdkafka messages and eof status.
     #
     # When message is added to this buffer, it gets assigned to an array with other messages from
     # the same topic and partition.
@@ -17,9 +17,13 @@
     # @return [Karafka::Connection::MessagesBuffer] buffer instance
     def initialize
       @size = 0
+
       @groups = Hash.new do |topic_groups, topic|
         topic_groups[topic] = Hash.new do |partition_groups, partition|
-          partition_groups[partition] =
+          partition_groups[partition] = {
+            eof: false,
+            messages: []
+          }
         end
       end
     end
@@ -30,7 +34,16 @@
     # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
     def <<(message)
       @size += 1
-      @groups[message.topic][message.partition]
+      partition_state = @groups[message.topic][message.partition]
+      partition_state[:messages] << message
+      partition_state[:eof] = false
+    end
+
+    # Marks given topic partition as one that reached eof
+    # @param topic [String] topic that reached eof
+    # @param partition [Integer] partition that reached eof
+    def eof(topic, partition)
+      @groups[topic][partition][:eof] = true
     end
 
     # Allows to iterate over all the topics and partitions messages
@@ -38,10 +51,11 @@
     # @yieldparam [String] topic name
     # @yieldparam [Integer] partition number
     # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+    # @yieldparam [Boolean] has polling of this partition reached eof
     def each
       @groups.each do |topic, partitions|
-        partitions.each do |partition,
-          yield(topic, partition, messages)
+        partitions.each do |partition, details|
+          yield(topic, partition, details[:messages], details[:eof])
         end
       end
     end
@@ -69,8 +83,8 @@
     # again and we do want to ensure as few duplications as possible
     def uniq!
       @groups.each_value do |partitions|
-        partitions.each_value do |
-          messages.uniq!(&:offset)
+        partitions.each_value do |details|
+          details[:messages].uniq!(&:offset)
         end
       end
 
@@ -83,6 +97,11 @@
     # we save ourselves some objects allocations. We cannot clear the underlying arrays as they
     # may be used in other threads for data processing, thus if we would clear it, we could
     # potentially clear a raw messages array for a job that is in the jobs queue.
+    #
+    # @note We do not clear the eof assignments because they can span across batch pollings.
+    #   Since eof is not raised non-stop and is silenced after an eof poll, if we would clean it
+    #   here we would lose the notion of it. The reset of its state should happen when we
+    #   discover new messages for the given topic partition.
     def clear
       @size = 0
       @groups.each_value(&:clear)
@@ -92,8 +111,12 @@
 
     # Updates the messages count if we performed any operations that could change the state
     def recount!
-      @size =
-
+      @size = 0
+
+      @groups.each_value do |partitions|
+        partitions.each_value do |details|
+          @size += details[:messages].size
+        end
      end
    end
  end
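The per-partition state is now `{ eof:, messages: }`: `#eof` raises the flag, appending a message lowers it, and `#clear` intentionally leaves it untouched so the signal survives between polling batches. A sketch of these transitions against this internal API, using a Struct stand-in for a real `Rdkafka::Consumer::Message`:

FakeMessage = Struct.new(:topic, :partition, :offset)

buffer = Karafka::Connection::RawMessagesBuffer.new

# A poll that returned partition EOF marks the partition
buffer.eof('events', 0)
buffer.each { |topic, partition, messages, eof| p [topic, partition, messages.size, eof] }
#=> ["events", 0, 0, true]

# A new message for the partition clears the eof flag again
buffer << FakeMessage.new('events', 0, 100)
buffer.each { |topic, partition, messages, eof| p [topic, partition, messages.size, eof] }
#=> ["events", 0, 1, false]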
data/lib/karafka/embedded.rb
CHANGED
@@ -27,7 +27,8 @@ module Karafka
       Thread.new do
         Thread.current.name = 'karafka.embedded'
 
-        Karafka::Process.tags.add(:execution_mode, 'embedded')
+        Karafka::Process.tags.add(:execution_mode, 'mode:embedded')
+        Karafka::Server.execution_mode = :embedded
         Karafka::Server.start
       end
     end
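Besides the tag rename, the embedded starter now records its mode, so application code can branch on how the process was booted. A sketch, assuming the standard `Karafka::Embedded.start` entry point from this same module:

# e.g. in an initializer of a web app that hosts Karafka in-process
Karafka::Embedded.start

# later, anywhere after boot:
if Karafka::Server.execution_mode == :embedded
  # skip CLI-only behavior when running inside the web server process
end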
data/lib/karafka/instrumentation/logger_listener.rb
CHANGED
@@ -290,6 +290,9 @@ module Karafka
       when 'consumer.tick.error'
         error "Consumer on tick failed due to an error: #{error}"
         error details
+      when 'consumer.eofed.error'
+        error "Consumer on eofed failed due to an error: #{error}"
+        error details
       when 'consumer.after_consume.error'
         error "Consumer on after_consume failed due to an error: #{error}"
         error details