karafka 2.3.2 → 2.4.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +12 -38
- data/CHANGELOG.md +65 -0
- data/Gemfile +6 -3
- data/Gemfile.lock +25 -23
- data/README.md +2 -2
- data/bin/integrations +1 -1
- data/config/locales/errors.yml +24 -2
- data/config/locales/pro_errors.yml +19 -0
- data/karafka.gemspec +4 -2
- data/lib/active_job/queue_adapters/karafka_adapter.rb +2 -0
- data/lib/karafka/admin/configs/config.rb +81 -0
- data/lib/karafka/admin/configs/resource.rb +88 -0
- data/lib/karafka/admin/configs.rb +103 -0
- data/lib/karafka/admin.rb +200 -89
- data/lib/karafka/base_consumer.rb +2 -2
- data/lib/karafka/cli/info.rb +9 -7
- data/lib/karafka/cli/server.rb +7 -7
- data/lib/karafka/cli/topics/align.rb +109 -0
- data/lib/karafka/cli/topics/base.rb +66 -0
- data/lib/karafka/cli/topics/create.rb +35 -0
- data/lib/karafka/cli/topics/delete.rb +30 -0
- data/lib/karafka/cli/topics/migrate.rb +31 -0
- data/lib/karafka/cli/topics/plan.rb +169 -0
- data/lib/karafka/cli/topics/repartition.rb +41 -0
- data/lib/karafka/cli/topics/reset.rb +18 -0
- data/lib/karafka/cli/topics.rb +13 -123
- data/lib/karafka/connection/client.rb +62 -37
- data/lib/karafka/connection/listener.rb +22 -17
- data/lib/karafka/connection/proxy.rb +93 -4
- data/lib/karafka/connection/status.rb +14 -2
- data/lib/karafka/contracts/config.rb +36 -1
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/deserializers/headers.rb +15 -0
- data/lib/karafka/deserializers/key.rb +15 -0
- data/lib/karafka/deserializers/payload.rb +16 -0
- data/lib/karafka/embedded.rb +2 -0
- data/lib/karafka/helpers/async.rb +5 -2
- data/lib/karafka/helpers/colorize.rb +6 -0
- data/lib/karafka/instrumentation/callbacks/oauthbearer_token_refresh.rb +29 -0
- data/lib/karafka/instrumentation/logger_listener.rb +23 -3
- data/lib/karafka/instrumentation/notifications.rb +10 -0
- data/lib/karafka/instrumentation/vendors/appsignal/client.rb +16 -2
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +34 -4
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +20 -0
- data/lib/karafka/messages/batch_metadata.rb +1 -1
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/messages/builders/message.rb +10 -6
- data/lib/karafka/messages/message.rb +2 -1
- data/lib/karafka/messages/metadata.rb +20 -4
- data/lib/karafka/messages/parser.rb +1 -1
- data/lib/karafka/pro/base_consumer.rb +12 -23
- data/lib/karafka/pro/encryption/cipher.rb +7 -3
- data/lib/karafka/pro/encryption/contracts/config.rb +1 -0
- data/lib/karafka/pro/encryption/errors.rb +4 -1
- data/lib/karafka/pro/encryption/messages/middleware.rb +13 -11
- data/lib/karafka/pro/encryption/messages/parser.rb +22 -20
- data/lib/karafka/pro/encryption/setup/config.rb +5 -0
- data/lib/karafka/pro/iterator/expander.rb +2 -1
- data/lib/karafka/pro/iterator/tpl_builder.rb +38 -0
- data/lib/karafka/pro/iterator.rb +28 -2
- data/lib/karafka/pro/loader.rb +3 -0
- data/lib/karafka/pro/processing/coordinator.rb +15 -2
- data/lib/karafka/pro/processing/expansions_selector.rb +2 -0
- data/lib/karafka/pro/processing/jobs_queue.rb +122 -5
- data/lib/karafka/pro/processing/periodic_job/consumer.rb +67 -0
- data/lib/karafka/pro/processing/piping/consumer.rb +126 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
- data/lib/karafka/pro/processing/strategies/default.rb +5 -1
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +21 -5
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
- data/lib/karafka/pro/processing/subscription_groups_coordinator.rb +52 -0
- data/lib/karafka/pro/routing/features/direct_assignments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/direct_assignments/contracts/consumer_group.rb +53 -0
- data/lib/karafka/pro/routing/features/direct_assignments/contracts/topic.rb +108 -0
- data/lib/karafka/pro/routing/features/direct_assignments/subscription_group.rb +77 -0
- data/lib/karafka/pro/routing/features/direct_assignments/topic.rb +69 -0
- data/lib/karafka/pro/routing/features/direct_assignments.rb +25 -0
- data/lib/karafka/pro/routing/features/patterns/builder.rb +1 -1
- data/lib/karafka/pro/routing/features/swarm/config.rb +31 -0
- data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +76 -0
- data/lib/karafka/pro/routing/features/swarm/contracts/topic.rb +78 -0
- data/lib/karafka/pro/routing/features/swarm/topic.rb +77 -0
- data/lib/karafka/pro/routing/features/swarm.rb +36 -0
- data/lib/karafka/pro/swarm/liveness_listener.rb +20 -0
- data/lib/karafka/processing/coordinator.rb +17 -8
- data/lib/karafka/processing/coordinators_buffer.rb +5 -2
- data/lib/karafka/processing/executor.rb +6 -2
- data/lib/karafka/processing/executors_buffer.rb +5 -2
- data/lib/karafka/processing/jobs_queue.rb +9 -4
- data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
- data/lib/karafka/processing/strategies/default.rb +7 -1
- data/lib/karafka/processing/strategies/dlq.rb +17 -2
- data/lib/karafka/processing/workers_batch.rb +4 -1
- data/lib/karafka/routing/builder.rb +6 -2
- data/lib/karafka/routing/consumer_group.rb +2 -1
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +5 -0
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +8 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +10 -2
- data/lib/karafka/routing/features/deserializers/config.rb +18 -0
- data/lib/karafka/routing/features/deserializers/contracts/topic.rb +31 -0
- data/lib/karafka/routing/features/deserializers/topic.rb +51 -0
- data/lib/karafka/routing/features/deserializers.rb +11 -0
- data/lib/karafka/routing/proxy.rb +9 -14
- data/lib/karafka/routing/router.rb +11 -2
- data/lib/karafka/routing/subscription_group.rb +22 -1
- data/lib/karafka/routing/topic.rb +0 -1
- data/lib/karafka/runner.rb +1 -1
- data/lib/karafka/setup/config.rb +51 -10
- data/lib/karafka/status.rb +7 -8
- data/lib/karafka/swarm/manager.rb +15 -3
- data/lib/karafka/swarm/node.rb +3 -3
- data/lib/karafka/swarm/pidfd.rb +20 -4
- data/lib/karafka/swarm/supervisor.rb +25 -8
- data/lib/karafka/templates/karafka.rb.erb +28 -1
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +42 -12
- metadata.gz.sig +0 -0
- data/lib/karafka/routing/consumer_mapper.rb +0 -23
- data/lib/karafka/serialization/json/deserializer.rb +0 -19
- data/lib/karafka/time_trackers/partition_usage.rb +0 -56
@@ -18,22 +18,17 @@ module Karafka
|
|
18
18
|
instance_eval(&defaults) if defaults
|
19
19
|
end
|
20
20
|
|
21
|
-
#
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
# Translates the no "=" DSL of routing into elements assignments on target
|
26
|
-
# @param method_name [Symbol] name of the missing method
|
27
|
-
def method_missing(method_name, #{arg_forwarding})
|
28
|
-
return super unless respond_to_missing?(method_name)
|
21
|
+
# Translates the no "=" DSL of routing into elements assignments on target
|
22
|
+
# @param method_name [Symbol] name of the missing method
|
23
|
+
def method_missing(method_name, ...)
|
24
|
+
return super unless respond_to_missing?(method_name)
|
29
25
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
end
|
26
|
+
if @target.respond_to?(:"#{method_name}=")
|
27
|
+
@target.public_send(:"#{method_name}=", ...)
|
28
|
+
else
|
29
|
+
@target.public_send(method_name, ...)
|
35
30
|
end
|
36
|
-
|
31
|
+
end
|
37
32
|
|
38
33
|
# Tells whether or not a given element exists on the target
|
39
34
|
# @param method_name [Symbol] name of the missing method
|
@@ -23,7 +23,7 @@ module Karafka
|
|
23
23
|
end
|
24
24
|
|
25
25
|
# Finds the topic by name (in any consumer group) and if not present, will build a new
|
26
|
-
# representation of the topic with the defaults and default
|
26
|
+
# representation of the topic with the defaults and default deserializers.
|
27
27
|
#
|
28
28
|
# This is used in places where we may operate on topics that are not part of the routing
|
29
29
|
# but we want to do something on them (display data, iterate over, etc)
|
@@ -33,7 +33,16 @@ module Karafka
|
|
33
33
|
# @note Please note, that in case of a new topic, it will have a newly built consumer group
|
34
34
|
# as well, that is not part of the routing.
|
35
35
|
def find_or_initialize_by_name(name)
|
36
|
-
find_by(name: name)
|
36
|
+
existing_topic = find_by(name: name)
|
37
|
+
|
38
|
+
return existing_topic if existing_topic
|
39
|
+
|
40
|
+
virtual_topic = Topic.new(name, ConsumerGroup.new(name))
|
41
|
+
|
42
|
+
Karafka::Routing::Proxy.new(
|
43
|
+
virtual_topic,
|
44
|
+
Karafka::App.config.internal.routing.builder.defaults
|
45
|
+
).target
|
37
46
|
end
|
38
47
|
|
39
48
|
module_function :find_by
|
@@ -76,7 +76,8 @@ module Karafka
|
|
76
76
|
activity_manager.active?(:subscription_groups, name)
|
77
77
|
end
|
78
78
|
|
79
|
-
# @return [Array<String>] names of topics to which we should subscribe
|
79
|
+
# @return [false, Array<String>] names of topics to which we should subscribe or false when
|
80
|
+
# operating only on direct assignments
|
80
81
|
#
|
81
82
|
# @note Most of the time it should not include inactive topics but in case of pattern
|
82
83
|
# matching the matcher topics become inactive down the road, hence we filter out so
|
@@ -85,12 +86,32 @@ module Karafka
|
|
85
86
|
topics.select(&:active?).map(&:subscription_name)
|
86
87
|
end
|
87
88
|
|
89
|
+
# @param _consumer [Karafka::Connection::Proxy]
|
90
|
+
# @return [false, Rdkafka::Consumer::TopicPartitionList] List of tpls for direct assignments
|
91
|
+
# or false for the normal mode
|
92
|
+
def assignments(_consumer)
|
93
|
+
false
|
94
|
+
end
|
95
|
+
|
88
96
|
# @return [String] id of the subscription group
|
89
97
|
# @note This is an alias for displaying in places where we print the stringified version.
|
90
98
|
def to_s
|
91
99
|
id
|
92
100
|
end
|
93
101
|
|
102
|
+
# Refreshes the configuration of this subscription group if needed based on the execution
|
103
|
+
# context.
|
104
|
+
#
|
105
|
+
# Since the initial routing setup happens in the supervisor, it is inherited by the children.
|
106
|
+
# This causes incomplete assignment of `group.instance.id` which is not expanded with proper
|
107
|
+
# node identifier. This refreshes this if needed when in swarm.
|
108
|
+
def refresh
|
109
|
+
return unless node
|
110
|
+
return unless kafka.key?(:'group.instance.id')
|
111
|
+
|
112
|
+
@kafka = build_kafka
|
113
|
+
end
|
114
|
+
|
94
115
|
private
|
95
116
|
|
96
117
|
# @return [Hash] kafka settings are a bit special. They are exactly the same for all of the
|
data/lib/karafka/runner.rb
CHANGED
@@ -25,7 +25,7 @@ module Karafka
|
|
25
25
|
# Register all the listeners so they can be started and managed
|
26
26
|
@manager.register(listeners)
|
27
27
|
|
28
|
-
workers.
|
28
|
+
workers.each_with_index { |worker, i| worker.async_call("karafka.worker##{i}") }
|
29
29
|
|
30
30
|
# We aggregate threads here for a supervised shutdown process
|
31
31
|
Karafka::Server.workers = workers
|
data/lib/karafka/setup/config.rb
CHANGED
@@ -64,15 +64,9 @@ module Karafka
|
|
64
64
|
setting :logger, default: ::Karafka::Instrumentation::Logger.new
|
65
65
|
# option monitor [Instance] monitor that we will to use (defaults to Karafka::Monitor)
|
66
66
|
setting :monitor, default: ::Karafka::Instrumentation::Monitor.new
|
67
|
-
# Mapper used to remap consumer groups ids, so in case users migrate from other tools
|
68
|
-
# or they need to maintain their own internal consumer group naming conventions, they
|
69
|
-
# can easily do it, replacing the default client_id + consumer name pattern concept
|
70
|
-
setting :consumer_mapper, default: Routing::ConsumerMapper.new
|
71
67
|
# option [Boolean] should we reload consumers with each incoming batch thus effectively
|
72
68
|
# supporting code reload (if someone reloads code) or should we keep the persistence
|
73
69
|
setting :consumer_persistence, default: true
|
74
|
-
# Default deserializer for converting incoming data into ruby objects
|
75
|
-
setting :deserializer, default: Karafka::Serialization::Json::Deserializer.new
|
76
70
|
# option [String] should we start with the earliest possible offset or latest
|
77
71
|
# This will set the `auto.offset.reset` value unless present in the kafka scope
|
78
72
|
setting :initial_offset, default: 'earliest'
|
@@ -100,6 +94,15 @@ module Karafka
|
|
100
94
|
# Disabling this may be needed in scenarios where we do not have control over topics names
|
101
95
|
# and/or we work with existing systems where we cannot change topics names.
|
102
96
|
setting :strict_topics_namespacing, default: true
|
97
|
+
# option [String] default consumer group name for implicit routing
|
98
|
+
setting :group_id, default: 'app'
|
99
|
+
|
100
|
+
setting :oauth do
|
101
|
+
# option [false, #call] Listener for using oauth bearer. This listener will be able to
|
102
|
+
# get the client name to decide whether to use a single multi-client token refreshing
|
103
|
+
# or have separate tokens per instance.
|
104
|
+
setting :token_provider_listener, default: false
|
105
|
+
end
|
103
106
|
|
104
107
|
# rdkafka default options
|
105
108
|
# @see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
|
@@ -137,11 +140,12 @@ module Karafka
|
|
137
140
|
# involving a consumer instance
|
138
141
|
'enable.auto.commit': false,
|
139
142
|
# Make sure that topic metadata lookups do not create topics accidentally
|
140
|
-
'allow.auto.create.topics': false
|
143
|
+
'allow.auto.create.topics': false,
|
144
|
+
# Do not store offsets automatically in admin in any way
|
145
|
+
'enable.auto.offset.store': false
|
141
146
|
}
|
142
147
|
|
143
|
-
# option [String] default name for the admin consumer group.
|
144
|
-
# subject to be remapped by the consumer mapper as any other consumer group in the routes
|
148
|
+
# option [String] default name for the admin consumer group.
|
145
149
|
setting :group_id, default: 'karafka_admin'
|
146
150
|
|
147
151
|
# option max_wait_time [Integer] We wait only for this amount of time before raising error
|
@@ -196,7 +200,7 @@ module Karafka
|
|
196
200
|
setting :liveness_listener, default: Swarm::LivenessListener.new
|
197
201
|
# How long should we wait for any info from the node before we consider it hanging at
|
198
202
|
# stop it
|
199
|
-
setting :node_report_timeout, default:
|
203
|
+
setting :node_report_timeout, default: 60_000
|
200
204
|
# How long should we wait before restarting a node. This can prevent us from having a
|
201
205
|
# case where for some external reason our spawned process would die immediately and we
|
202
206
|
# would immediately try to start it back in an endless loop
|
@@ -230,6 +234,14 @@ module Karafka
|
|
230
234
|
|
231
235
|
# Settings that are altered by our client proxy layer
|
232
236
|
setting :proxy do
|
237
|
+
# commit offsets request
|
238
|
+
setting :commit do
|
239
|
+
# How many times should we try to run this call before raising an error
|
240
|
+
setting :max_attempts, default: 3
|
241
|
+
# How long should we wait before next attempt in case of a failure
|
242
|
+
setting :wait_time, default: 1_000
|
243
|
+
end
|
244
|
+
|
233
245
|
# Committed offsets for given CG query
|
234
246
|
setting :committed do
|
235
247
|
# timeout for this request. For busy or remote clusters, this should be high enough
|
@@ -259,6 +271,26 @@ module Karafka
|
|
259
271
|
# How long should we wait before next attempt in case of a failure
|
260
272
|
setting :wait_time, default: 1_000
|
261
273
|
end
|
274
|
+
|
275
|
+
# Settings for lag request
|
276
|
+
setting :lag do
|
277
|
+
# timeout for this request. For busy or remote clusters, this should be high enough
|
278
|
+
setting :timeout, default: 10_000
|
279
|
+
# How many times should we try to run this call before raising an error
|
280
|
+
setting :max_attempts, default: 3
|
281
|
+
# How long should we wait before next attempt in case of a failure
|
282
|
+
setting :wait_time, default: 1_000
|
283
|
+
end
|
284
|
+
|
285
|
+
# Settings for metadata request
|
286
|
+
setting :metadata do
|
287
|
+
# timeout for this request. For busy or remote clusters, this should be high enough
|
288
|
+
setting :timeout, default: 10_000
|
289
|
+
# How many times should we try to run this call before raising an error
|
290
|
+
setting :max_attempts, default: 3
|
291
|
+
# How long should we wait before next attempt in case of a failure
|
292
|
+
setting :wait_time, default: 1_000
|
293
|
+
end
|
262
294
|
end
|
263
295
|
end
|
264
296
|
|
@@ -368,10 +400,19 @@ module Karafka
|
|
368
400
|
# Sets up all the components that are based on the user configuration
|
369
401
|
# @note At the moment it is only WaterDrop
|
370
402
|
def configure_components
|
403
|
+
oauth_listener = config.oauth.token_provider_listener
|
404
|
+
# We need to subscribe the oauth listener here because we want it to be ready before
|
405
|
+
# any consumer/admin runs
|
406
|
+
Karafka::App.monitor.subscribe(oauth_listener) if oauth_listener
|
407
|
+
|
371
408
|
config.producer ||= ::WaterDrop::Producer.new do |producer_config|
|
372
409
|
# In some cases WaterDrop updates the config and we don't want our consumer config to
|
373
410
|
# be polluted by those updates, that's why we copy
|
374
411
|
producer_config.kafka = AttributesMap.producer(config.kafka.dup)
|
412
|
+
# We also propagate same listener to the default producer to make sure, that the
|
413
|
+
# listener for oauth is also automatically used by the producer. That way we don't
|
414
|
+
# have to configure it manually for the default producer
|
415
|
+
producer_config.oauth.token_provider_listener = oauth_listener
|
375
416
|
producer_config.logger = config.logger
|
376
417
|
end
|
377
418
|
end
|
data/lib/karafka/status.rb
CHANGED
@@ -3,6 +3,11 @@
|
|
3
3
|
module Karafka
|
4
4
|
# App status monitor
|
5
5
|
class Status
|
6
|
+
include Helpers::ConfigImporter.new(
|
7
|
+
monitor: %i[monitor],
|
8
|
+
conductor: %i[internal connection conductor]
|
9
|
+
)
|
10
|
+
|
6
11
|
# Available states and their transitions.
|
7
12
|
STATES = {
|
8
13
|
initializing: :initialize!,
|
@@ -60,14 +65,8 @@ module Karafka
|
|
60
65
|
# We skip as during this state we do not have yet a monitor
|
61
66
|
return if initializing?
|
62
67
|
|
63
|
-
|
64
|
-
|
65
|
-
# We need to signal conductor on each state change as those may be relevant to
|
66
|
-
# listeners operations
|
67
|
-
@conductor ||= Karafka::App.config.internal.connection.conductor
|
68
|
-
@conductor.signal
|
69
|
-
|
70
|
-
Karafka.monitor.instrument("app.#{state}")
|
68
|
+
conductor.signal
|
69
|
+
monitor.instrument("app.#{state}", caller: self)
|
71
70
|
end
|
72
71
|
end
|
73
72
|
RUBY
|
@@ -19,6 +19,13 @@ module Karafka
|
|
19
19
|
node_restart_timeout: %i[internal swarm node_restart_timeout]
|
20
20
|
)
|
21
21
|
|
22
|
+
# Status we issue when we decide to shutdown unresponsive node
|
23
|
+
# We use -1 because nodes are expected to report 0+ statuses and we can use negative numbers
|
24
|
+
# for non-node based statuses
|
25
|
+
NOT_RESPONDING_SHUTDOWN_STATUS = -1
|
26
|
+
|
27
|
+
private_constant :NOT_RESPONDING_SHUTDOWN_STATUS
|
28
|
+
|
22
29
|
# @return [Array<Node>] All nodes that manager manages
|
23
30
|
attr_reader :nodes
|
24
31
|
|
@@ -29,10 +36,10 @@ module Karafka
|
|
29
36
|
|
30
37
|
# Starts all the expected nodes for the first time
|
31
38
|
def start
|
32
|
-
|
39
|
+
parent_pid = ::Process.pid
|
33
40
|
|
34
41
|
@nodes = Array.new(nodes_count) do |i|
|
35
|
-
start_one Node.new(i,
|
42
|
+
start_one Node.new(i, parent_pid)
|
36
43
|
end
|
37
44
|
end
|
38
45
|
|
@@ -148,7 +155,12 @@ module Karafka
|
|
148
155
|
return true unless over?(statuses[:control], node_report_timeout)
|
149
156
|
|
150
157
|
# Start the stopping procedure if the node stopped reporting frequently enough
|
151
|
-
monitor.instrument(
|
158
|
+
monitor.instrument(
|
159
|
+
'swarm.manager.stopping',
|
160
|
+
caller: self,
|
161
|
+
node: node,
|
162
|
+
status: NOT_RESPONDING_SHUTDOWN_STATUS
|
163
|
+
) do
|
152
164
|
node.stop
|
153
165
|
statuses[:stop] = monotonic_now
|
154
166
|
end
|
data/lib/karafka/swarm/node.rb
CHANGED
@@ -30,10 +30,10 @@ module Karafka
|
|
30
30
|
# @param id [Integer] number of the fork. Used for uniqueness setup for group client ids and
|
31
31
|
# other stuff where we need to know a unique reference of the fork in regards to the rest
|
32
32
|
# of them.
|
33
|
-
# @param
|
34
|
-
def initialize(id,
|
33
|
+
# @param parent_pid [Integer] parent pid for zombie fencing
|
34
|
+
def initialize(id, parent_pid)
|
35
35
|
@id = id
|
36
|
-
@parent_pidfd =
|
36
|
+
@parent_pidfd = Pidfd.new(parent_pid)
|
37
37
|
end
|
38
38
|
|
39
39
|
# Starts a new fork and:
|
data/lib/karafka/swarm/pidfd.rb
CHANGED
@@ -72,17 +72,33 @@ module Karafka
|
|
72
72
|
def alive?
|
73
73
|
@pidfd_select ||= [@pidfd_io]
|
74
74
|
|
75
|
-
|
75
|
+
if @mutex.owned?
|
76
|
+
return false if @cleaned
|
77
|
+
|
78
|
+
IO.select(@pidfd_select, nil, nil, 0).nil?
|
79
|
+
else
|
80
|
+
@mutex.synchronize do
|
81
|
+
return false if @cleaned
|
82
|
+
|
83
|
+
IO.select(@pidfd_select, nil, nil, 0).nil?
|
84
|
+
end
|
85
|
+
end
|
76
86
|
end
|
77
87
|
|
78
88
|
# Cleans the zombie process
|
79
89
|
# @note This should run **only** on processes that exited, otherwise will wait
|
80
90
|
def cleanup
|
81
|
-
|
91
|
+
@mutex.synchronize do
|
92
|
+
return if @cleaned
|
82
93
|
|
83
|
-
|
94
|
+
waitid(P_PIDFD, @pidfd, nil, WEXITED)
|
84
95
|
|
85
|
-
|
96
|
+
@pidfd_io.close
|
97
|
+
@pidfd_select = nil
|
98
|
+
@pidfd_io = nil
|
99
|
+
@pidfd = nil
|
100
|
+
@cleaned = true
|
101
|
+
end
|
86
102
|
end
|
87
103
|
|
88
104
|
# Sends given signal to the process using its pidfd
|
@@ -23,6 +23,15 @@ module Karafka
|
|
23
23
|
process: %i[internal process]
|
24
24
|
)
|
25
25
|
|
26
|
+
# How long extra should we wait on shutdown before forceful termination
|
27
|
+
# We add this time because we send signals and it always can take a bit of time for them
|
28
|
+
# to reach out nodes and be processed to start the shutdown flow. Because of that and
|
29
|
+
# because we always want to give all nodes all the time of `shutdown_timeout` they are
|
30
|
+
# expected to have, we add this just to compensate.
|
31
|
+
SHUTDOWN_GRACE_PERIOD = 1_000
|
32
|
+
|
33
|
+
private_constant :SHUTDOWN_GRACE_PERIOD
|
34
|
+
|
26
35
|
def initialize
|
27
36
|
@mutex = Mutex.new
|
28
37
|
@queue = Processing::TimedQueue.new
|
@@ -30,14 +39,16 @@ module Karafka
|
|
30
39
|
|
31
40
|
# Creates needed number of forks, installs signals and starts supervision
|
32
41
|
def run
|
33
|
-
Karafka::App.warmup
|
34
|
-
|
35
|
-
manager.start
|
36
|
-
|
37
42
|
# Close producer just in case. While it should not be used, we do not want even a
|
38
43
|
# theoretical case since librdkafka is not thread-safe.
|
44
|
+
# We close it prior to forking just to make sure, there is no issue with initialized
|
45
|
+
# producer (should not be initialized but just in case)
|
39
46
|
Karafka.producer.close
|
40
47
|
|
48
|
+
Karafka::App.warmup
|
49
|
+
|
50
|
+
manager.start
|
51
|
+
|
41
52
|
process.on_sigint { stop }
|
42
53
|
process.on_sigquit { stop }
|
43
54
|
process.on_sigterm { stop }
|
@@ -68,7 +79,10 @@ module Karafka
|
|
68
79
|
type: 'swarm.supervisor.error'
|
69
80
|
)
|
70
81
|
|
71
|
-
|
82
|
+
manager.terminate
|
83
|
+
manager.cleanup
|
84
|
+
|
85
|
+
raise e
|
72
86
|
end
|
73
87
|
|
74
88
|
private
|
@@ -100,10 +114,12 @@ module Karafka
|
|
100
114
|
|
101
115
|
manager.stop
|
102
116
|
|
117
|
+
total_shutdown_timeout = shutdown_timeout + SHUTDOWN_GRACE_PERIOD
|
118
|
+
|
103
119
|
# We check from time to time (for the timeout period) if all the threads finished
|
104
120
|
# their work and if so, we can just return and normal shutdown process will take place
|
105
121
|
# We divide it by 1000 because we use time in ms.
|
106
|
-
((
|
122
|
+
((total_shutdown_timeout / 1_000) * (1 / supervision_sleep)).to_i.times do
|
107
123
|
if manager.stopped?
|
108
124
|
manager.cleanup
|
109
125
|
return
|
@@ -132,8 +148,9 @@ module Karafka
|
|
132
148
|
# Cleanup the process table
|
133
149
|
manager.cleanup
|
134
150
|
|
135
|
-
#
|
136
|
-
|
151
|
+
# We do not use `exit!` here similar to regular server because we do not have to worry
|
152
|
+
# about any librdkafka related hanging connections, etc
|
153
|
+
Kernel.exit(forceful_exit_code)
|
137
154
|
ensure
|
138
155
|
if initialized
|
139
156
|
Karafka::App.stopped!
|
@@ -37,7 +37,14 @@ class KarafkaApp < Karafka::App
|
|
37
37
|
# interested in logging events for certain environments. Since instrumentation
|
38
38
|
# notifications add extra boilerplate, if you want to achieve max performance,
|
39
39
|
# listen to only what you really need for given environment.
|
40
|
-
Karafka.monitor.subscribe(
|
40
|
+
Karafka.monitor.subscribe(
|
41
|
+
Karafka::Instrumentation::LoggerListener.new(
|
42
|
+
# Karafka, when the logger is set to info producers logs each time it polls data from an
|
43
|
+
# internal messages queue. This can be extensive, so you can turn it off by setting below
|
44
|
+
# to false.
|
45
|
+
log_polling: true
|
46
|
+
)
|
47
|
+
)
|
41
48
|
# Karafka.monitor.subscribe(Karafka::Instrumentation::ProctitleListener.new)
|
42
49
|
|
43
50
|
# This logger prints the producer development info using the Karafka logger.
|
@@ -52,6 +59,26 @@ class KarafkaApp < Karafka::App
|
|
52
59
|
)
|
53
60
|
)
|
54
61
|
|
62
|
+
# You can subscribe to all consumer related errors and record/track them that way
|
63
|
+
#
|
64
|
+
# Karafka.monitor.subscribe 'error.occurred' do |event|
|
65
|
+
# type = event[:type]
|
66
|
+
# error = event[:error]
|
67
|
+
# details = (error.backtrace || []).join("\n")
|
68
|
+
# ErrorTracker.send_error(error, type, details)
|
69
|
+
# end
|
70
|
+
|
71
|
+
# You can subscribe to all producer related errors and record/track them that way
|
72
|
+
# Please note, that producer and consumer have their own notifications pipeline so you need to
|
73
|
+
# setup error tracking independently for each of them
|
74
|
+
#
|
75
|
+
# Karafka.producer.monitor.subscribe('error.occurred') do |event|
|
76
|
+
# type = event[:type]
|
77
|
+
# error = event[:error]
|
78
|
+
# details = (error.backtrace || []).join("\n")
|
79
|
+
# ErrorTracker.send_error(error, type, details)
|
80
|
+
# end
|
81
|
+
|
55
82
|
routes.draw do
|
56
83
|
<% if rails? -%>
|
57
84
|
# Uncomment this if you use Karafka with ActiveJob
|
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
|