karafka 2.3.2 → 2.4.0.beta1

Files changed (132)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +12 -38
  4. data/CHANGELOG.md +65 -0
  5. data/Gemfile +6 -3
  6. data/Gemfile.lock +25 -23
  7. data/README.md +2 -2
  8. data/bin/integrations +1 -1
  9. data/config/locales/errors.yml +24 -2
  10. data/config/locales/pro_errors.yml +19 -0
  11. data/karafka.gemspec +4 -2
  12. data/lib/active_job/queue_adapters/karafka_adapter.rb +2 -0
  13. data/lib/karafka/admin/configs/config.rb +81 -0
  14. data/lib/karafka/admin/configs/resource.rb +88 -0
  15. data/lib/karafka/admin/configs.rb +103 -0
  16. data/lib/karafka/admin.rb +200 -89
  17. data/lib/karafka/base_consumer.rb +2 -2
  18. data/lib/karafka/cli/info.rb +9 -7
  19. data/lib/karafka/cli/server.rb +7 -7
  20. data/lib/karafka/cli/topics/align.rb +109 -0
  21. data/lib/karafka/cli/topics/base.rb +66 -0
  22. data/lib/karafka/cli/topics/create.rb +35 -0
  23. data/lib/karafka/cli/topics/delete.rb +30 -0
  24. data/lib/karafka/cli/topics/migrate.rb +31 -0
  25. data/lib/karafka/cli/topics/plan.rb +169 -0
  26. data/lib/karafka/cli/topics/repartition.rb +41 -0
  27. data/lib/karafka/cli/topics/reset.rb +18 -0
  28. data/lib/karafka/cli/topics.rb +13 -123
  29. data/lib/karafka/connection/client.rb +62 -37
  30. data/lib/karafka/connection/listener.rb +22 -17
  31. data/lib/karafka/connection/proxy.rb +93 -4
  32. data/lib/karafka/connection/status.rb +14 -2
  33. data/lib/karafka/contracts/config.rb +36 -1
  34. data/lib/karafka/contracts/topic.rb +1 -1
  35. data/lib/karafka/deserializers/headers.rb +15 -0
  36. data/lib/karafka/deserializers/key.rb +15 -0
  37. data/lib/karafka/deserializers/payload.rb +16 -0
  38. data/lib/karafka/embedded.rb +2 -0
  39. data/lib/karafka/helpers/async.rb +5 -2
  40. data/lib/karafka/helpers/colorize.rb +6 -0
  41. data/lib/karafka/instrumentation/callbacks/oauthbearer_token_refresh.rb +29 -0
  42. data/lib/karafka/instrumentation/logger_listener.rb +23 -3
  43. data/lib/karafka/instrumentation/notifications.rb +10 -0
  44. data/lib/karafka/instrumentation/vendors/appsignal/client.rb +16 -2
  45. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +34 -4
  46. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +20 -0
  47. data/lib/karafka/messages/batch_metadata.rb +1 -1
  48. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  49. data/lib/karafka/messages/builders/message.rb +10 -6
  50. data/lib/karafka/messages/message.rb +2 -1
  51. data/lib/karafka/messages/metadata.rb +20 -4
  52. data/lib/karafka/messages/parser.rb +1 -1
  53. data/lib/karafka/pro/base_consumer.rb +12 -23
  54. data/lib/karafka/pro/encryption/cipher.rb +7 -3
  55. data/lib/karafka/pro/encryption/contracts/config.rb +1 -0
  56. data/lib/karafka/pro/encryption/errors.rb +4 -1
  57. data/lib/karafka/pro/encryption/messages/middleware.rb +13 -11
  58. data/lib/karafka/pro/encryption/messages/parser.rb +22 -20
  59. data/lib/karafka/pro/encryption/setup/config.rb +5 -0
  60. data/lib/karafka/pro/iterator/expander.rb +2 -1
  61. data/lib/karafka/pro/iterator/tpl_builder.rb +38 -0
  62. data/lib/karafka/pro/iterator.rb +28 -2
  63. data/lib/karafka/pro/loader.rb +3 -0
  64. data/lib/karafka/pro/processing/coordinator.rb +15 -2
  65. data/lib/karafka/pro/processing/expansions_selector.rb +2 -0
  66. data/lib/karafka/pro/processing/jobs_queue.rb +122 -5
  67. data/lib/karafka/pro/processing/periodic_job/consumer.rb +67 -0
  68. data/lib/karafka/pro/processing/piping/consumer.rb +126 -0
  69. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
  70. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
  71. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +1 -1
  72. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
  73. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
  74. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
  75. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +1 -1
  76. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +1 -1
  77. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
  78. data/lib/karafka/pro/processing/strategies/default.rb +5 -1
  79. data/lib/karafka/pro/processing/strategies/dlq/default.rb +21 -5
  80. data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
  81. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
  82. data/lib/karafka/pro/processing/subscription_groups_coordinator.rb +52 -0
  83. data/lib/karafka/pro/routing/features/direct_assignments/config.rb +27 -0
  84. data/lib/karafka/pro/routing/features/direct_assignments/contracts/consumer_group.rb +53 -0
  85. data/lib/karafka/pro/routing/features/direct_assignments/contracts/topic.rb +108 -0
  86. data/lib/karafka/pro/routing/features/direct_assignments/subscription_group.rb +77 -0
  87. data/lib/karafka/pro/routing/features/direct_assignments/topic.rb +69 -0
  88. data/lib/karafka/pro/routing/features/direct_assignments.rb +25 -0
  89. data/lib/karafka/pro/routing/features/patterns/builder.rb +1 -1
  90. data/lib/karafka/pro/routing/features/swarm/config.rb +31 -0
  91. data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +76 -0
  92. data/lib/karafka/pro/routing/features/swarm/contracts/topic.rb +78 -0
  93. data/lib/karafka/pro/routing/features/swarm/topic.rb +77 -0
  94. data/lib/karafka/pro/routing/features/swarm.rb +36 -0
  95. data/lib/karafka/pro/swarm/liveness_listener.rb +20 -0
  96. data/lib/karafka/processing/coordinator.rb +17 -8
  97. data/lib/karafka/processing/coordinators_buffer.rb +5 -2
  98. data/lib/karafka/processing/executor.rb +6 -2
  99. data/lib/karafka/processing/executors_buffer.rb +5 -2
  100. data/lib/karafka/processing/jobs_queue.rb +9 -4
  101. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
  102. data/lib/karafka/processing/strategies/default.rb +7 -1
  103. data/lib/karafka/processing/strategies/dlq.rb +17 -2
  104. data/lib/karafka/processing/workers_batch.rb +4 -1
  105. data/lib/karafka/routing/builder.rb +6 -2
  106. data/lib/karafka/routing/consumer_group.rb +2 -1
  107. data/lib/karafka/routing/features/dead_letter_queue/config.rb +5 -0
  108. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +8 -0
  109. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +10 -2
  110. data/lib/karafka/routing/features/deserializers/config.rb +18 -0
  111. data/lib/karafka/routing/features/deserializers/contracts/topic.rb +31 -0
  112. data/lib/karafka/routing/features/deserializers/topic.rb +51 -0
  113. data/lib/karafka/routing/features/deserializers.rb +11 -0
  114. data/lib/karafka/routing/proxy.rb +9 -14
  115. data/lib/karafka/routing/router.rb +11 -2
  116. data/lib/karafka/routing/subscription_group.rb +22 -1
  117. data/lib/karafka/routing/topic.rb +0 -1
  118. data/lib/karafka/runner.rb +1 -1
  119. data/lib/karafka/setup/config.rb +51 -10
  120. data/lib/karafka/status.rb +7 -8
  121. data/lib/karafka/swarm/manager.rb +15 -3
  122. data/lib/karafka/swarm/node.rb +3 -3
  123. data/lib/karafka/swarm/pidfd.rb +20 -4
  124. data/lib/karafka/swarm/supervisor.rb +25 -8
  125. data/lib/karafka/templates/karafka.rb.erb +28 -1
  126. data/lib/karafka/version.rb +1 -1
  127. data.tar.gz.sig +0 -0
  128. metadata +42 -12
  129. metadata.gz.sig +0 -0
  130. data/lib/karafka/routing/consumer_mapper.rb +0 -23
  131. data/lib/karafka/serialization/json/deserializer.rb +0 -19
  132. data/lib/karafka/time_trackers/partition_usage.rb +0 -56
data/lib/karafka/routing/proxy.rb CHANGED
@@ -18,22 +18,17 @@ module Karafka
       instance_eval(&defaults) if defaults
     end
 
-    # Ruby 2.7.0 to 2.7.2 do not have arg forwarding, so we fallback to the old way
-    arg_forwarding = RUBY_VERSION < '3.0' ? '*args, &block' : '...'
-
-    class_eval <<~RUBY, __FILE__, __LINE__ + 1
-      # Translates the no "=" DSL of routing into elements assignments on target
-      # @param method_name [Symbol] name of the missing method
-      def method_missing(method_name, #{arg_forwarding})
-        return super unless respond_to_missing?(method_name)
+    # Translates the no "=" DSL of routing into elements assignments on target
+    # @param method_name [Symbol] name of the missing method
+    def method_missing(method_name, ...)
+      return super unless respond_to_missing?(method_name)
 
-        if @target.respond_to?(:"\#{method_name}=")
-          @target.public_send(:"\#{method_name}=", #{arg_forwarding})
-        else
-          @target.public_send(method_name, #{arg_forwarding})
-        end
+      if @target.respond_to?(:"#{method_name}=")
+        @target.public_send(:"#{method_name}=", ...)
+      else
+        @target.public_send(method_name, ...)
       end
-    RUBY
+    end
 
     # Tells whether or not a given element exists on the target
     # @param method_name [Symbol] name of the missing method
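
Note: the rewrite above drops the string-`class_eval` fallback that Ruby 2.7 required and defines `method_missing` directly with `...` argument forwarding (Ruby 3.0+). A minimal, self-contained sketch of the same no-"=" DSL proxy pattern; the `Target` class and `max_messages` attribute below are illustrative, not Karafka code:

# Illustrative stand-in for a routing topic; not Karafka code
class Target
  attr_accessor :max_messages
end

class MiniProxy
  def initialize(target)
    @target = target
  end

  # A bare `max_messages 50` inside a block becomes `@target.max_messages = 50`
  def method_missing(method_name, ...)
    return super unless respond_to_missing?(method_name)

    if @target.respond_to?(:"#{method_name}=")
      @target.public_send(:"#{method_name}=", ...)
    else
      @target.public_send(method_name, ...)
    end
  end

  def respond_to_missing?(method_name, include_private = false)
    @target.respond_to?(:"#{method_name}=") || @target.respond_to?(method_name, include_private)
  end
end

target = Target.new
MiniProxy.new(target).instance_eval { max_messages 50 }
target.max_messages # => 50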
data/lib/karafka/routing/router.rb CHANGED
@@ -23,7 +23,7 @@ module Karafka
     end
 
     # Finds the topic by name (in any consumer group) and if not present, will build a new
-    # representation of the topic with the defaults and default deserializer.
+    # representation of the topic with the defaults and default deserializers.
     #
     # This is used in places where we may operate on topics that are not part of the routing
     # but we want to do something on them (display data, iterate over, etc)
@@ -33,7 +33,16 @@ module Karafka
     # @note Please note, that in case of a new topic, it will have a newly built consumer group
     #   as well, that is not part of the routing.
     def find_or_initialize_by_name(name)
-      find_by(name: name) || Topic.new(name, ConsumerGroup.new(name))
+      existing_topic = find_by(name: name)
+
+      return existing_topic if existing_topic
+
+      virtual_topic = Topic.new(name, ConsumerGroup.new(name))
+
+      Karafka::Routing::Proxy.new(
+        virtual_topic,
+        Karafka::App.config.internal.routing.builder.defaults
+      ).target
     end
 
     module_function :find_by
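
Note: the net effect is that a topic not present in the routes still goes through the routing `defaults` block, so it carries the default deserializers. A hedged usage sketch (requires a booted Karafka app; the topic name is illustrative):

# Looking up a topic that exists only virtually (not defined in routes.draw)
topic = Karafka::Routing::Router.find_or_initialize_by_name('not-in-routes')

# Before this change the virtual topic skipped the defaults block entirely;
# now it is built through Karafka::Routing::Proxy, so the defaults apply
topic.name # => 'not-in-routes'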
data/lib/karafka/routing/subscription_group.rb CHANGED
@@ -76,7 +76,8 @@ module Karafka
       activity_manager.active?(:subscription_groups, name)
     end
 
-    # @return [Array<String>] names of topics to which we should subscribe.
+    # @return [false, Array<String>] names of topics to which we should subscribe or false when
+    #   operating only on direct assignments
     #
     # @note Most of the time it should not include inactive topics but in case of pattern
     #   matching the matcher topics become inactive down the road, hence we filter out so
@@ -85,12 +86,32 @@ module Karafka
       topics.select(&:active?).map(&:subscription_name)
     end
 
+    # @param _consumer [Karafka::Connection::Proxy]
+    # @return [false, Rdkafka::Consumer::TopicPartitionList] List of tpls for direct assignments
+    #   or false for the normal mode
+    def assignments(_consumer)
+      false
+    end
+
     # @return [String] id of the subscription group
     # @note This is an alias for displaying in places where we print the stringified version.
     def to_s
       id
     end
 
+    # Refreshes the configuration of this subscription group if needed based on the execution
+    # context.
+    #
+    # Since the initial routing setup happens in the supervisor, it is inherited by the children.
+    # This causes incomplete assignment of `group.instance.id` which is not expanded with proper
+    # node identifier. This refreshes this if needed when in swarm.
+    def refresh
+      return unless node
+      return unless kafka.key?(:'group.instance.id')
+
+      @kafka = build_kafka
+    end
+
     private
 
     # @return [Hash] kafka settings are a bit special. They are exactly the same for all of the
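
Note: `assignments` returning `false` is the open-source default; the Pro `direct_assignments` feature listed above overrides it with a concrete topic-partition list. A hedged sketch of how a connection layer can branch on the two return types (the `consumer` object and the dispatch itself are illustrative):

# Illustrative dispatch between direct assignment and regular subscription
tpl = subscription_group.assignments(consumer)

if tpl
  # Direct assignments mode: take exact partitions, skip group rebalancing
  consumer.assign(tpl)
else
  # Regular mode: subscribe by names, let the consumer group protocol assign work
  consumer.subscribe(*subscription_group.subscriptions)
end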
data/lib/karafka/routing/topic.rb CHANGED
@@ -18,7 +18,6 @@ module Karafka
     # Attributes we can inherit from the root unless they were defined on this level
     INHERITABLE_ATTRIBUTES = %i[
       kafka
-      deserializer
       max_messages
       max_wait_time
       initial_offset
data/lib/karafka/runner.rb CHANGED
@@ -25,7 +25,7 @@ module Karafka
       # Register all the listeners so they can be started and managed
       @manager.register(listeners)
 
-      workers.each(&:async_call)
+      workers.each_with_index { |worker, i| worker.async_call("karafka.worker##{i}") }
 
       # We aggregate threads here for a supervised shutdown process
       Karafka::Server.workers = workers
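
Note: paired with the `helpers/async.rb` change listed above, each worker thread now receives a stable name, which shows up in thread dumps and profilers. A minimal sketch of the mechanism, assuming `async_call` ultimately names the thread it spawns (`Thread#name=` is standard Ruby, 2.3+):

# Minimal sketch: spawning a named worker thread
thread = Thread.new { sleep(0.1) } # stands in for the worker loop
thread.name = 'karafka.worker#0'

thread.name # => 'karafka.worker#0'
thread.join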
data/lib/karafka/setup/config.rb CHANGED
@@ -64,15 +64,9 @@ module Karafka
     setting :logger, default: ::Karafka::Instrumentation::Logger.new
     # option monitor [Instance] monitor that we will use (defaults to Karafka::Monitor)
     setting :monitor, default: ::Karafka::Instrumentation::Monitor.new
-    # Mapper used to remap consumer groups ids, so in case users migrate from other tools
-    # or they need to maintain their own internal consumer group naming conventions, they
-    # can easily do it, replacing the default client_id + consumer name pattern concept
-    setting :consumer_mapper, default: Routing::ConsumerMapper.new
     # option [Boolean] should we reload consumers with each incoming batch thus effectively
     # supporting code reload (if someone reloads code) or should we keep the persistence
     setting :consumer_persistence, default: true
-    # Default deserializer for converting incoming data into ruby objects
-    setting :deserializer, default: Karafka::Serialization::Json::Deserializer.new
     # option [String] should we start with the earliest possible offset or latest
     # This will set the `auto.offset.reset` value unless present in the kafka scope
     setting :initial_offset, default: 'earliest'
@@ -100,6 +94,15 @@ module Karafka
     # Disabling this may be needed in scenarios where we do not have control over topics names
     # and/or we work with existing systems where we cannot change topics names.
     setting :strict_topics_namespacing, default: true
+    # option [String] default consumer group name for implicit routing
+    setting :group_id, default: 'app'
+
+    setting :oauth do
+      # option [false, #call] Listener for using oauth bearer. This listener will be able to
+      # get the client name to decide whether to use a single multi-client token refreshing
+      # or have separate tokens per instance.
+      setting :token_provider_listener, default: false
+    end
 
     # rdkafka default options
     # @see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
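
Note: a hedged sketch of what a token provider listener could look like. The `on_oauthbearer_token_refresh` hook name mirrors the new `callbacks/oauthbearer_token_refresh.rb` file above; the event payload keys, the `fetch_token` helper, and the `oauthbearer_set_token` arguments are assumptions to verify against your rdkafka version:

# Hypothetical OAuth bearer token provider; verify the API details before use
class OauthTokenProviderListener
  def on_oauthbearer_token_refresh(event)
    token = fetch_token # placeholder for a call to your identity provider

    event[:bearer].oauthbearer_set_token(
      token: token.value,
      lifetime_ms: token.expires_in_ms,
      principal_name: 'karafka'
    )
  end
end

# During setup (subscribed automatically, see configure_components below):
# config.oauth.token_provider_listener = OauthTokenProviderListener.new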
@@ -137,11 +140,12 @@ module Karafka
       # involving a consumer instance
       'enable.auto.commit': false,
       # Make sure that topic metadata lookups do not create topics accidentally
-      'allow.auto.create.topics': false
+      'allow.auto.create.topics': false,
+      # Do not store offsets automatically in admin in any way
+      'enable.auto.offset.store': false
     }
 
-    # option [String] default name for the admin consumer group. Please note, that this is a
-    # subject to be remapped by the consumer mapper as any other consumer group in the routes
+    # option [String] default name for the admin consumer group.
     setting :group_id, default: 'karafka_admin'
 
     # option max_wait_time [Integer] We wait only for this amount of time before raising error
@@ -196,7 +200,7 @@ module Karafka
       setting :liveness_listener, default: Swarm::LivenessListener.new
       # How long should we wait for any info from the node before we consider it hanging and
       # stop it
-      setting :node_report_timeout, default: 30_000
+      setting :node_report_timeout, default: 60_000
       # How long should we wait before restarting a node. This can prevent us from having a
       # case where for some external reason our spawned process would die immediately and we
       # would immediately try to start it back in an endless loop
@@ -230,6 +234,14 @@ module Karafka
 
     # Settings that are altered by our client proxy layer
     setting :proxy do
+      # commit offsets request
+      setting :commit do
+        # How many times should we try to run this call before raising an error
+        setting :max_attempts, default: 3
+        # How long should we wait before next attempt in case of a failure
+        setting :wait_time, default: 1_000
+      end
+
       # Committed offsets for given CG query
       setting :committed do
         # timeout for this request. For busy or remote clusters, this should be high enough
@@ -259,6 +271,26 @@ module Karafka
         # How long should we wait before next attempt in case of a failure
         setting :wait_time, default: 1_000
       end
+
+      # Settings for lag request
+      setting :lag do
+        # timeout for this request. For busy or remote clusters, this should be high enough
+        setting :timeout, default: 10_000
+        # How many times should we try to run this call before raising an error
+        setting :max_attempts, default: 3
+        # How long should we wait before next attempt in case of a failure
+        setting :wait_time, default: 1_000
+      end
+
+      # Settings for metadata request
+      setting :metadata do
+        # timeout for this request. For busy or remote clusters, this should be high enough
+        setting :timeout, default: 10_000
+        # How many times should we try to run this call before raising an error
+        setting :max_attempts, default: 3
+        # How long should we wait before next attempt in case of a failure
+        setting :wait_time, default: 1_000
+      end
     end
   end
 
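Note: all of these proxy sub-settings share one shape: a guarded librdkafka call gets a timeout, a retry budget (`max_attempts`), and a backoff (`wait_time`). A hedged sketch of the retry pattern such settings typically drive; the helper is illustrative, not Karafka's actual `Connection::Proxy` code:

# Illustrative retry/backoff wrapper shaped by max_attempts + wait_time
def with_retries(max_attempts: 3, wait_time: 1_000)
  attempt = 0

  begin
    attempt += 1
    yield
  rescue StandardError # Karafka would rescue specific rdkafka error classes here
    raise if attempt >= max_attempts

    # wait_time is in milliseconds; Kernel#sleep takes seconds
    sleep(wait_time / 1_000.0)
    retry
  end
end

# Example: guard a committed-offsets query (consumer and tpl are illustrative)
# with_retries(max_attempts: 3, wait_time: 1_000) { consumer.committed(tpl) }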
@@ -368,10 +400,19 @@ module Karafka
       # Sets up all the components that are based on the user configuration
       # @note At the moment it is only WaterDrop
       def configure_components
+        oauth_listener = config.oauth.token_provider_listener
+        # We need to subscribe the oauth listener here because we want it to be ready before
+        # any consumer/admin runs
+        Karafka::App.monitor.subscribe(oauth_listener) if oauth_listener
+
         config.producer ||= ::WaterDrop::Producer.new do |producer_config|
           # In some cases WaterDrop updates the config and we don't want our consumer config to
           # be polluted by those updates, that's why we copy
           producer_config.kafka = AttributesMap.producer(config.kafka.dup)
+          # We also propagate same listener to the default producer to make sure, that the
+          # listener for oauth is also automatically used by the producer. That way we don't
+          # have to configure it manually for the default producer
+          producer_config.oauth.token_provider_listener = oauth_listener
           producer_config.logger = config.logger
         end
       end
data/lib/karafka/status.rb CHANGED
@@ -3,6 +3,11 @@
 module Karafka
   # App status monitor
   class Status
+    include Helpers::ConfigImporter.new(
+      monitor: %i[monitor],
+      conductor: %i[internal connection conductor]
+    )
+
     # Available states and their transitions.
     STATES = {
       initializing: :initialize!,
@@ -60,14 +65,8 @@ module Karafka
           # We skip as during this state we do not have yet a monitor
           return if initializing?
 
-          # We do not set conductor in the initializer because this status object is created
-          # before the configuration kicks in
-          # We need to signal conductor on each state change as those may be relevant to
-          # listeners operations
-          @conductor ||= Karafka::App.config.internal.connection.conductor
-          @conductor.signal
-
-          Karafka.monitor.instrument("app.#{state}")
+          conductor.signal
+          monitor.instrument("app.#{state}", caller: self)
         end
       end
     RUBY
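
Note: the `ConfigImporter` include replaces the lazy `@conductor ||=` lookup with generated readers that dig through the app config. A simplified sketch of the idea; this is illustrative, not Karafka's exact `Helpers::ConfigImporter` implementation:

# Simplified idea: a Module subclass that turns path mappings into readers
class MiniConfigImporter < Module
  def initialize(mappings)
    super()

    mappings.each do |name, path|
      # e.g. conductor: %i[internal connection conductor] walks nested config
      define_method(name) do
        @__config_cache ||= {}
        @__config_cache[name] ||= path.reduce(Karafka::App.config) do |scope, key|
          scope.public_send(key)
        end
      end
    end
  end
end

# include MiniConfigImporter.new(conductor: %i[internal connection conductor])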
data/lib/karafka/swarm/manager.rb CHANGED
@@ -19,6 +19,13 @@ module Karafka
       node_restart_timeout: %i[internal swarm node_restart_timeout]
     )
 
+    # Status we issue when we decide to shutdown unresponsive node
+    # We use -1 because nodes are expected to report 0+ statuses and we can use negative numbers
+    # for non-node based statuses
+    NOT_RESPONDING_SHUTDOWN_STATUS = -1
+
+    private_constant :NOT_RESPONDING_SHUTDOWN_STATUS
+
     # @return [Array<Node>] All nodes that manager manages
     attr_reader :nodes
 
@@ -29,10 +36,10 @@ module Karafka
 
     # Starts all the expected nodes for the first time
     def start
-      pidfd = Pidfd.new(::Process.pid)
+      parent_pid = ::Process.pid
 
       @nodes = Array.new(nodes_count) do |i|
-        start_one Node.new(i, pidfd)
+        start_one Node.new(i, parent_pid)
       end
     end
 
@@ -148,7 +155,12 @@ module Karafka
       return true unless over?(statuses[:control], node_report_timeout)
 
       # Start the stopping procedure if the node stopped reporting frequently enough
-      monitor.instrument('swarm.manager.stopping', caller: self, node: node) do
+      monitor.instrument(
+        'swarm.manager.stopping',
+        caller: self,
+        node: node,
+        status: NOT_RESPONDING_SHUTDOWN_STATUS
+      ) do
         node.stop
         statuses[:stop] = monotonic_now
       end
data/lib/karafka/swarm/node.rb CHANGED
@@ -30,10 +30,10 @@ module Karafka
     # @param id [Integer] number of the fork. Used for uniqueness setup for group client ids and
     #   other stuff where we need to know a unique reference of the fork in regards to the rest
     #   of them.
-    # @param parent_pidfd [Pidfd] parent pidfd for zombie fencing
-    def initialize(id, parent_pidfd)
+    # @param parent_pid [Integer] parent pid for zombie fencing
+    def initialize(id, parent_pid)
       @id = id
-      @parent_pidfd = parent_pidfd
+      @parent_pidfd = Pidfd.new(parent_pid)
     end
 
     # Starts a new fork and:
data/lib/karafka/swarm/pidfd.rb CHANGED
@@ -72,17 +72,33 @@ module Karafka
     def alive?
       @pidfd_select ||= [@pidfd_io]
 
-      IO.select(@pidfd_select, nil, nil, 0).nil?
+      if @mutex.owned?
+        return false if @cleaned
+
+        IO.select(@pidfd_select, nil, nil, 0).nil?
+      else
+        @mutex.synchronize do
+          return false if @cleaned
+
+          IO.select(@pidfd_select, nil, nil, 0).nil?
+        end
+      end
     end
 
     # Cleans the zombie process
     # @note This should run **only** on processes that exited, otherwise will wait
     def cleanup
-      return if @cleaned
+      @mutex.synchronize do
+        return if @cleaned
 
-      waitid(P_PIDFD, @pidfd, nil, WEXITED)
+        waitid(P_PIDFD, @pidfd, nil, WEXITED)
 
-      @cleaned = true
+        @pidfd_io.close
+        @pidfd_select = nil
+        @pidfd_io = nil
+        @pidfd = nil
+        @cleaned = true
+      end
     end
 
     # Sends given signal to the process using its pidfd
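
Note: the `@mutex.owned?` branch makes `alive?` callable both standalone and from code that already holds the lock (such as the synchronized `cleanup` above); re-entering a non-reentrant Ruby `Mutex` from its owner raises `ThreadError`. A minimal standalone illustration of the pattern:

# Minimal illustration: a predicate safe to call with or without the lock held
class Resource
  def initialize
    @mutex = Mutex.new
    @closed = false
  end

  def usable?
    # Owner already holds the lock: read directly, since a nested
    # Mutex#synchronize from the same thread raises ThreadError
    return !@closed if @mutex.owned?

    @mutex.synchronize { !@closed }
  end

  def close
    @mutex.synchronize do
      # Safe: usable? detects lock ownership and does not re-acquire
      @closed = true if usable?
    end
  end
end

resource = Resource.new
resource.close
resource.usable? # => false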
data/lib/karafka/swarm/supervisor.rb CHANGED
@@ -23,6 +23,15 @@ module Karafka
       process: %i[internal process]
     )
 
+    # How long extra should we wait on shutdown before forceful termination
+    # We add this time because we send signals and it always can take a bit of time for them
+    # to reach out nodes and be processed to start the shutdown flow. Because of that and
+    # because we always want to give all nodes all the time of `shutdown_timeout` they are
+    # expected to have, we add this just to compensate.
+    SHUTDOWN_GRACE_PERIOD = 1_000
+
+    private_constant :SHUTDOWN_GRACE_PERIOD
+
     def initialize
       @mutex = Mutex.new
       @queue = Processing::TimedQueue.new
@@ -30,14 +39,16 @@ module Karafka
 
     # Creates needed number of forks, installs signals and starts supervision
     def run
-      Karafka::App.warmup
-
-      manager.start
-
       # Close producer just in case. While it should not be used, we do not want even a
       # theoretical case since librdkafka is not thread-safe.
+      # We close it prior to forking just to make sure, there is no issue with initialized
+      # producer (should not be initialized but just in case)
       Karafka.producer.close
 
+      Karafka::App.warmup
+
+      manager.start
+
       process.on_sigint { stop }
       process.on_sigquit { stop }
       process.on_sigterm { stop }
@@ -68,7 +79,10 @@ module Karafka
         type: 'swarm.supervisor.error'
       )
 
-      @nodes.terminate
+      manager.terminate
+      manager.cleanup
+
+      raise e
     end
 
     private
@@ -100,10 +114,12 @@ module Karafka
 
       manager.stop
 
+      total_shutdown_timeout = shutdown_timeout + SHUTDOWN_GRACE_PERIOD
+
       # We check from time to time (for the timeout period) if all the threads finished
       # their work and if so, we can just return and normal shutdown process will take place
       # We divide it by 1000 because we use time in ms.
-      ((shutdown_timeout / 1_000) * (1 / supervision_sleep)).to_i.times do
+      ((total_shutdown_timeout / 1_000) * (1 / supervision_sleep)).to_i.times do
         if manager.stopped?
           manager.cleanup
           return
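
Note: a quick worked example of the new polling budget, assuming a `shutdown_timeout` of 60_000 ms and a `supervision_sleep` of 0.1 s (both illustrative values, not confirmed defaults):

shutdown_timeout  = 60_000                   # ms
total             = shutdown_timeout + 1_000 # + SHUTDOWN_GRACE_PERIOD
supervision_sleep = 0.1                      # seconds between checks

((total / 1_000) * (1 / supervision_sleep)).to_i # => 610 checks, ~61s of polling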
@@ -132,8 +148,9 @@ module Karafka
       # Cleanup the process table
       manager.cleanup
 
-      # exit! is not within the instrumentation as it would not trigger due to exit
-      Kernel.exit!(forceful_exit_code)
+      # We do not use `exit!` here similar to regular server because we do not have to worry
+      # about any librdkafka related hanging connections, etc
+      Kernel.exit(forceful_exit_code)
     ensure
       if initialized
         Karafka::App.stopped!
data/lib/karafka/templates/karafka.rb.erb CHANGED
@@ -37,7 +37,14 @@ class KarafkaApp < Karafka::App
     # interested in logging events for certain environments. Since instrumentation
     # notifications add extra boilerplate, if you want to achieve max performance,
     # listen to only what you really need for given environment.
-    Karafka.monitor.subscribe(Karafka::Instrumentation::LoggerListener.new)
+    Karafka.monitor.subscribe(
+      Karafka::Instrumentation::LoggerListener.new(
+        # Karafka, when the logger is set to info, produces logs each time it polls data from an
+        # internal messages queue. This can be extensive, so you can turn it off by setting below
+        # to false.
+        log_polling: true
+      )
+    )
     # Karafka.monitor.subscribe(Karafka::Instrumentation::ProctitleListener.new)
 
     # This logger prints the producer development info using the Karafka logger.
@@ -52,6 +59,26 @@ class KarafkaApp < Karafka::App
       )
     )
 
+    # You can subscribe to all consumer related errors and record/track them that way
+    #
+    # Karafka.monitor.subscribe 'error.occurred' do |event|
+    #   type = event[:type]
+    #   error = event[:error]
+    #   details = (error.backtrace || []).join("\n")
+    #   ErrorTracker.send_error(error, type, details)
+    # end
+
+    # You can subscribe to all producer related errors and record/track them that way
+    # Please note, that producer and consumer have their own notifications pipeline so you need to
+    # setup error tracking independently for each of them
+    #
+    # Karafka.producer.monitor.subscribe('error.occurred') do |event|
+    #   type = event[:type]
+    #   error = event[:error]
+    #   details = (error.backtrace || []).join("\n")
+    #   ErrorTracker.send_error(error, type, details)
+    # end
+
     routes.draw do
 <% if rails? -%>
       # Uncomment this if you use Karafka with ActiveJob
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
 # Main module namespace
 module Karafka
   # Current Karafka version
-  VERSION = '2.3.2'
+  VERSION = '2.4.0.beta1'
 end
data.tar.gz.sig CHANGED
Binary file