karafka 2.0.0.alpha6 → 2.0.0.beta3

Files changed (61)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.ruby-version +1 -1
  4. data/CHANGELOG.md +42 -2
  5. data/Gemfile.lock +9 -9
  6. data/bin/integrations +36 -14
  7. data/bin/scenario +29 -0
  8. data/config/errors.yml +1 -0
  9. data/docker-compose.yml +3 -0
  10. data/karafka.gemspec +1 -1
  11. data/lib/active_job/karafka.rb +2 -2
  12. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  13. data/lib/karafka/base_consumer.rb +74 -6
  14. data/lib/karafka/connection/client.rb +39 -16
  15. data/lib/karafka/connection/listener.rb +103 -34
  16. data/lib/karafka/connection/listeners_batch.rb +24 -0
  17. data/lib/karafka/connection/messages_buffer.rb +48 -61
  18. data/lib/karafka/connection/pauses_manager.rb +2 -2
  19. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  20. data/lib/karafka/contracts/config.rb +10 -1
  21. data/lib/karafka/helpers/async.rb +33 -0
  22. data/lib/karafka/instrumentation/logger_listener.rb +37 -10
  23. data/lib/karafka/instrumentation/monitor.rb +4 -0
  24. data/lib/karafka/licenser.rb +26 -7
  25. data/lib/karafka/messages/batch_metadata.rb +26 -3
  26. data/lib/karafka/messages/builders/batch_metadata.rb +17 -29
  27. data/lib/karafka/messages/builders/message.rb +1 -0
  28. data/lib/karafka/messages/builders/messages.rb +4 -12
  29. data/lib/karafka/pro/active_job/consumer.rb +48 -0
  30. data/lib/karafka/pro/active_job/dispatcher.rb +3 -3
  31. data/lib/karafka/pro/active_job/job_options_contract.rb +2 -2
  32. data/lib/karafka/pro/base_consumer_extensions.rb +66 -0
  33. data/lib/karafka/pro/loader.rb +27 -4
  34. data/lib/karafka/pro/performance_tracker.rb +80 -0
  35. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  36. data/lib/karafka/pro/processing/jobs_builder.rb +31 -0
  37. data/lib/karafka/pro/routing/extensions.rb +32 -0
  38. data/lib/karafka/pro/scheduler.rb +54 -0
  39. data/lib/karafka/processing/executor.rb +26 -11
  40. data/lib/karafka/processing/executors_buffer.rb +15 -7
  41. data/lib/karafka/processing/jobs/base.rb +28 -0
  42. data/lib/karafka/processing/jobs/consume.rb +11 -4
  43. data/lib/karafka/processing/jobs_builder.rb +28 -0
  44. data/lib/karafka/processing/jobs_queue.rb +28 -16
  45. data/lib/karafka/processing/worker.rb +39 -10
  46. data/lib/karafka/processing/workers_batch.rb +5 -0
  47. data/lib/karafka/routing/consumer_group.rb +1 -1
  48. data/lib/karafka/routing/subscription_group.rb +2 -2
  49. data/lib/karafka/routing/subscription_groups_builder.rb +3 -2
  50. data/lib/karafka/routing/topics.rb +38 -0
  51. data/lib/karafka/runner.rb +19 -27
  52. data/lib/karafka/scheduler.rb +20 -0
  53. data/lib/karafka/server.rb +24 -23
  54. data/lib/karafka/setup/config.rb +6 -1
  55. data/lib/karafka/status.rb +1 -3
  56. data/lib/karafka/time_trackers/pause.rb +10 -2
  57. data/lib/karafka/version.rb +1 -1
  58. data.tar.gz.sig +0 -0
  59. metadata +19 -4
  60. metadata.gz.sig +0 -0
  61. data/lib/karafka/active_job/routing_extensions.rb +0 -18
data/lib/karafka/connection/listener.rb

@@ -3,18 +3,34 @@
 module Karafka
   module Connection
     # A single listener that listens to incoming messages from a single subscription group.
-    # It polls the messages and then enqueues. It also takes care of potential recovery from
+    # It polls the messages and then enqueues jobs. It also takes care of potential recovery
+    # from critical errors by restarting everything in a safe manner.
-    # critical errors by restarting everything in a safe manner.
+    #
+    # This is the heart of the consumption process.
     class Listener
+      include Helpers::Async
+
+      # Can be useful for logging
+      # @return [String] id of this listener
+      attr_reader :id
+
       # @param subscription_group [Karafka::Routing::SubscriptionGroup]
       # @param jobs_queue [Karafka::Processing::JobsQueue] queue where we should push work
       # @return [Karafka::Connection::Listener] listener instance
       def initialize(subscription_group, jobs_queue)
+        @id = SecureRandom.uuid
         @subscription_group = subscription_group
         @jobs_queue = jobs_queue
+        @jobs_builder = ::Karafka::App.config.internal.jobs_builder
         @pauses_manager = PausesManager.new
         @client = Client.new(@subscription_group)
         @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
+        # We reference the scheduler here as that is much faster than fetching it each time
+        @scheduler = ::Karafka::App.config.internal.scheduler
+        # We keep one buffer for messages to preserve memory and not allocate extra objects
+        # We can do it that way because we always first schedule jobs using messages before we
+        # fetch another batch.
+        @messages_buffer = MessagesBuffer.new(subscription_group)
       end

       # Runs the main listener fetch loop.
@@ -51,33 +67,55 @@ module Karafka
          )

          resume_paused_partitions
-          # We need to fetch data before we revoke lost partitions details as during the polling
-          # the callbacks for tracking lost partitions are triggered. Otherwise we would be always
-          # one batch behind.
-          messages_buffer = @client.batch_poll

          Karafka.monitor.instrument(
            'connection.listener.fetch_loop.received',
            caller: self,
-            messages_buffer: messages_buffer
-          )
+            messages_buffer: @messages_buffer
+          ) do
+            # We need to fetch data before we revoke lost partitions details, as during the
+            # polling the callbacks for tracking lost partitions are triggered. Otherwise we
+            # would always be one batch behind.
+            poll_and_remap_messages
+          end

          # If there were revoked partitions, we need to wait on their jobs to finish before
          # distributing consuming jobs as upon revoking, we might get assigned to the same
          # partitions, thus getting their jobs. The revoking jobs need to finish before
          # appropriate consumers are taken down and re-created
-          wait(@subscription_group) if distribute_revoke_lost_partitions_jobs
-
-          distribute_partitions_jobs(messages_buffer)
+          build_and_schedule_revoke_lost_partitions_jobs

          # We wait only on jobs from our subscription group. Other groups are independent.
-          wait(@subscription_group)
+          wait
+
+          build_and_schedule_consumption_jobs
+
+          wait

          # We don't use the `#commit_offsets!` here for performance reasons. This can be achieved
          # if needed by using manual offset management.
          @client.commit_offsets
        end

+        # If we are stopping, we will no longer schedule any jobs despite polling.
+        # We need to keep polling so we do not exceed the `max.poll.interval` for long-running
+        # non-blocking jobs, and we need to allow them to finish. We however do not want to
+        # enqueue any new jobs. It's worth keeping in mind that it is the end user's
+        # responsibility to detect shutdown in their long-running logic, or else Karafka will
+        # force a shutdown after a while.
+        #
+        # We do not care about resuming any partitions or lost jobs as we do not plan to do
+        # anything with them, since we're in the shutdown phase.
+        wait_with_poll
+
+        # We do not want to schedule the shutdown jobs prior to finishing all the jobs
+        # (including non-blocking ones), as there might be a long-running job with a shutdown
+        # and then we would run two jobs in parallel for the same executor and consumer. We do
+        # not want that, as it could create a race condition.
+        build_and_schedule_shutdown_jobs
+
+        wait_with_poll
+
        shutdown

        # This is on purpose - see the notes for this method
@@ -98,55 +136,86 @@ module Karafka

      # Resumes processing of partitions that were paused due to an error.
      def resume_paused_partitions
-        @pauses_manager.resume { |topic, partition| @client.resume(topic, partition) }
+        @pauses_manager.resume do |topic, partition|
+          @client.resume(topic, partition)
+        end
      end

      # Enqueues revoking jobs for partitions that were taken away from the running process.
-      # @return [Boolean] was there anything to revoke
-      def distribute_revoke_lost_partitions_jobs
+      def build_and_schedule_revoke_lost_partitions_jobs
        revoked_partitions = @client.rebalance_manager.revoked_partitions

-        return false if revoked_partitions.empty?
+        # Stop early to save on some execution and array allocation
+        return if revoked_partitions.empty?
+
+        jobs = []

        revoked_partitions.each do |topic, partitions|
          partitions.each do |partition|
-            pause = @pauses_manager.fetch(topic, partition)
-            executor = @executors.fetch(topic, partition, pause)
-            @jobs_queue << Processing::Jobs::Revoked.new(executor)
+            pause_tracker = @pauses_manager.fetch(topic, partition)
+            executor = @executors.fetch(topic, partition, pause_tracker)
+            jobs << @jobs_builder.revoked(executor)
          end
        end

-        true
+        @scheduler.schedule_revocation(@jobs_queue, jobs)
+      end
+
+      # Enqueues the shutdown jobs for all the executors that exist in our subscription group
+      def build_and_schedule_shutdown_jobs
+        jobs = []
+
+        @executors.each do |_, _, executor|
+          jobs << @jobs_builder.shutdown(executor)
+        end
+
+        @scheduler.schedule_shutdown(@jobs_queue, jobs)
      end

-      # Takes the messages per topic partition and enqueues processing jobs in threads.
+      # Polls messages within the time and amount boundaries defined in the settings and then
+      # builds karafka messages based on the raw rdkafka messages buffer returned by the
+      # `#batch_poll` method.
      #
-      # @param messages_buffer [Karafka::Connection::MessagesBuffer] buffer with messages
-      def distribute_partitions_jobs(messages_buffer)
-        messages_buffer.each do |topic, partition, messages|
-          pause = @pauses_manager.fetch(topic, partition)
+      # @note There are two buffers, one for raw messages and one for "built" karafka messages
+      def poll_and_remap_messages
+        @messages_buffer.remap(
+          @client.batch_poll
+        )
+      end
+
+      # Takes the messages per topic partition and enqueues processing jobs in threads using
+      # the given scheduler.
+      def build_and_schedule_consumption_jobs
+        return if @messages_buffer.empty?

-          next if pause.paused?
+        jobs = []

-          executor = @executors.fetch(topic, partition, pause)
+        @messages_buffer.each do |topic, partition, messages|
+          pause_tracker = @pauses_manager.fetch(topic, partition)

-          @jobs_queue << Processing::Jobs::Consume.new(executor, messages)
+          executor = @executors.fetch(topic, partition, pause_tracker)
+
+          jobs << @jobs_builder.consume(executor, messages)
        end
+
+        @scheduler.schedule_consumption(@jobs_queue, jobs)
      end

      # Waits for all the jobs from a given subscription group to finish before moving forward
-      # @param subscription_group [Karafka::Routing::SubscriptionGroup]
-      def wait(subscription_group)
-        @jobs_queue.wait(subscription_group.id)
+      def wait
+        @jobs_queue.wait(@subscription_group.id)
+      end
+
+      # Waits without blocking the polling.
+      # This should be used only when we no longer plan to use any incoming data and we can
+      # safely discard it.
+      def wait_with_poll
+        @client.batch_poll until @jobs_queue.empty?(@subscription_group.id)
      end

      # Stops the jobs queue, triggers shutdown on all the executors (sync), commits offsets and
      # stops kafka client.
      def shutdown
-        @jobs_queue.close
-        # This runs synchronously, making sure we finish all the shutdowns before we stop the
-        # client.
-        @executors.shutdown
        @client.commit_offsets!
        @client.stop
      end
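
Taken together, the listener hunks above reorder each fetch loop iteration into explicit phases: poll and remap, schedule revocation jobs, wait, schedule consumption jobs, wait, commit. A minimal sketch of that ordering, using plain Ruby stand-ins rather than the real Karafka classes:

    # Hypothetical stand-ins that only illustrate the phase ordering of the loop.
    jobs_queue = Queue.new
    batch_poll = -> { [{ topic: 'events', partition: 0, payload: 'ping' }] }

    2.times do
      messages = batch_poll.call             # poll first so rebalance callbacks fire
      # revocation jobs would be scheduled and waited on here
      messages.each { |m| jobs_queue << m }  # schedule consumption jobs
      jobs_queue.pop until jobs_queue.empty? # wait for this subscription group's jobs
      puts 'commit offsets'                  # only then commit
    end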
data/lib/karafka/connection/listeners_batch.rb

@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Abstraction layer around listeners batch.
+    class ListenersBatch
+      include Enumerable
+
+      # @param jobs_queue [JobsQueue]
+      # @return [ListenersBatch]
+      def initialize(jobs_queue)
+        @batch = App.subscription_groups.map do |subscription_group|
+          Connection::Listener.new(subscription_group, jobs_queue)
+        end
+      end
+
+      # Iterates over available listeners and yields each listener
+      # @param block [Proc] block we want to run
+      def each(&block)
+        @batch.each(&block)
+      end
+    end
+  end
+end
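
Because Listener now includes Helpers::Async, a batch of listeners can be started and awaited through the delegated thread methods. A hedged sketch of runner-side usage; the JobsQueue construction here is an assumption, not something this changeset shows:

    # Assumes a booted Karafka app; JobsQueue.new with no arguments is an assumption.
    jobs_queue = Karafka::Processing::JobsQueue.new
    listeners = Karafka::Connection::ListenersBatch.new(jobs_queue)

    listeners.each(&:async_call) # each listener runs its fetch loop in its own thread
    listeners.each(&:join)       # join, terminate and alive? are delegated to that thread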
data/lib/karafka/connection/messages_buffer.rb

@@ -2,16 +2,26 @@

 module Karafka
   module Connection
-    # Buffer for messages.
-    # When message is added to this buffer, it gets assigned to an array with other messages from
-    # the same topic and partition.
+    # Buffer used to build and store karafka messages built based on raw librdkafka messages.
     #
-    # @note This buffer is NOT threadsafe.
+    # Why do we have two buffers? `RawMessagesBuffer` is used to store raw messages and to
+    # handle cases related to partition revocation and reconnections. It is "internal" to the
+    # listening process. `MessagesBuffer` on the other hand is used to "translate" those raw
+    # messages that we know are ok into Karafka messages, and to simplify further work with
+    # them.
+    #
+    # While this adds a bit of overhead, it makes conceptual things much easier and it adds
+    # only two simple hash iterations over the messages batch.
+    #
+    # @note This buffer is NOT thread safe. We do not worry about it as we do not use it
+    #   outside of the main listener loop. It can be cleared after the jobs are scheduled with
+    #   the messages it stores, because the messages arrays are not "cleared" in any way
+    #   directly and their references stay.
     class MessagesBuffer
       attr_reader :size

-      # @return [Karafka::Connection::MessagesBuffer] buffer instance
-      def initialize
+      # @param subscription_group [Karafka::Routing::SubscriptionGroup]
+      def initialize(subscription_group)
+        @subscription_group = subscription_group
         @size = 0
         @groups = Hash.new do |topic_groups, topic|
           topic_groups[topic] = Hash.new do |partition_groups, partition|
@@ -20,11 +30,35 @@ module Karafka
         end
       end

-      # Iterates over aggregated data providing messages per topic partition.
+      # Remaps raw messages from the raw messages buffer to Karafka messages
+      #
+      # @param raw_messages_buffer [RawMessagesBuffer] buffer with raw messages
+      def remap(raw_messages_buffer)
+        clear unless @size.zero?
+
+        # Since this happens "right after" we've received the messages, it is close enough in
+        # time to be used as the moment we received them.
+        received_at = Time.now
+
+        raw_messages_buffer.each do |topic, partition, messages|
+          @size += messages.count
+
+          ktopic = @subscription_group.topics.find(topic)
+
+          @groups[topic][partition] = messages.map do |message|
+            Messages::Builders::Message.call(
+              message,
+              ktopic,
+              received_at
+            )
+          end
+        end
+      end
+
+      # Allows iterating over all the topics and partitions messages
       #
       # @yieldparam [String] topic name
       # @yieldparam [Integer] partition number
-      # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+      # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
       def each
         @groups.each do |topic, partitions|
           partitions.each do |partition, messages|
@@ -33,64 +67,17 @@ module Karafka
         end
       end

-      # Adds a message to the buffer.
-      #
-      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
-      # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
-      def <<(message)
-        @size += 1
-        @groups[message.topic][message.partition] << message
+      # @return [Boolean] is the buffer empty or does it contain any messages
+      def empty?
+        @size.zero?
       end

-      # Removes given topic and partition data out of the buffer
-      # This is used when there's a partition revocation
-      # @param topic [String] topic we're interested in
-      # @param partition [Integer] partition of which data we want to remove
-      def delete(topic, partition)
-        return unless @groups.key?(topic)
-        return unless @groups.fetch(topic).key?(partition)
-
-        topic_data = @groups.fetch(topic)
-        topic_data.delete(partition)
-
-        recount!
-
-        # If there are no more partitions to handle in a given topic, remove it completely
-        @groups.delete(topic) if topic_data.empty?
-      end
-
-      # Removes duplicated messages from the same partitions
-      # This should be used only when rebalance occurs, as we may get data again we already have
-      # due to the processing from the last offset. In cases like this, we may get same data
-      # again and we do want to ensure as few duplications as possible
-      def uniq!
-        @groups.each_value do |partitions|
-          partitions.each_value do |messages|
-            messages.uniq!(&:offset)
-          end
-        end
-
-        recount!
-      end
+      private

-      # Removes all the data from the buffer.
-      #
-      # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
-      #   we save ourselves some objects allocations. We cannot clear the underlying arrays as
-      #   they may be used in other threads for data processing, thus if we would clear it, we
-      #   could potentially clear a raw messages array for a job that is in the jobs queue.
+      # Clears the buffer completely
       def clear
         @size = 0
-        @groups.each_value(&:clear)
-      end
-
-      private
-
-      # Updates the messages count if we performed any operations that could change the state
-      def recount!
-        @size = @groups.each_value.sum do |partitions|
-          partitions.each_value.map(&:count).sum
-        end
+        @groups.clear
       end
     end
   end
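
The remap step is a single pass over the grouped raw messages that produces enriched message objects per topic partition. A self-contained sketch of the idea using plain hashes and a Struct standing in for the rdkafka and Karafka message types:

    # Stand-ins only; the real code uses Rdkafka::Consumer::Message and
    # Karafka::Messages::Builders::Message.
    RawMessage = Struct.new(:topic, :partition, :offset, :payload)

    raw = [RawMessage.new('events', 0, 0, 'a'), RawMessage.new('events', 0, 1, 'b')]

    # Group raw messages per topic and partition, as RawMessagesBuffer does
    grouped = Hash.new { |h, t| h[t] = Hash.new { |g, p| g[p] = [] } }
    raw.each { |m| grouped[m.topic][m.partition] << m }

    # Remap once into "built" messages, stamping a shared received_at time
    received_at = Time.now
    built = {}
    grouped.each do |topic, partitions|
      partitions.each do |partition, messages|
        built[[topic, partition]] = messages.map do |m|
          { payload: m.payload, offset: m.offset, received_at: received_at }
        end
      end
    end

    built.each { |(topic, partition), msgs| puts "#{topic}/#{partition}: #{msgs.size}" }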
data/lib/karafka/connection/pauses_manager.rb

@@ -12,11 +12,11 @@ module Karafka
       end
     end

-    # Creates or fetches pause of a given topic partition.
+    # Creates or fetches pause tracker of a given topic partition.
     #
     # @param topic [String] topic name
     # @param partition [Integer] partition number
-    # @return [Karafka::TimeTrackers::Pause] pause instance
+    # @return [Karafka::TimeTrackers::Pause] pause tracker instance
     def fetch(topic, partition)
       @pauses[topic][partition] ||= TimeTrackers::Pause.new(
         timeout: Karafka::App.config.pause_timeout,
data/lib/karafka/connection/raw_messages_buffer.rb

@@ -0,0 +1,101 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Buffer for raw librdkafka messages.
+    #
+    # When a message is added to this buffer, it gets assigned to an array with other messages
+    # from the same topic and partition.
+    #
+    # @note This buffer is NOT threadsafe.
+    #
+    # @note We store data here in groups per topic partition to handle the revocation case,
+    #   where we may need to remove messages from a single topic partition.
+    class RawMessagesBuffer
+      attr_reader :size
+
+      # @return [Karafka::Connection::RawMessagesBuffer] buffer instance
+      def initialize
+        @size = 0
+        @groups = Hash.new do |topic_groups, topic|
+          topic_groups[topic] = Hash.new do |partition_groups, partition|
+            partition_groups[partition] = []
+          end
+        end
+      end
+
+      # Adds a message to the buffer.
+      #
+      # @param message [Rdkafka::Consumer::Message] raw rdkafka message
+      # @return [Array<Rdkafka::Consumer::Message>] given partition topic sub-buffer array
+      def <<(message)
+        @size += 1
+        @groups[message.topic][message.partition] << message
+      end
+
+      # Allows iterating over all the topics and partitions messages
+      #
+      # @yieldparam [String] topic name
+      # @yieldparam [Integer] partition number
+      # @yieldparam [Array<Rdkafka::Consumer::Message>] topic partition aggregated results
+      def each
+        @groups.each do |topic, partitions|
+          partitions.each do |partition, messages|
+            yield(topic, partition, messages)
+          end
+        end
+      end
+
+      # Removes given topic and partition data out of the buffer.
+      # This is used when there's a partition revocation.
+      #
+      # @param topic [String] topic we're interested in
+      # @param partition [Integer] partition of which data we want to remove
+      def delete(topic, partition)
+        return unless @groups.key?(topic)
+        return unless @groups.fetch(topic).key?(partition)
+
+        topic_data = @groups.fetch(topic)
+        topic_data.delete(partition)
+
+        recount!
+
+        # If there are no more partitions to handle in a given topic, remove it completely
+        @groups.delete(topic) if topic_data.empty?
+      end
+
+      # Removes duplicated messages from the same partitions.
+      # This should be used only when a rebalance occurs, as we may again get data we already
+      # have due to processing from the last offset. In cases like this, we want to ensure as
+      # few duplications as possible.
+      def uniq!
+        @groups.each_value do |partitions|
+          partitions.each_value do |messages|
+            messages.uniq!(&:offset)
+          end
+        end
+
+        recount!
+      end
+
+      # Removes all the data from the buffer.
+      #
+      # @note We do not clear the whole groups hash but rather we clear the partition hashes,
+      #   so we save ourselves some object allocations. We cannot clear the underlying arrays
+      #   as they may be used in other threads for data processing, so if we cleared them we
+      #   could potentially clear a raw messages array for a job that is still in the jobs
+      #   queue.
+      def clear
+        @size = 0
+        @groups.each_value(&:clear)
+      end
+
+      private
+
+      # Updates the messages count if we performed any operations that could change the state
+      def recount!
+        @size = @groups.each_value.sum do |partitions|
+          partitions.each_value.map(&:count).sum
+        end
+      end
+    end
+  end
+end
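
Since the whole class is new and shown above, its behavior can be exercised directly, with a Struct standing in for Rdkafka::Consumer::Message:

    # Assumes the RawMessagesBuffer class above is loaded.
    FakeMessage = Struct.new(:topic, :partition, :offset)

    buffer = Karafka::Connection::RawMessagesBuffer.new
    buffer << FakeMessage.new('events', 0, 100)
    buffer << FakeMessage.new('events', 0, 100) # duplicated offset after a rebalance
    buffer << FakeMessage.new('events', 1, 7)

    buffer.uniq!               # drops the duplicated offset and recounts
    puts buffer.size           # => 2
    buffer.delete('events', 1) # partition 1 was revoked
    puts buffer.size           # => 1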
data/lib/karafka/contracts/config.rb

@@ -25,14 +25,17 @@ module Karafka
       required(:pause_max_timeout) { int? & gt?(0) }
       required(:pause_with_exponential_backoff).filled(:bool?)
       required(:shutdown_timeout) { int? & gt?(0) }
+      required(:max_wait_time) { int? & gt?(0) }
       required(:kafka).filled(:hash)

       # We validate internals just to be sure that they are present and working
       required(:internal).schema do
         required(:routing_builder)
+        required(:subscription_groups_builder)
+        required(:jobs_builder)
         required(:status)
         required(:process)
-        required(:subscription_groups_builder)
+        required(:scheduler)
       end
     end
   end
@@ -52,6 +55,12 @@ module Karafka
           key(:pause_timeout).failure(:max_timeout_vs_pause_max_timeout)
         end
       end
+
+      rule(:shutdown_timeout, :max_wait_time) do
+        if values[:max_wait_time].to_i >= values[:shutdown_timeout].to_i
+          key(:shutdown_timeout).failure(:shutdown_timeout_vs_max_wait_time)
+        end
+      end
     end
   end
 end
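
The new cross-field rule means shutdown_timeout must stay strictly greater than max_wait_time, otherwise setup fails validation. An illustrative configuration sketch (the values are examples; both settings are in milliseconds):

    class KarafkaApp < Karafka::App
      setup do |config|
        config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
        config.max_wait_time = 1_000      # poll for up to 1 second per batch
        config.shutdown_timeout = 60_000  # must exceed max_wait_time or the contract fails
      end
    end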
data/lib/karafka/helpers/async.rb

@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Helpers
+    # Allows a given class to run async in a separate thread. Also provides a few methods we
+    # may want to use to control the underlying thread.
+    #
+    # @note Thread running code needs to manage its own exceptions. If they leak out, they
+    #   will abort the thread on exception.
+    module Async
+      class << self
+        # Adds forwardable to redirect thread-based control methods to the underlying thread
+        # that runs the async operations
+        #
+        # @param base [Class] class we're including this module in
+        def included(base)
+          base.extend ::Forwardable
+
+          base.def_delegators :@thread, :join, :terminate, :alive?
+        end
+      end
+
+      # Runs the `#call` method in a new thread
+      def async_call
+        @thread = Thread.new do
+          Thread.current.abort_on_exception = true
+
+          call
+        end
+      end
+    end
+  end
+end
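
Given the module above, any class that defines #call gains a thread-backed lifecycle. A minimal usage sketch:

    # Pinger is a hypothetical example class, not part of Karafka.
    class Pinger
      include Karafka::Helpers::Async

      # The work that will run inside the background thread
      def call
        3.times { puts 'ping' }
      end
    end

    pinger = Pinger.new
    pinger.async_call  # spawns the thread and runs #call inside it
    pinger.join        # delegated to the underlying thread
    puts pinger.alive? # => false once #call has finished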
data/lib/karafka/instrumentation/logger_listener.rb

@@ -15,16 +15,43 @@ module Karafka

     # Logs each messages fetching attempt
     #
-    # @param _event [Dry::Events::Event] event details including payload
-    def on_connection_listener_fetch_loop(_event)
-      info 'Receiving new messages from Kafka...'
+    # @param event [Dry::Events::Event] event details including payload
+    def on_connection_listener_fetch_loop(event)
+      listener = event[:caller]
+      info "[#{listener.id}] Polling messages..."
     end

     # Logs about messages that we've received from Kafka
     #
     # @param event [Dry::Events::Event] event details including payload
     def on_connection_listener_fetch_loop_received(event)
-      info "Received #{event[:messages_buffer].size} new messages from Kafka"
+      listener = event[:caller]
+      time = event[:time]
+      messages_count = event[:messages_buffer].size
+      info "[#{listener.id}] Polled #{messages_count} messages in #{time}ms"
+    end
+
+    # Prints info about the fact that a given job has started
+    #
+    # @param event [Dry::Events::Event] event details including payload
+    def on_worker_process(event)
+      job = event[:job]
+      job_type = job.class.to_s.split('::').last
+      consumer = job.executor.topic.consumer
+      topic = job.executor.topic.name
+      info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} started"
+    end
+
+    # Prints info about the fact that a given job has finished
+    #
+    # @param event [Dry::Events::Event] event details including payload
+    def on_worker_processed(event)
+      job = event[:job]
+      time = event[:time]
+      job_type = job.class.to_s.split('::').last
+      consumer = job.executor.topic.consumer
+      topic = job.executor.topic.name
+      info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} finished in #{time}ms"
     end

     # Logs info about system signals that Karafka received.
@@ -52,16 +79,14 @@ module Karafka
     #
     # @param _event [Dry::Events::Event] event details including payload
     def on_app_stopping(_event)
-      # We use a separate thread as logging can't be called from trap context
-      Thread.new { info 'Stopping Karafka server' }
+      info 'Stopping Karafka server'
     end

     # Logs info that we stopped the Karafka server.
     #
     # @param _event [Dry::Events::Event] event details including payload
     def on_app_stopped(_event)
-      # We use a separate thread as logging can't be called from trap context
-      Thread.new { info 'Stopped Karafka server' }
+      info 'Stopped Karafka server'
     end

     # There are many types of errors that can occur in many places, but we provide a single
@@ -73,6 +98,9 @@ module Karafka
       details = (error.backtrace || []).join("\n")

       case type
+      when 'consumer.prepared.error'
+        error "Consumer prepared error: #{error}"
+        error details
       when 'consumer.consume.error'
         error "Consumer consuming error: #{error}"
         error details
@@ -95,8 +123,7 @@ module Karafka
       fatal "Runner crashed due to an error: #{error}"
       fatal details
     when 'app.stopping.error'
-      # We use a separate thread as logging can't be called from trap context
-      Thread.new { error 'Forceful Karafka server stop' }
+      error 'Forceful Karafka server stop'
     when 'librdkafka.error'
       error "librdkafka internal error occurred: #{error}"
       error details
data/lib/karafka/instrumentation/monitor.rb

@@ -22,6 +22,7 @@ module Karafka
       app.stopping
       app.stopped

+      consumer.prepared
       consumer.consumed
       consumer.revoked
       consumer.shutdown
@@ -32,6 +33,9 @@ module Karafka
       connection.listener.fetch_loop
       connection.listener.fetch_loop.received

+      worker.process
+      worker.processed
+
       statistics.emitted

       error.occurred
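
With worker.process and worker.processed registered, custom subscribers can hook into per-job timing the same way the logger listener above does. A hedged sketch; the payload keys mirror the ones the logger listener reads:

    # Assumes a booted Karafka app with the events above registered.
    Karafka.monitor.subscribe('worker.processed') do |event|
      job = event[:job]
      puts "#{job.class} finished in #{event[:time]}ms"
    end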