karafka 2.0.0.beta5 → 2.0.0.rc3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +28 -0
- data/CONTRIBUTING.md +0 -5
- data/Gemfile.lock +12 -42
- data/LICENSE-COMM +1 -1
- data/README.md +44 -16
- data/bin/stress_many +1 -1
- data/bin/stress_one +1 -1
- data/config/errors.yml +52 -5
- data/docker-compose.yml +7 -0
- data/karafka.gemspec +2 -4
- data/lib/karafka/active_job/consumer.rb +2 -0
- data/lib/karafka/active_job/job_options_contract.rb +8 -2
- data/lib/karafka/base_consumer.rb +4 -6
- data/lib/karafka/cli/install.rb +15 -2
- data/lib/karafka/cli/server.rb +4 -2
- data/lib/karafka/connection/client.rb +20 -17
- data/lib/karafka/connection/listener.rb +12 -24
- data/lib/karafka/connection/pauses_manager.rb +0 -8
- data/lib/karafka/contracts/base.rb +2 -8
- data/lib/karafka/contracts/config.rb +71 -51
- data/lib/karafka/contracts/consumer_group.rb +25 -18
- data/lib/karafka/contracts/consumer_group_topic.rb +30 -16
- data/lib/karafka/contracts/server_cli_options.rb +18 -7
- data/lib/karafka/helpers/colorize.rb +20 -0
- data/lib/karafka/instrumentation/logger_listener.rb +8 -2
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +5 -2
- data/lib/karafka/pro/active_job/job_options_contract.rb +11 -6
- data/lib/karafka/pro/base_consumer.rb +21 -12
- data/lib/karafka/pro/contracts/base.rb +21 -0
- data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
- data/lib/karafka/pro/loader.rb +23 -3
- data/lib/karafka/pro/processing/coordinator.rb +51 -0
- data/lib/karafka/pro/processing/partitioner.rb +60 -0
- data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
- data/lib/karafka/pro/routing/{extensions.rb → topic_extensions.rb} +7 -1
- data/lib/karafka/processing/coordinator.rb +6 -2
- data/lib/karafka/processing/coordinators_buffer.rb +3 -7
- data/lib/karafka/processing/executor.rb +1 -1
- data/lib/karafka/processing/jobs_queue.rb +11 -0
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/worker.rb +4 -2
- data/lib/karafka/setup/config.rb +9 -3
- data/lib/karafka/templates/example_consumer.rb.erb +2 -2
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +2 -2
- data.tar.gz.sig +0 -0
- metadata +15 -34
- metadata.gz.sig +0 -0
@@ -0,0 +1,232 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Instrumentation
|
5
|
+
# Namespace for vendor specific instrumentation
|
6
|
+
module Vendors
|
7
|
+
# Datadog specific instrumentation
|
8
|
+
module Datadog
|
9
|
+
# Listener that can be used to subscribe to Karafka to receive stats via StatsD
|
10
|
+
# and/or Datadog
|
11
|
+
#
|
12
|
+
# @note You need to setup the `dogstatsd-ruby` client and assign it
|
13
|
+
class Listener
|
14
|
+
include WaterDrop::Configurable
|
15
|
+
extend Forwardable
|
16
|
+
|
17
|
+
def_delegators :config, :client, :rd_kafka_metrics, :namespace, :default_tags
|
18
|
+
|
19
|
+
# Value object for storing a single rdkafka metric publishing details
|
20
|
+
RdKafkaMetric = Struct.new(:type, :scope, :name, :key_location)
|
21
|
+
|
22
|
+
# Namespace under which the DD metrics should be published
|
23
|
+
setting :namespace, default: 'karafka'
|
24
|
+
|
25
|
+
# Datadog client that we should use to publish the metrics
|
26
|
+
setting :client
|
27
|
+
|
28
|
+
# Default tags we want to publish (for example hostname)
|
29
|
+
# Format as followed (example for hostname): `["host:#{Socket.gethostname}"]`
|
30
|
+
setting :default_tags, default: []
|
31
|
+
|
32
|
+
# All the rdkafka metrics we want to publish
|
33
|
+
#
|
34
|
+
# By default we publish quite a lot so this can be tuned
|
35
|
+
# Note that the ones with `_d` come from Karafka, not rdkafka or Kafka
|
36
|
+
setting :rd_kafka_metrics, default: [
|
37
|
+
# Client metrics
|
38
|
+
RdKafkaMetric.new(:count, :root, 'messages.consumed', 'rxmsgs_d'),
|
39
|
+
RdKafkaMetric.new(:count, :root, 'messages.consumed.bytes', 'rxmsg_bytes'),
|
40
|
+
|
41
|
+
# Broker metrics
|
42
|
+
RdKafkaMetric.new(:count, :brokers, 'consume.attempts', 'txretries_d'),
|
43
|
+
RdKafkaMetric.new(:count, :brokers, 'consume.errors', 'txerrs_d'),
|
44
|
+
RdKafkaMetric.new(:count, :brokers, 'receive.errors', 'rxerrs_d'),
|
45
|
+
RdKafkaMetric.new(:count, :brokers, 'connection.connects', 'connects_d'),
|
46
|
+
RdKafkaMetric.new(:count, :brokers, 'connection.disconnects', 'disconnects_d'),
|
47
|
+
RdKafkaMetric.new(:gauge, :brokers, 'network.latency.avg', %w[rtt avg]),
|
48
|
+
RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p95', %w[rtt p95]),
|
49
|
+
RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p99', %w[rtt p99])
|
50
|
+
].freeze
|
51
|
+
|
52
|
+
configure
|
53
|
+
|
54
|
+
# @param block [Proc] configuration block
|
55
|
+
def initialize(&block)
|
56
|
+
configure
|
57
|
+
setup(&block) if block
|
58
|
+
end
|
59
|
+
|
60
|
+
# @param block [Proc] configuration block
|
61
|
+
# @note We define this alias to be consistent with `WaterDrop#setup`
|
62
|
+
def setup(&block)
|
63
|
+
configure(&block)
|
64
|
+
end
|
65
|
+
|
66
|
+
# Hooks up to WaterDrop instrumentation for emitted statistics
|
67
|
+
#
|
68
|
+
# @param event [Dry::Events::Event]
|
69
|
+
def on_statistics_emitted(event)
|
70
|
+
statistics = event[:statistics]
|
71
|
+
|
72
|
+
rd_kafka_metrics.each do |metric|
|
73
|
+
report_metric(metric, statistics)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
# Increases the errors count by 1
|
78
|
+
#
|
79
|
+
# @param event [Dry::Events::Event]
|
80
|
+
def on_error_occurred(event)
|
81
|
+
extra_tags = ["type:#{event[:type]}"]
|
82
|
+
|
83
|
+
if event.payload[:caller].respond_to?(:messages)
|
84
|
+
metadata = event.payload[:caller].messages.metadata
|
85
|
+
|
86
|
+
extra_tags += [
|
87
|
+
"topic:#{metadata.topic}",
|
88
|
+
"partition:#{metadata.partition}"
|
89
|
+
]
|
90
|
+
end
|
91
|
+
|
92
|
+
count('error_occurred', 1, tags: default_tags + extra_tags)
|
93
|
+
end
|
94
|
+
|
95
|
+
# Reports how many messages we've polled and how much time did we spend on it
|
96
|
+
#
|
97
|
+
# @param event [Dry::Events::Event]
|
98
|
+
def on_connection_listener_fetch_loop_received(event)
|
99
|
+
time_taken = event[:time]
|
100
|
+
messages_count = event[:messages_buffer].size
|
101
|
+
|
102
|
+
histogram('listener.polling.time_taken', time_taken, tags: default_tags)
|
103
|
+
histogram('listener.polling.messages', messages_count, tags: default_tags)
|
104
|
+
end
|
105
|
+
|
106
|
+
# Here we report majority of things related to processing as we have access to the
|
107
|
+
# consumer
|
108
|
+
# @param event [Dry::Events::Event]
|
109
|
+
def on_consumer_consumed(event)
|
110
|
+
messages = event.payload[:caller].messages
|
111
|
+
metadata = messages.metadata
|
112
|
+
|
113
|
+
tags = default_tags + [
|
114
|
+
"topic:#{metadata.topic}",
|
115
|
+
"partition:#{metadata.partition}"
|
116
|
+
]
|
117
|
+
|
118
|
+
count('consumer.messages', messages.count, tags: tags)
|
119
|
+
count('consumer.batches', 1, tags: tags)
|
120
|
+
gauge('consumer.offset', metadata.last_offset, tags: tags)
|
121
|
+
histogram('consumer.consumed.time_taken', event[:time], tags: tags)
|
122
|
+
histogram('consumer.batch_size', messages.count, tags: tags)
|
123
|
+
histogram('consumer.processing_lag', metadata.processing_lag, tags: tags)
|
124
|
+
histogram('consumer.consumption_lag', metadata.consumption_lag, tags: tags)
|
125
|
+
end
|
126
|
+
|
127
|
+
# @param event [Dry::Events::Event]
|
128
|
+
def on_consumer_revoked(event)
|
129
|
+
messages = event.payload[:caller].messages
|
130
|
+
metadata = messages.metadata
|
131
|
+
|
132
|
+
tags = default_tags + [
|
133
|
+
"topic:#{metadata.topic}",
|
134
|
+
"partition:#{metadata.partition}"
|
135
|
+
]
|
136
|
+
|
137
|
+
count('consumer.revoked', 1, tags: tags)
|
138
|
+
end
|
139
|
+
|
140
|
+
# @param event [Dry::Events::Event]
|
141
|
+
def on_consumer_shutdown(event)
|
142
|
+
messages = event.payload[:caller].messages
|
143
|
+
metadata = messages.metadata
|
144
|
+
|
145
|
+
tags = default_tags + [
|
146
|
+
"topic:#{metadata.topic}",
|
147
|
+
"partition:#{metadata.partition}"
|
148
|
+
]
|
149
|
+
|
150
|
+
count('consumer.shutdown', 1, tags: tags)
|
151
|
+
end
|
152
|
+
|
153
|
+
# Worker related metrics
|
154
|
+
# @param event [Dry::Events::Event]
|
155
|
+
def on_worker_process(event)
|
156
|
+
jq_stats = event[:jobs_queue].statistics
|
157
|
+
|
158
|
+
gauge('worker.total_threads', Karafka::App.config.concurrency, tags: default_tags)
|
159
|
+
histogram('worker.processing', jq_stats[:processing], tags: default_tags)
|
160
|
+
histogram('worker.enqueued_jobs', jq_stats[:enqueued], tags: default_tags)
|
161
|
+
end
|
162
|
+
|
163
|
+
# We report this metric before and after processing for higher accuracy
|
164
|
+
# Without this, the utilization would not be fully reflected
|
165
|
+
# @param event [Dry::Events::Event]
|
166
|
+
def on_worker_processed(event)
|
167
|
+
jq_stats = event[:jobs_queue].statistics
|
168
|
+
|
169
|
+
histogram('worker.processing', jq_stats[:processing], tags: default_tags)
|
170
|
+
end
|
171
|
+
|
172
|
+
private
|
173
|
+
|
174
|
+
%i[
|
175
|
+
count
|
176
|
+
gauge
|
177
|
+
histogram
|
178
|
+
increment
|
179
|
+
decrement
|
180
|
+
].each do |metric_type|
|
181
|
+
class_eval <<~METHODS, __FILE__, __LINE__ + 1
|
182
|
+
def #{metric_type}(key, *args)
|
183
|
+
client.#{metric_type}(
|
184
|
+
namespaced_metric(key),
|
185
|
+
*args
|
186
|
+
)
|
187
|
+
end
|
188
|
+
METHODS
|
189
|
+
end
|
190
|
+
|
191
|
+
# Wraps metric name in listener's namespace
|
192
|
+
# @param metric_name [String] RdKafkaMetric name
|
193
|
+
# @return [String]
|
194
|
+
def namespaced_metric(metric_name)
|
195
|
+
"#{namespace}.#{metric_name}"
|
196
|
+
end
|
197
|
+
|
198
|
+
# Reports a given metric statistics to Datadog
|
199
|
+
# @param metric [RdKafkaMetric] metric value object
|
200
|
+
# @param statistics [Hash] hash with all the statistics emitted
|
201
|
+
def report_metric(metric, statistics)
|
202
|
+
case metric.scope
|
203
|
+
when :root
|
204
|
+
public_send(
|
205
|
+
metric.type,
|
206
|
+
metric.name,
|
207
|
+
statistics.fetch(*metric.key_location),
|
208
|
+
tags: default_tags
|
209
|
+
)
|
210
|
+
when :brokers
|
211
|
+
statistics.fetch('brokers').each_value do |broker_statistics|
|
212
|
+
# Skip bootstrap nodes
|
213
|
+
# Bootstrap nodes have nodeid -1, other nodes have positive
|
214
|
+
# node ids
|
215
|
+
next if broker_statistics['nodeid'] == -1
|
216
|
+
|
217
|
+
public_send(
|
218
|
+
metric.type,
|
219
|
+
metric.name,
|
220
|
+
broker_statistics.dig(*metric.key_location),
|
221
|
+
tags: default_tags + ["broker:#{broker_statistics['nodename']}"]
|
222
|
+
)
|
223
|
+
end
|
224
|
+
else
|
225
|
+
raise ArgumentError, metric.scope
|
226
|
+
end
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
232
|
+
end
|
@@ -23,7 +23,9 @@ module Karafka
|
|
23
23
|
dispatch_method: :produce_async,
|
24
24
|
# We don't create a dummy proc based partitioner as we would have to evaluate it with
|
25
25
|
# each job.
|
26
|
-
partitioner: nil
|
26
|
+
partitioner: nil,
|
27
|
+
# Allows for usage of `:key` or `:partition_key`
|
28
|
+
partition_key_type: :key
|
27
29
|
}.freeze
|
28
30
|
|
29
31
|
private_constant :DEFAULTS
|
@@ -45,11 +47,12 @@ module Karafka
|
|
45
47
|
# @return [Hash] hash with dispatch details to which we merge topic and payload
|
46
48
|
def dispatch_details(job)
|
47
49
|
partitioner = fetch_option(job, :partitioner, DEFAULTS)
|
50
|
+
key_type = fetch_option(job, :partition_key_type, DEFAULTS)
|
48
51
|
|
49
52
|
return {} unless partitioner
|
50
53
|
|
51
54
|
{
|
52
|
-
|
55
|
+
key_type => partitioner.call(job)
|
53
56
|
}
|
54
57
|
end
|
55
58
|
end
|
@@ -14,13 +14,18 @@ module Karafka
|
|
14
14
|
module ActiveJob
|
15
15
|
# Contract for validating the options that can be altered with `#karafka_options` per job
|
16
16
|
# class that works with Pro features.
|
17
|
-
class JobOptionsContract < ::
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
17
|
+
class JobOptionsContract < Contracts::Base
|
18
|
+
configure do |config|
|
19
|
+
config.error_messages = YAML.safe_load(
|
20
|
+
File.read(
|
21
|
+
File.join(Karafka.gem_root, 'config', 'errors.yml')
|
22
|
+
)
|
23
|
+
).fetch('en').fetch('validations').fetch('job_options')
|
23
24
|
end
|
25
|
+
|
26
|
+
optional(:dispatch_method) { |val| %i[produce_async produce_sync].include?(val) }
|
27
|
+
optional(:partitioner) { |val| val.respond_to?(:call) }
|
28
|
+
optional(:partition_key_type) { |val| %i[key partition_key].include?(val) }
|
24
29
|
end
|
25
30
|
end
|
26
31
|
end
|
@@ -26,29 +26,38 @@ module Karafka
|
|
26
26
|
# Pauses processing of a given partition until we're done with the processing
|
27
27
|
# This ensures, that we can easily poll not reaching the `max.poll.interval`
|
28
28
|
def on_before_consume
|
29
|
-
# Pause at the first message in a batch. That way in case of a crash, we will not lose
|
30
|
-
# any messages
|
31
29
|
return unless topic.long_running_job?
|
32
30
|
|
33
|
-
|
31
|
+
# This ensures, that when running LRJ with VP, things operate as expected
|
32
|
+
coordinator.on_started do |first_group_message|
|
33
|
+
# Pause at the first message in a batch. That way in case of a crash, we will not lose
|
34
|
+
# any messages
|
35
|
+
pause(first_group_message.offset, MAX_PAUSE_TIME)
|
36
|
+
end
|
34
37
|
end
|
35
38
|
|
36
39
|
# Runs extra logic after consumption that is related to handling long running jobs
|
37
40
|
# @note This overwrites the '#on_after_consume' from the base consumer
|
38
41
|
def on_after_consume
|
39
|
-
|
40
|
-
|
42
|
+
coordinator.on_finished do |first_group_message, last_group_message|
|
43
|
+
on_after_consume_regular(first_group_message, last_group_message)
|
44
|
+
end
|
45
|
+
end
|
41
46
|
|
42
|
-
|
47
|
+
private
|
48
|
+
|
49
|
+
# Handles the post-consumption flow depending on topic settings
|
50
|
+
#
|
51
|
+
# @param first_message [Karafka::Messages::Message]
|
52
|
+
# @param last_message [Karafka::Messages::Message]
|
53
|
+
def on_after_consume_regular(first_message, last_message)
|
54
|
+
if coordinator.success?
|
43
55
|
coordinator.pause_tracker.reset
|
44
56
|
|
45
57
|
# We use the non-blocking one here. If someone needs the blocking one, can implement it
|
46
58
|
# with manual offset management
|
47
59
|
# Mark as consumed only if manual offset management is not on
|
48
|
-
mark_as_consumed(
|
49
|
-
|
50
|
-
# We check it twice as marking could change this state
|
51
|
-
return if revoked?
|
60
|
+
mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?
|
52
61
|
|
53
62
|
# If this is not a long running job there is nothing for us to do here
|
54
63
|
return unless topic.long_running_job?
|
@@ -60,12 +69,12 @@ module Karafka
|
|
60
69
|
# interesting (yet valid) corner case, where with manual offset management on and no
|
61
70
|
# marking as consumed, we end up with an infinite loop processing same messages over and
|
62
71
|
# over again
|
63
|
-
seek(@seek_offset ||
|
72
|
+
seek(@seek_offset || first_message.offset)
|
64
73
|
|
65
74
|
resume
|
66
75
|
else
|
67
76
|
# If processing failed, we need to pause
|
68
|
-
pause(@seek_offset ||
|
77
|
+
pause(@seek_offset || first_message.offset)
|
69
78
|
end
|
70
79
|
end
|
71
80
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
|
+
#
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
|
+
#
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
|
+
# your code to Maciej Mensfeld.
|
11
|
+
|
12
|
+
module Karafka
|
13
|
+
module Pro
|
14
|
+
# Namespace for Karafka Pro related contracts
|
15
|
+
module Contracts
|
16
|
+
# Base contract for Pro components contracts
|
17
|
+
class Base < ::Karafka::Contracts::Base
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
|
+
#
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
|
+
#
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
|
+
# your code to Maciej Mensfeld.
|
11
|
+
|
12
|
+
module Karafka
|
13
|
+
module Pro
|
14
|
+
module Contracts
|
15
|
+
# Contract for validating correct Pro components setup on a consumer group and topic levels
|
16
|
+
class ConsumerGroup < Base
|
17
|
+
virtual do |data, errors|
|
18
|
+
next unless errors.empty?
|
19
|
+
next unless data.key?(:topics)
|
20
|
+
|
21
|
+
fetched_errors = []
|
22
|
+
|
23
|
+
data.fetch(:topics).each do |topic|
|
24
|
+
ConsumerGroupTopic.new.call(topic).errors.each do |key, value|
|
25
|
+
fetched_errors << [[topic, key].flatten, value]
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
fetched_errors
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
|
+
#
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
|
+
#
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
|
+
# your code to Maciej Mensfeld.
|
11
|
+
|
12
|
+
module Karafka
|
13
|
+
module Pro
|
14
|
+
module Contracts
|
15
|
+
# Contract for validating correct Pro components setup on a topic level
|
16
|
+
class ConsumerGroupTopic < Base
|
17
|
+
configure do |config|
|
18
|
+
config.error_messages = YAML.safe_load(
|
19
|
+
File.read(
|
20
|
+
File.join(Karafka.gem_root, 'config', 'errors.yml')
|
21
|
+
)
|
22
|
+
).fetch('en').fetch('validations').fetch('pro_consumer_group_topic')
|
23
|
+
end
|
24
|
+
|
25
|
+
virtual do |data|
|
26
|
+
next if data[:consumer] < Karafka::Pro::BaseConsumer
|
27
|
+
|
28
|
+
[[%i[consumer], :consumer_format]]
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
data/lib/karafka/pro/loader.rb
CHANGED
@@ -21,7 +21,12 @@ module Karafka
|
|
21
21
|
processing/jobs/consume_non_blocking
|
22
22
|
processing/jobs_builder
|
23
23
|
processing/coordinator
|
24
|
-
|
24
|
+
processing/partitioner
|
25
|
+
contracts/base
|
26
|
+
contracts/consumer_group
|
27
|
+
contracts/consumer_group_topic
|
28
|
+
routing/topic_extensions
|
29
|
+
routing/builder_extensions
|
25
30
|
active_job/consumer
|
26
31
|
active_job/dispatcher
|
27
32
|
active_job/job_options_contract
|
@@ -36,9 +41,20 @@ module Karafka
|
|
36
41
|
def setup(config)
|
37
42
|
COMPONENTS.each { |component| require_relative(component) }
|
38
43
|
|
44
|
+
reconfigure(config)
|
45
|
+
|
46
|
+
load_routing_extensions
|
47
|
+
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
# Sets proper config options to use pro components
|
52
|
+
# @param config [WaterDrop::Configurable::Node] root config node
|
53
|
+
def reconfigure(config)
|
39
54
|
icfg = config.internal
|
40
55
|
|
41
56
|
icfg.processing.coordinator_class = Processing::Coordinator
|
57
|
+
icfg.processing.partitioner_class = Processing::Partitioner
|
42
58
|
icfg.processing.scheduler = Processing::Scheduler.new
|
43
59
|
icfg.processing.jobs_builder = Processing::JobsBuilder.new
|
44
60
|
|
@@ -46,10 +62,14 @@ module Karafka
|
|
46
62
|
icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
|
47
63
|
icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
|
48
64
|
|
49
|
-
::Karafka::Routing::Topic.include(Routing::Extensions)
|
50
|
-
|
51
65
|
config.monitor.subscribe(PerformanceTracker.instance)
|
52
66
|
end
|
67
|
+
|
68
|
+
# Loads routing extensions
|
69
|
+
def load_routing_extensions
|
70
|
+
::Karafka::Routing::Topic.include(Routing::TopicExtensions)
|
71
|
+
::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
|
72
|
+
end
|
53
73
|
end
|
54
74
|
end
|
55
75
|
end
|
@@ -6,6 +6,57 @@ module Karafka
|
|
6
6
|
# Pro coordinator that provides extra orchestration methods useful for parallel processing
|
7
7
|
# within the same partition
|
8
8
|
class Coordinator < ::Karafka::Processing::Coordinator
|
9
|
+
# @param args [Object] anything the base coordinator accepts
|
10
|
+
def initialize(*args)
|
11
|
+
super
|
12
|
+
@on_started_invoked = false
|
13
|
+
@on_finished_invoked = false
|
14
|
+
@flow_lock = Mutex.new
|
15
|
+
end
|
16
|
+
|
17
|
+
# Starts the coordination process
|
18
|
+
# @param messages [Array<Karafka::Messages::Message>] messages for which processing we are
|
19
|
+
# going to coordinate.
|
20
|
+
def start(messages)
|
21
|
+
super
|
22
|
+
|
23
|
+
@mutex.synchronize do
|
24
|
+
@on_started_invoked = false
|
25
|
+
@on_finished_invoked = false
|
26
|
+
@first_message = messages.first
|
27
|
+
@last_message = messages.last
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# @return [Boolean] is the coordinated work finished or not
|
32
|
+
def finished?
|
33
|
+
@running_jobs.zero?
|
34
|
+
end
|
35
|
+
|
36
|
+
# Runs given code only once per all the coordinated jobs upon starting the first of them
|
37
|
+
def on_started
|
38
|
+
@flow_lock.synchronize do
|
39
|
+
return if @on_started_invoked
|
40
|
+
|
41
|
+
@on_started_invoked = true
|
42
|
+
|
43
|
+
yield(@first_message, @last_message)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
# Runs once when all the work that is supposed to be coordinated is finished
|
48
|
+
# It runs once per all the coordinated jobs and should be used to run any type of post
|
49
|
+
# jobs coordination processing execution
|
50
|
+
def on_finished
|
51
|
+
@flow_lock.synchronize do
|
52
|
+
return unless finished?
|
53
|
+
return if @on_finished_invoked
|
54
|
+
|
55
|
+
@on_finished_invoked = true
|
56
|
+
|
57
|
+
yield(@first_message, @last_message)
|
58
|
+
end
|
59
|
+
end
|
9
60
|
end
|
10
61
|
end
|
11
62
|
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
|
+
#
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
|
+
#
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
|
+
# your code to Maciej Mensfeld.
|
11
|
+
|
12
|
+
module Karafka
|
13
|
+
module Pro
|
14
|
+
module Processing
|
15
|
+
# Pro partitioner that can distribute work based on the virtual partitioner settings
|
16
|
+
class Partitioner < ::Karafka::Processing::Partitioner
|
17
|
+
# @param topic [String] topic name
|
18
|
+
# @param messages [Array<Karafka::Messages::Message>] karafka messages
|
19
|
+
# @yieldparam [Integer] group id
|
20
|
+
# @yieldparam [Array<Karafka::Messages::Message>] karafka messages
|
21
|
+
def call(topic, messages)
|
22
|
+
ktopic = @subscription_group.topics.find(topic)
|
23
|
+
|
24
|
+
@concurrency ||= ::Karafka::App.config.concurrency
|
25
|
+
|
26
|
+
# We only partition work if we have a virtual partitioner and more than one thread to
|
27
|
+
# process the data. With one thread it is not worth partitioning the work as the work
|
28
|
+
# itself will be assigned to one thread (pointless work)
|
29
|
+
if ktopic.virtual_partitioner? && @concurrency > 1
|
30
|
+
# We need to reduce it to number of threads, so the group_id is not a direct effect
|
31
|
+
# of the end user action. Otherwise the persistence layer for consumers would cache
|
32
|
+
# it forever and it would cause memory leaks
|
33
|
+
groupings = messages
|
34
|
+
.group_by { |msg| ktopic.virtual_partitioner.call(msg) }
|
35
|
+
.values
|
36
|
+
|
37
|
+
# Reduce the max concurrency to a size that matches the concurrency
|
38
|
+
# As mentioned above we cannot use the partitioning keys directly as it could cause
|
39
|
+
# memory leaks
|
40
|
+
#
|
41
|
+
# The algorithm here is simple, we assume that the most costly in terms of processing,
|
42
|
+
# will be processing of the biggest group and we reduce the smallest ones to have
|
43
|
+
# max of groups equal to concurrency
|
44
|
+
while groupings.size > @concurrency
|
45
|
+
groupings.sort_by! { |grouping| -grouping.size }
|
46
|
+
|
47
|
+
# Offset order needs to be maintained for virtual partitions
|
48
|
+
groupings << (groupings.pop + groupings.pop).sort_by!(&:offset)
|
49
|
+
end
|
50
|
+
|
51
|
+
groupings.each_with_index { |messages_group, index| yield(index, messages_group) }
|
52
|
+
else
|
53
|
+
# When no virtual partitioner, works as regular one
|
54
|
+
yield(0, messages)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This Karafka component is a Pro component.
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
|
+
# repository and their usage requires commercial license agreement.
|
6
|
+
#
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
8
|
+
#
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
10
|
+
# your code to Maciej Mensfeld.
|
11
|
+
|
12
|
+
module Karafka
|
13
|
+
module Pro
|
14
|
+
# Pro routing components
|
15
|
+
module Routing
|
16
|
+
# Routing extensions for builder to be able to validate Pro components correct usage
|
17
|
+
module BuilderExtensions
|
18
|
+
# Validate consumer groups with pro contracts
|
19
|
+
# @param block [Proc] routing defining block
|
20
|
+
def draw(&block)
|
21
|
+
super
|
22
|
+
|
23
|
+
each do |consumer_group|
|
24
|
+
::Karafka::Pro::Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -14,14 +14,20 @@ module Karafka
|
|
14
14
|
# Pro routing components
|
15
15
|
module Routing
|
16
16
|
# Routing extensions that allow to configure some extra PRO routing options
|
17
|
-
module
|
17
|
+
module TopicExtensions
|
18
18
|
class << self
|
19
19
|
# @param base [Class] class we extend
|
20
20
|
def included(base)
|
21
21
|
base.attr_accessor :long_running_job
|
22
|
+
base.attr_accessor :virtual_partitioner
|
22
23
|
end
|
23
24
|
end
|
24
25
|
|
26
|
+
# @return [Boolean] true if virtual partitioner is defined, false otherwise
|
27
|
+
def virtual_partitioner?
|
28
|
+
virtual_partitioner != nil
|
29
|
+
end
|
30
|
+
|
25
31
|
# @return [Boolean] is a given job on a topic a long running one
|
26
32
|
def long_running_job?
|
27
33
|
@long_running_job || false
|
@@ -23,7 +23,9 @@ module Karafka
|
|
23
23
|
end
|
24
24
|
|
25
25
|
# Starts the coordinator for given consumption jobs
|
26
|
-
|
26
|
+
# @param _messages [Array<Karafka::Messages::Message>] batch of messages for which we are
|
27
|
+
# going to coordinate work. Not used with regular coordinator.
|
28
|
+
def start(_messages)
|
27
29
|
@mutex.synchronize do
|
28
30
|
@running_jobs = 0
|
29
31
|
# We need to clear the consumption results hash here, otherwise we could end up storing
|
@@ -44,7 +46,9 @@ module Karafka
|
|
44
46
|
|
45
47
|
return @running_jobs unless @running_jobs.negative?
|
46
48
|
|
47
|
-
|
49
|
+
# This should never happen. If it does, something is heavily out of sync. Please reach
|
50
|
+
# out to us if you encounter this
|
51
|
+
raise Karafka::Errors::InvalidCoordinatorState, 'Was zero before decrementation'
|
48
52
|
end
|
49
53
|
end
|
50
54
|
|