karafka 2.0.0.beta5 → 2.0.0.rc3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +28 -0
  4. data/CONTRIBUTING.md +0 -5
  5. data/Gemfile.lock +12 -42
  6. data/LICENSE-COMM +1 -1
  7. data/README.md +44 -16
  8. data/bin/stress_many +1 -1
  9. data/bin/stress_one +1 -1
  10. data/config/errors.yml +52 -5
  11. data/docker-compose.yml +7 -0
  12. data/karafka.gemspec +2 -4
  13. data/lib/karafka/active_job/consumer.rb +2 -0
  14. data/lib/karafka/active_job/job_options_contract.rb +8 -2
  15. data/lib/karafka/base_consumer.rb +4 -6
  16. data/lib/karafka/cli/install.rb +15 -2
  17. data/lib/karafka/cli/server.rb +4 -2
  18. data/lib/karafka/connection/client.rb +20 -17
  19. data/lib/karafka/connection/listener.rb +12 -24
  20. data/lib/karafka/connection/pauses_manager.rb +0 -8
  21. data/lib/karafka/contracts/base.rb +2 -8
  22. data/lib/karafka/contracts/config.rb +71 -51
  23. data/lib/karafka/contracts/consumer_group.rb +25 -18
  24. data/lib/karafka/contracts/consumer_group_topic.rb +30 -16
  25. data/lib/karafka/contracts/server_cli_options.rb +18 -7
  26. data/lib/karafka/helpers/colorize.rb +20 -0
  27. data/lib/karafka/instrumentation/logger_listener.rb +8 -2
  28. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  29. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  30. data/lib/karafka/pro/active_job/dispatcher.rb +5 -2
  31. data/lib/karafka/pro/active_job/job_options_contract.rb +11 -6
  32. data/lib/karafka/pro/base_consumer.rb +21 -12
  33. data/lib/karafka/pro/contracts/base.rb +21 -0
  34. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  35. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  36. data/lib/karafka/pro/loader.rb +23 -3
  37. data/lib/karafka/pro/processing/coordinator.rb +51 -0
  38. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  39. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  40. data/lib/karafka/pro/routing/{extensions.rb → topic_extensions.rb} +7 -1
  41. data/lib/karafka/processing/coordinator.rb +6 -2
  42. data/lib/karafka/processing/coordinators_buffer.rb +3 -7
  43. data/lib/karafka/processing/executor.rb +1 -1
  44. data/lib/karafka/processing/jobs_queue.rb +11 -0
  45. data/lib/karafka/processing/partitioner.rb +22 -0
  46. data/lib/karafka/processing/worker.rb +4 -2
  47. data/lib/karafka/setup/config.rb +9 -3
  48. data/lib/karafka/templates/example_consumer.rb.erb +2 -2
  49. data/lib/karafka/version.rb +1 -1
  50. data/lib/karafka.rb +2 -2
  51. data.tar.gz.sig +0 -0
  52. metadata +15 -34
  53. metadata.gz.sig +0 -0
@@ -0,0 +1,232 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Instrumentation
5
+ # Namespace for vendor specific instrumentation
6
+ module Vendors
7
+ # Datadog specific instrumentation
8
+ module Datadog
9
+ # Listener that can be used to subscribe to Karafka to receive stats via StatsD
10
+ # and/or Datadog
11
+ #
12
+ # @note You need to set up the `dogstatsd-ruby` client and assign it to the `client` setting
13
+ class Listener
14
+ include WaterDrop::Configurable
15
+ extend Forwardable
16
+
17
+ def_delegators :config, :client, :rd_kafka_metrics, :namespace, :default_tags
18
+
19
+ # Value object for storing a single rdkafka metric publishing details
20
+ RdKafkaMetric = Struct.new(:type, :scope, :name, :key_location)
21
+
22
+ # Namespace under which the DD metrics should be published
23
+ setting :namespace, default: 'karafka'
24
+
25
+ # Datadog client that we should use to publish the metrics
26
+ setting :client
27
+
28
+ # Default tags we want to publish (for example hostname)
29
+ # Format as follows (example for hostname): `["host:#{Socket.gethostname}"]`
30
+ setting :default_tags, default: []
31
+
32
+ # All the rdkafka metrics we want to publish
33
+ #
34
+ # By default we publish quite a lot so this can be tuned
35
+ # Note that the ones with `_d` come from Karafka, not rdkafka or Kafka
36
+ setting :rd_kafka_metrics, default: [
37
+ # Client metrics
38
+ RdKafkaMetric.new(:count, :root, 'messages.consumed', 'rxmsgs_d'),
39
+ RdKafkaMetric.new(:count, :root, 'messages.consumed.bytes', 'rxmsg_bytes'),
40
+
41
+ # Broker metrics
42
+ RdKafkaMetric.new(:count, :brokers, 'consume.attempts', 'txretries_d'),
43
+ RdKafkaMetric.new(:count, :brokers, 'consume.errors', 'txerrs_d'),
44
+ RdKafkaMetric.new(:count, :brokers, 'receive.errors', 'rxerrs_d'),
45
+ RdKafkaMetric.new(:count, :brokers, 'connection.connects', 'connects_d'),
46
+ RdKafkaMetric.new(:count, :brokers, 'connection.disconnects', 'disconnects_d'),
47
+ RdKafkaMetric.new(:gauge, :brokers, 'network.latency.avg', %w[rtt avg]),
48
+ RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p95', %w[rtt p95]),
49
+ RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p99', %w[rtt p99])
50
+ ].freeze
51
+
52
+ configure
53
+
54
+ # @param block [Proc] configuration block
55
+ def initialize(&block)
56
+ configure
57
+ setup(&block) if block
58
+ end
59
+
60
+ # @param block [Proc] configuration block
61
+ # @note We define this alias to be consistent with `WaterDrop#setup`
62
+ def setup(&block)
63
+ configure(&block)
64
+ end
65
+
66
+ # Hooks up to WaterDrop instrumentation for emitted statistics
67
+ #
68
+ # @param event [Dry::Events::Event]
69
+ def on_statistics_emitted(event)
70
+ statistics = event[:statistics]
71
+
72
+ rd_kafka_metrics.each do |metric|
73
+ report_metric(metric, statistics)
74
+ end
75
+ end
76
+
77
+ # Increases the errors count by 1
78
+ #
79
+ # @param event [Dry::Events::Event]
80
+ def on_error_occurred(event)
81
+ extra_tags = ["type:#{event[:type]}"]
82
+
83
+ if event.payload[:caller].respond_to?(:messages)
84
+ metadata = event.payload[:caller].messages.metadata
85
+
86
+ extra_tags += [
87
+ "topic:#{metadata.topic}",
88
+ "partition:#{metadata.partition}"
89
+ ]
90
+ end
91
+
92
+ count('error_occurred', 1, tags: default_tags + extra_tags)
93
+ end
94
+
95
+ # Reports how many messages we've polled and how much time we spent on it
96
+ #
97
+ # @param event [Dry::Events::Event]
98
+ def on_connection_listener_fetch_loop_received(event)
99
+ time_taken = event[:time]
100
+ messages_count = event[:messages_buffer].size
101
+
102
+ histogram('listener.polling.time_taken', time_taken, tags: default_tags)
103
+ histogram('listener.polling.messages', messages_count, tags: default_tags)
104
+ end
105
+
106
+ # Here we report the majority of things related to processing as we have access to the
107
+ # consumer
108
+ # @param event [Dry::Events::Event]
109
+ def on_consumer_consumed(event)
110
+ messages = event.payload[:caller].messages
111
+ metadata = messages.metadata
112
+
113
+ tags = default_tags + [
114
+ "topic:#{metadata.topic}",
115
+ "partition:#{metadata.partition}"
116
+ ]
117
+
118
+ count('consumer.messages', messages.count, tags: tags)
119
+ count('consumer.batches', 1, tags: tags)
120
+ gauge('consumer.offset', metadata.last_offset, tags: tags)
121
+ histogram('consumer.consumed.time_taken', event[:time], tags: tags)
122
+ histogram('consumer.batch_size', messages.count, tags: tags)
123
+ histogram('consumer.processing_lag', metadata.processing_lag, tags: tags)
124
+ histogram('consumer.consumption_lag', metadata.consumption_lag, tags: tags)
125
+ end
126
+
127
+ # @param event [Dry::Events::Event]
128
+ def on_consumer_revoked(event)
129
+ messages = event.payload[:caller].messages
130
+ metadata = messages.metadata
131
+
132
+ tags = default_tags + [
133
+ "topic:#{metadata.topic}",
134
+ "partition:#{metadata.partition}"
135
+ ]
136
+
137
+ count('consumer.revoked', 1, tags: tags)
138
+ end
139
+
140
+ # @param event [Dry::Events::Event]
141
+ def on_consumer_shutdown(event)
142
+ messages = event.payload[:caller].messages
143
+ metadata = messages.metadata
144
+
145
+ tags = default_tags + [
146
+ "topic:#{metadata.topic}",
147
+ "partition:#{metadata.partition}"
148
+ ]
149
+
150
+ count('consumer.shutdown', 1, tags: tags)
151
+ end
152
+
153
+ # Worker related metrics
154
+ # @param event [Dry::Events::Event]
155
+ def on_worker_process(event)
156
+ jq_stats = event[:jobs_queue].statistics
157
+
158
+ gauge('worker.total_threads', Karafka::App.config.concurrency, tags: default_tags)
159
+ histogram('worker.processing', jq_stats[:processing], tags: default_tags)
160
+ histogram('worker.enqueued_jobs', jq_stats[:enqueued], tags: default_tags)
161
+ end
162
+
163
+ # We report this metric before and after processing for higher accuracy
164
+ # Without this, the utilization would not be fully reflected
165
+ # @param event [Dry::Events::Event]
166
+ def on_worker_processed(event)
167
+ jq_stats = event[:jobs_queue].statistics
168
+
169
+ histogram('worker.processing', jq_stats[:processing], tags: default_tags)
170
+ end
171
+
172
+ private
173
+
174
+ %i[
175
+ count
176
+ gauge
177
+ histogram
178
+ increment
179
+ decrement
180
+ ].each do |metric_type|
181
+ class_eval <<~METHODS, __FILE__, __LINE__ + 1
182
+ def #{metric_type}(key, *args)
183
+ client.#{metric_type}(
184
+ namespaced_metric(key),
185
+ *args
186
+ )
187
+ end
188
+ METHODS
189
+ end
190
+
191
+ # Wraps metric name in listener's namespace
192
+ # @param metric_name [String] RdKafkaMetric name
193
+ # @return [String]
194
+ def namespaced_metric(metric_name)
195
+ "#{namespace}.#{metric_name}"
196
+ end
197
+
198
+ # Reports a given metric statistics to Datadog
199
+ # @param metric [RdKafkaMetric] metric value object
200
+ # @param statistics [Hash] hash with all the statistics emitted
201
+ def report_metric(metric, statistics)
202
+ case metric.scope
203
+ when :root
204
+ public_send(
205
+ metric.type,
206
+ metric.name,
207
+ statistics.fetch(*metric.key_location),
208
+ tags: default_tags
209
+ )
210
+ when :brokers
211
+ statistics.fetch('brokers').each_value do |broker_statistics|
212
+ # Skip bootstrap nodes
213
+ # Bootstrap nodes have nodeid -1, other nodes have positive
214
+ # node ids
215
+ next if broker_statistics['nodeid'] == -1
216
+
217
+ public_send(
218
+ metric.type,
219
+ metric.name,
220
+ broker_statistics.dig(*metric.key_location),
221
+ tags: default_tags + ["broker:#{broker_statistics['nodename']}"]
222
+ )
223
+ end
224
+ else
225
+ raise ArgumentError, metric.scope
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end
231
+ end
232
+ end
@@ -23,7 +23,9 @@ module Karafka
23
23
  dispatch_method: :produce_async,
24
24
  # We don't create a dummy proc based partitioner as we would have to evaluate it with
25
25
  # each job.
26
- partitioner: nil
26
+ partitioner: nil,
27
+ # Allows for usage of `:key` or `:partition_key`
28
+ partition_key_type: :key
27
29
  }.freeze
28
30
 
29
31
  private_constant :DEFAULTS
@@ -45,11 +47,12 @@ module Karafka
45
47
  # @return [Hash] hash with dispatch details to which we merge topic and payload
46
48
  def dispatch_details(job)
47
49
  partitioner = fetch_option(job, :partitioner, DEFAULTS)
50
+ key_type = fetch_option(job, :partition_key_type, DEFAULTS)
48
51
 
49
52
  return {} unless partitioner
50
53
 
51
54
  {
52
- partition_key: partitioner.call(job)
55
+ key_type => partitioner.call(job)
53
56
  }
54
57
  end
55
58
  end
@@ -14,13 +14,18 @@ module Karafka
14
14
  module ActiveJob
15
15
  # Contract for validating the options that can be altered with `#karafka_options` per job
16
16
  # class that works with Pro features.
17
- class JobOptionsContract < ::Karafka::ActiveJob::JobOptionsContract
18
- # Dry types
19
- Types = include Dry.Types()
20
-
21
- params do
22
- optional(:partitioner).value(Types.Interface(:call))
17
+ class JobOptionsContract < Contracts::Base
18
+ configure do |config|
19
+ config.error_messages = YAML.safe_load(
20
+ File.read(
21
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
22
+ )
23
+ ).fetch('en').fetch('validations').fetch('job_options')
23
24
  end
25
+
26
+ optional(:dispatch_method) { |val| %i[produce_async produce_sync].include?(val) }
27
+ optional(:partitioner) { |val| val.respond_to?(:call) }
28
+ optional(:partition_key_type) { |val| %i[key partition_key].include?(val) }
24
29
  end
25
30
  end
26
31
  end
@@ -26,29 +26,38 @@ module Karafka
26
26
  # Pauses processing of a given partition until we're done with the processing
27
27
  # This ensures, that we can easily poll not reaching the `max.poll.interval`
28
28
  def on_before_consume
29
- # Pause at the first message in a batch. That way in case of a crash, we will not loose
30
- # any messages
31
29
  return unless topic.long_running_job?
32
30
 
33
- pause(messages.first.offset, MAX_PAUSE_TIME)
31
+ # This ensures, that when running LRJ with VP, things operate as expected
32
+ coordinator.on_started do |first_group_message|
33
+ # Pause at the first message in a batch. That way in case of a crash, we will not lose
34
+ # any messages
35
+ pause(first_group_message.offset, MAX_PAUSE_TIME)
36
+ end
34
37
  end
35
38
 
36
39
  # Runs extra logic after consumption that is related to handling long running jobs
37
40
  # @note This overwrites the '#on_after_consume' from the base consumer
38
41
  def on_after_consume
39
- # Nothing to do if we lost the partition
40
- return if revoked?
42
+ coordinator.on_finished do |first_group_message, last_group_message|
43
+ on_after_consume_regular(first_group_message, last_group_message)
44
+ end
45
+ end
41
46
 
42
- if @coordinator.success?
47
+ private
48
+
49
+ # Handles the post-consumption flow depending on topic settings
50
+ #
51
+ # @param first_message [Karafka::Messages::Message]
52
+ # @param last_message [Karafka::Messages::Message]
53
+ def on_after_consume_regular(first_message, last_message)
54
+ if coordinator.success?
43
55
  coordinator.pause_tracker.reset
44
56
 
45
57
  # We use the non-blocking one here. If someone needs the blocking one, can implement it
46
58
  # with manual offset management
47
59
  # Mark as consumed only if manual offset management is not on
48
- mark_as_consumed(messages.last) unless topic.manual_offset_management?
49
-
50
- # We check it twice as marking could change this state
51
- return if revoked?
60
+ mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?
52
61
 
53
62
  # If this is not a long running job there is nothing for us to do here
54
63
  return unless topic.long_running_job?
@@ -60,12 +69,12 @@ module Karafka
60
69
  # interesting (yet valid) corner case, where with manual offset management on and no
61
70
  # marking as consumed, we end up with an infinite loop processing same messages over and
62
71
  # over again
63
- seek(@seek_offset || messages.first.offset)
72
+ seek(@seek_offset || first_message.offset)
64
73
 
65
74
  resume
66
75
  else
67
76
  # If processing failed, we need to pause
68
- pause(@seek_offset || messages.first.offset)
77
+ pause(@seek_offset || first_message.offset)
69
78
  end
70
79
  end
71
80
  end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Namespace for Karafka Pro related contracts
15
+ module Contracts
16
+ # Base contract for Pro components contracts
17
+ class Base < ::Karafka::Contracts::Base
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Contracts
15
+ # Contract for validating correct Pro components setup on a consumer group and topic levels
16
+ class ConsumerGroup < Base
17
+ virtual do |data, errors|
18
+ next unless errors.empty?
19
+ next unless data.key?(:topics)
20
+
21
+ fetched_errors = []
22
+
23
+ data.fetch(:topics).each do |topic|
24
+ ConsumerGroupTopic.new.call(topic).errors.each do |key, value|
25
+ fetched_errors << [[topic, key].flatten, value]
26
+ end
27
+ end
28
+
29
+ fetched_errors
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Contracts
15
+ # Contract for validating correct Pro components setup on the topic level
16
+ class ConsumerGroupTopic < Base
17
+ configure do |config|
18
+ config.error_messages = YAML.safe_load(
19
+ File.read(
20
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
21
+ )
22
+ ).fetch('en').fetch('validations').fetch('pro_consumer_group_topic')
23
+ end
24
+
25
+ virtual do |data|
26
+ next if data[:consumer] < Karafka::Pro::BaseConsumer
27
+
28
+ [[%i[consumer], :consumer_format]]
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -21,7 +21,12 @@ module Karafka
21
21
  processing/jobs/consume_non_blocking
22
22
  processing/jobs_builder
23
23
  processing/coordinator
24
- routing/extensions
24
+ processing/partitioner
25
+ contracts/base
26
+ contracts/consumer_group
27
+ contracts/consumer_group_topic
28
+ routing/topic_extensions
29
+ routing/builder_extensions
25
30
  active_job/consumer
26
31
  active_job/dispatcher
27
32
  active_job/job_options_contract
@@ -36,9 +41,20 @@ module Karafka
36
41
  def setup(config)
37
42
  COMPONENTS.each { |component| require_relative(component) }
38
43
 
44
+ reconfigure(config)
45
+
46
+ load_routing_extensions
47
+ end
48
+
49
+ private
50
+
51
+ # Sets proper config options to use pro components
52
+ # @param config [WaterDrop::Configurable::Node] root config node
53
+ def reconfigure(config)
39
54
  icfg = config.internal
40
55
 
41
56
  icfg.processing.coordinator_class = Processing::Coordinator
57
+ icfg.processing.partitioner_class = Processing::Partitioner
42
58
  icfg.processing.scheduler = Processing::Scheduler.new
43
59
  icfg.processing.jobs_builder = Processing::JobsBuilder.new
44
60
 
@@ -46,10 +62,14 @@ module Karafka
46
62
  icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
47
63
  icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
48
64
 
49
- ::Karafka::Routing::Topic.include(Routing::Extensions)
50
-
51
65
  config.monitor.subscribe(PerformanceTracker.instance)
52
66
  end
67
+
68
+ # Loads routing extensions
69
+ def load_routing_extensions
70
+ ::Karafka::Routing::Topic.include(Routing::TopicExtensions)
71
+ ::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
72
+ end
53
73
  end
54
74
  end
55
75
  end
@@ -6,6 +6,57 @@ module Karafka
6
6
  # Pro coordinator that provides extra orchestration methods useful for parallel processing
7
7
  # within the same partition
8
8
  class Coordinator < ::Karafka::Processing::Coordinator
9
+ # @param args [Object] anything the base coordinator accepts
10
+ def initialize(*args)
11
+ super
12
+ @on_started_invoked = false
13
+ @on_finished_invoked = false
14
+ @flow_lock = Mutex.new
15
+ end
16
+
17
+ # Starts the coordination process
18
+ # @param messages [Array<Karafka::Messages::Message>] messages for which processing we are
19
+ # going to coordinate.
20
+ def start(messages)
21
+ super
22
+
23
+ @mutex.synchronize do
24
+ @on_started_invoked = false
25
+ @on_finished_invoked = false
26
+ @first_message = messages.first
27
+ @last_message = messages.last
28
+ end
29
+ end
30
+
31
+ # @return [Boolean] is the coordinated work finished or not
32
+ def finished?
33
+ @running_jobs.zero?
34
+ end
35
+
36
+ # Runs given code only once per all the coordinated jobs upon starting first of them
37
+ def on_started
38
+ @flow_lock.synchronize do
39
+ return if @on_started_invoked
40
+
41
+ @on_started_invoked = true
42
+
43
+ yield(@first_message, @last_message)
44
+ end
45
+ end
46
+
47
+ # Runs once when all the work that is supposed to be coordinated is finished
48
+ # It runs once per all the coordinated jobs and should be used to run any type of post
49
+ # jobs coordination processing execution
50
+ def on_finished
51
+ @flow_lock.synchronize do
52
+ return unless finished?
53
+ return if @on_finished_invoked
54
+
55
+ @on_finished_invoked = true
56
+
57
+ yield(@first_message, @last_message)
58
+ end
59
+ end
9
60
  end
10
61
  end
11
62
  end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Processing
15
+ # Pro partitioner that can distribute work based on the virtual partitioner settings
16
+ class Partitioner < ::Karafka::Processing::Partitioner
17
+ # @param topic [String] topic name
18
+ # @param messages [Array<Karafka::Messages::Message>] karafka messages
19
+ # @yieldparam [Integer] group id
20
+ # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
21
+ def call(topic, messages)
22
+ ktopic = @subscription_group.topics.find(topic)
23
+
24
+ @concurrency ||= ::Karafka::App.config.concurrency
25
+
26
+ # We only partition work if we have a virtual partitioner and more than one thread to
27
+ # process the data. With one thread it is not worth partitioning the work as the work
28
+ # itself will be assigned to one thread (pointless work)
29
+ if ktopic.virtual_partitioner? && @concurrency > 1
30
+ # We need to reduce it to number of threads, so the group_id is not a direct effect
31
+ # of the end user action. Otherwise the persistence layer for consumers would cache
32
+ # it forever and it would cause memory leaks
33
+ groupings = messages
34
+ .group_by { |msg| ktopic.virtual_partitioner.call(msg) }
35
+ .values
36
+
37
+ # Reduce the max concurrency to a size that matches the concurrency
38
+ # As mentioned above we cannot use the partitioning keys directly as it could cause
39
+ # memory leaks
40
+ #
41
+ # The algorithm here is simple, we assume that the most costly in terms of processing,
42
+ # will be processing of the biggest group and we reduce the smallest ones to have
43
+ # max of groups equal to concurrency
44
+ while groupings.size > @concurrency
45
+ groupings.sort_by! { |grouping| -grouping.size }
46
+
47
+ # Offset order needs to be maintained for virtual partitions
48
+ groupings << (groupings.pop + groupings.pop).sort_by!(&:offset)
49
+ end
50
+
51
+ groupings.each_with_index { |messages_group, index| yield(index, messages_group) }
52
+ else
53
+ # When no virtual partitioner, works as regular one
54
+ yield(0, messages)
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Pro routing components
15
+ module Routing
16
+ # Routing extensions for builder to be able to validate Pro components correct usage
17
+ module BuilderExtensions
18
+ # Validate consumer groups with pro contracts
19
+ # @param block [Proc] routing defining block
20
+ def draw(&block)
21
+ super
22
+
23
+ each do |consumer_group|
24
+ ::Karafka::Pro::Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -14,14 +14,20 @@ module Karafka
14
14
  # Pro routing components
15
15
  module Routing
16
16
  # Routing extensions that allow to configure some extra PRO routing options
17
- module Extensions
17
+ module TopicExtensions
18
18
  class << self
19
19
  # @param base [Class] class we extend
20
20
  def included(base)
21
21
  base.attr_accessor :long_running_job
22
+ base.attr_accessor :virtual_partitioner
22
23
  end
23
24
  end
24
25
 
26
+ # @return [Boolean] true if virtual partitioner is defined, false otherwise
27
+ def virtual_partitioner?
28
+ virtual_partitioner != nil
29
+ end
30
+
25
31
  # @return [Boolean] is a given job on a topic a long running one
26
32
  def long_running_job?
27
33
  @long_running_job || false
@@ -23,7 +23,9 @@ module Karafka
23
23
  end
24
24
 
25
25
  # Starts the coordinator for given consumption jobs
26
- def start
26
+ # @param _messages [Array<Karafka::Messages::Message>] batch of messages for which we are
27
+ # going to coordinate work. Not used with regular coordinator.
28
+ def start(_messages)
27
29
  @mutex.synchronize do
28
30
  @running_jobs = 0
29
31
  # We need to clear the consumption results hash here, otherwise we could end up storing
@@ -44,7 +46,9 @@ module Karafka
44
46
 
45
47
  return @running_jobs unless @running_jobs.negative?
46
48
 
47
- raise Karafka::Errors::InvalidCoordinatorState, @running_jobs
49
+ # This should never happen. If it does, something is heavily out of sync. Please reach
50
+ # out to us if you encounter this
51
+ raise Karafka::Errors::InvalidCoordinatorState, 'Was zero before decrementation'
48
52
  end
49
53
  end
50
54