karafka 2.0.0.beta5 → 2.0.0.rc3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +28 -0
  4. data/CONTRIBUTING.md +0 -5
  5. data/Gemfile.lock +12 -42
  6. data/LICENSE-COMM +1 -1
  7. data/README.md +44 -16
  8. data/bin/stress_many +1 -1
  9. data/bin/stress_one +1 -1
  10. data/config/errors.yml +52 -5
  11. data/docker-compose.yml +7 -0
  12. data/karafka.gemspec +2 -4
  13. data/lib/karafka/active_job/consumer.rb +2 -0
  14. data/lib/karafka/active_job/job_options_contract.rb +8 -2
  15. data/lib/karafka/base_consumer.rb +4 -6
  16. data/lib/karafka/cli/install.rb +15 -2
  17. data/lib/karafka/cli/server.rb +4 -2
  18. data/lib/karafka/connection/client.rb +20 -17
  19. data/lib/karafka/connection/listener.rb +12 -24
  20. data/lib/karafka/connection/pauses_manager.rb +0 -8
  21. data/lib/karafka/contracts/base.rb +2 -8
  22. data/lib/karafka/contracts/config.rb +71 -51
  23. data/lib/karafka/contracts/consumer_group.rb +25 -18
  24. data/lib/karafka/contracts/consumer_group_topic.rb +30 -16
  25. data/lib/karafka/contracts/server_cli_options.rb +18 -7
  26. data/lib/karafka/helpers/colorize.rb +20 -0
  27. data/lib/karafka/instrumentation/logger_listener.rb +8 -2
  28. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  29. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  30. data/lib/karafka/pro/active_job/dispatcher.rb +5 -2
  31. data/lib/karafka/pro/active_job/job_options_contract.rb +11 -6
  32. data/lib/karafka/pro/base_consumer.rb +21 -12
  33. data/lib/karafka/pro/contracts/base.rb +21 -0
  34. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  35. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  36. data/lib/karafka/pro/loader.rb +23 -3
  37. data/lib/karafka/pro/processing/coordinator.rb +51 -0
  38. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  39. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  40. data/lib/karafka/pro/routing/{extensions.rb → topic_extensions.rb} +7 -1
  41. data/lib/karafka/processing/coordinator.rb +6 -2
  42. data/lib/karafka/processing/coordinators_buffer.rb +3 -7
  43. data/lib/karafka/processing/executor.rb +1 -1
  44. data/lib/karafka/processing/jobs_queue.rb +11 -0
  45. data/lib/karafka/processing/partitioner.rb +22 -0
  46. data/lib/karafka/processing/worker.rb +4 -2
  47. data/lib/karafka/setup/config.rb +9 -3
  48. data/lib/karafka/templates/example_consumer.rb.erb +2 -2
  49. data/lib/karafka/version.rb +1 -1
  50. data/lib/karafka.rb +2 -2
  51. data.tar.gz.sig +0 -0
  52. metadata +15 -34
  53. metadata.gz.sig +0 -0
@@ -0,0 +1,232 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Instrumentation
5
+ # Namespace for vendor specific instrumentation
6
+ module Vendors
7
+ # Datadog specific instrumentation
8
+ module Datadog
9
+ # Listener that can be used to subscribe to Karafka to receive stats via StatsD
10
+ # and/or Datadog
11
+ #
12
+ # @note You need to set up the `dogstatsd-ruby` client and assign it to the `client` setting
13
+ class Listener
14
+ include WaterDrop::Configurable
15
+ extend Forwardable
16
+
17
+ def_delegators :config, :client, :rd_kafka_metrics, :namespace, :default_tags
18
+
19
+ # Value object for storing a single rdkafka metric publishing details
20
+ RdKafkaMetric = Struct.new(:type, :scope, :name, :key_location)
21
+
22
+ # Namespace under which the DD metrics should be published
23
+ setting :namespace, default: 'karafka'
24
+
25
+ # Datadog client that we should use to publish the metrics
26
+ setting :client
27
+
28
+ # Default tags we want to publish (for example hostname)
29
+ # Format as follows (example for hostname): `["host:#{Socket.gethostname}"]`
30
+ setting :default_tags, default: []
31
+
32
+ # All the rdkafka metrics we want to publish
33
+ #
34
+ # By default we publish quite a lot so this can be tuned
35
+ # Note that the ones with `_d` come from Karafka, not rdkafka or Kafka
36
+ setting :rd_kafka_metrics, default: [
37
+ # Client metrics
38
+ RdKafkaMetric.new(:count, :root, 'messages.consumed', 'rxmsgs_d'),
39
+ RdKafkaMetric.new(:count, :root, 'messages.consumed.bytes', 'rxmsg_bytes'),
40
+
41
+ # Broker metrics
42
+ RdKafkaMetric.new(:count, :brokers, 'consume.attempts', 'txretries_d'),
43
+ RdKafkaMetric.new(:count, :brokers, 'consume.errors', 'txerrs_d'),
44
+ RdKafkaMetric.new(:count, :brokers, 'receive.errors', 'rxerrs_d'),
45
+ RdKafkaMetric.new(:count, :brokers, 'connection.connects', 'connects_d'),
46
+ RdKafkaMetric.new(:count, :brokers, 'connection.disconnects', 'disconnects_d'),
47
+ RdKafkaMetric.new(:gauge, :brokers, 'network.latency.avg', %w[rtt avg]),
48
+ RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p95', %w[rtt p95]),
49
+ RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p99', %w[rtt p99])
50
+ ].freeze
51
+
52
+ configure
53
+
54
+ # @param block [Proc] configuration block
55
+ def initialize(&block)
56
+ configure
57
+ setup(&block) if block
58
+ end
59
+
60
+ # @param block [Proc] configuration block
61
+ # @note We define this alias to be consistent with `WaterDrop#setup`
62
+ def setup(&block)
63
+ configure(&block)
64
+ end
65
+
66
+ # Hooks up to WaterDrop instrumentation for emitted statistics
67
+ #
68
+ # @param event [Dry::Events::Event]
69
+ def on_statistics_emitted(event)
70
+ statistics = event[:statistics]
71
+
72
+ rd_kafka_metrics.each do |metric|
73
+ report_metric(metric, statistics)
74
+ end
75
+ end
76
+
77
+ # Increases the errors count by 1
78
+ #
79
+ # @param event [Dry::Events::Event]
80
+ def on_error_occurred(event)
81
+ extra_tags = ["type:#{event[:type]}"]
82
+
83
+ if event.payload[:caller].respond_to?(:messages)
84
+ metadata = event.payload[:caller].messages.metadata
85
+
86
+ extra_tags += [
87
+ "topic:#{metadata.topic}",
88
+ "partition:#{metadata.partition}"
89
+ ]
90
+ end
91
+
92
+ count('error_occurred', 1, tags: default_tags + extra_tags)
93
+ end
94
+
95
+ # Reports how many messages we've polled and how much time we spent on it
96
+ #
97
+ # @param event [Dry::Events::Event]
98
+ def on_connection_listener_fetch_loop_received(event)
99
+ time_taken = event[:time]
100
+ messages_count = event[:messages_buffer].size
101
+
102
+ histogram('listener.polling.time_taken', time_taken, tags: default_tags)
103
+ histogram('listener.polling.messages', messages_count, tags: default_tags)
104
+ end
105
+
106
+ # Here we report the majority of things related to processing as we have access to the
107
+ # consumer
108
+ # @param event [Dry::Events::Event]
109
+ def on_consumer_consumed(event)
110
+ messages = event.payload[:caller].messages
111
+ metadata = messages.metadata
112
+
113
+ tags = default_tags + [
114
+ "topic:#{metadata.topic}",
115
+ "partition:#{metadata.partition}"
116
+ ]
117
+
118
+ count('consumer.messages', messages.count, tags: tags)
119
+ count('consumer.batches', 1, tags: tags)
120
+ gauge('consumer.offset', metadata.last_offset, tags: tags)
121
+ histogram('consumer.consumed.time_taken', event[:time], tags: tags)
122
+ histogram('consumer.batch_size', messages.count, tags: tags)
123
+ histogram('consumer.processing_lag', metadata.processing_lag, tags: tags)
124
+ histogram('consumer.consumption_lag', metadata.consumption_lag, tags: tags)
125
+ end
126
+
127
+ # @param event [Dry::Events::Event]
128
+ def on_consumer_revoked(event)
129
+ messages = event.payload[:caller].messages
130
+ metadata = messages.metadata
131
+
132
+ tags = default_tags + [
133
+ "topic:#{metadata.topic}",
134
+ "partition:#{metadata.partition}"
135
+ ]
136
+
137
+ count('consumer.revoked', 1, tags: tags)
138
+ end
139
+
140
+ # @param event [Dry::Events::Event]
141
+ def on_consumer_shutdown(event)
142
+ messages = event.payload[:caller].messages
143
+ metadata = messages.metadata
144
+
145
+ tags = default_tags + [
146
+ "topic:#{metadata.topic}",
147
+ "partition:#{metadata.partition}"
148
+ ]
149
+
150
+ count('consumer.shutdown', 1, tags: tags)
151
+ end
152
+
153
+ # Worker related metrics
154
+ # @param event [Dry::Events::Event]
155
+ def on_worker_process(event)
156
+ jq_stats = event[:jobs_queue].statistics
157
+
158
+ gauge('worker.total_threads', Karafka::App.config.concurrency, tags: default_tags)
159
+ histogram('worker.processing', jq_stats[:processing], tags: default_tags)
160
+ histogram('worker.enqueued_jobs', jq_stats[:enqueued], tags: default_tags)
161
+ end
162
+
163
+ # We report this metric before and after processing for higher accuracy
164
+ # Without this, the utilization would not be fully reflected
165
+ # @param event [Dry::Events::Event]
166
+ def on_worker_processed(event)
167
+ jq_stats = event[:jobs_queue].statistics
168
+
169
+ histogram('worker.processing', jq_stats[:processing], tags: default_tags)
170
+ end
171
+
172
+ private
173
+
174
+ %i[
175
+ count
176
+ gauge
177
+ histogram
178
+ increment
179
+ decrement
180
+ ].each do |metric_type|
181
+ class_eval <<~METHODS, __FILE__, __LINE__ + 1
182
+ def #{metric_type}(key, *args)
183
+ client.#{metric_type}(
184
+ namespaced_metric(key),
185
+ *args
186
+ )
187
+ end
188
+ METHODS
189
+ end
190
+
191
+ # Wraps metric name in listener's namespace
192
+ # @param metric_name [String] RdKafkaMetric name
193
+ # @return [String]
194
+ def namespaced_metric(metric_name)
195
+ "#{namespace}.#{metric_name}"
196
+ end
197
+
198
+ # Reports a given metric statistics to Datadog
199
+ # @param metric [RdKafkaMetric] metric value object
200
+ # @param statistics [Hash] hash with all the statistics emitted
201
+ def report_metric(metric, statistics)
202
+ case metric.scope
203
+ when :root
204
+ public_send(
205
+ metric.type,
206
+ metric.name,
207
+ statistics.fetch(*metric.key_location),
208
+ tags: default_tags
209
+ )
210
+ when :brokers
211
+ statistics.fetch('brokers').each_value do |broker_statistics|
212
+ # Skip bootstrap nodes
213
+ # Bootstrap nodes have nodeid -1, other nodes have positive
214
+ # node ids
215
+ next if broker_statistics['nodeid'] == -1
216
+
217
+ public_send(
218
+ metric.type,
219
+ metric.name,
220
+ broker_statistics.dig(*metric.key_location),
221
+ tags: default_tags + ["broker:#{broker_statistics['nodename']}"]
222
+ )
223
+ end
224
+ else
225
+ raise ArgumentError, metric.scope
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end
231
+ end
232
+ end
@@ -23,7 +23,9 @@ module Karafka
23
23
  dispatch_method: :produce_async,
24
24
  # We don't create a dummy proc based partitioner as we would have to evaluate it with
25
25
  # each job.
26
- partitioner: nil
26
+ partitioner: nil,
27
+ # Allows for usage of `:key` or `:partition_key`
28
+ partition_key_type: :key
27
29
  }.freeze
28
30
 
29
31
  private_constant :DEFAULTS
@@ -45,11 +47,12 @@ module Karafka
45
47
  # @return [Hash] hash with dispatch details to which we merge topic and payload
46
48
  def dispatch_details(job)
47
49
  partitioner = fetch_option(job, :partitioner, DEFAULTS)
50
+ key_type = fetch_option(job, :partition_key_type, DEFAULTS)
48
51
 
49
52
  return {} unless partitioner
50
53
 
51
54
  {
52
- partition_key: partitioner.call(job)
55
+ key_type => partitioner.call(job)
53
56
  }
54
57
  end
55
58
  end
@@ -14,13 +14,18 @@ module Karafka
14
14
  module ActiveJob
15
15
  # Contract for validating the options that can be altered with `#karafka_options` per job
16
16
  # class that works with Pro features.
17
- class JobOptionsContract < ::Karafka::ActiveJob::JobOptionsContract
18
- # Dry types
19
- Types = include Dry.Types()
20
-
21
- params do
22
- optional(:partitioner).value(Types.Interface(:call))
17
+ class JobOptionsContract < Contracts::Base
18
+ configure do |config|
19
+ config.error_messages = YAML.safe_load(
20
+ File.read(
21
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
22
+ )
23
+ ).fetch('en').fetch('validations').fetch('job_options')
23
24
  end
25
+
26
+ optional(:dispatch_method) { |val| %i[produce_async produce_sync].include?(val) }
27
+ optional(:partitioner) { |val| val.respond_to?(:call) }
28
+ optional(:partition_key_type) { |val| %i[key partition_key].include?(val) }
24
29
  end
25
30
  end
26
31
  end
@@ -26,29 +26,38 @@ module Karafka
26
26
  # Pauses processing of a given partition until we're done with the processing
27
27
  # This ensures, that we can easily poll not reaching the `max.poll.interval`
28
28
  def on_before_consume
29
- # Pause at the first message in a batch. That way in case of a crash, we will not loose
30
- # any messages
31
29
  return unless topic.long_running_job?
32
30
 
33
- pause(messages.first.offset, MAX_PAUSE_TIME)
31
+ # This ensures that when running long running jobs (LRJ) with virtual partitions (VP), things operate as expected
32
+ coordinator.on_started do |first_group_message|
33
+ # Pause at the first message in a batch. That way in case of a crash, we will not lose
34
+ # any messages
35
+ pause(first_group_message.offset, MAX_PAUSE_TIME)
36
+ end
34
37
  end
35
38
 
36
39
  # Runs extra logic after consumption that is related to handling long running jobs
37
40
  # @note This overwrites the '#on_after_consume' from the base consumer
38
41
  def on_after_consume
39
- # Nothing to do if we lost the partition
40
- return if revoked?
42
+ coordinator.on_finished do |first_group_message, last_group_message|
43
+ on_after_consume_regular(first_group_message, last_group_message)
44
+ end
45
+ end
41
46
 
42
- if @coordinator.success?
47
+ private
48
+
49
+ # Handles the post-consumption flow depending on topic settings
50
+ #
51
+ # @param first_message [Karafka::Messages::Message]
52
+ # @param last_message [Karafka::Messages::Message]
53
+ def on_after_consume_regular(first_message, last_message)
54
+ if coordinator.success?
43
55
  coordinator.pause_tracker.reset
44
56
 
45
57
  # We use the non-blocking one here. If someone needs the blocking one, can implement it
46
58
  # with manual offset management
47
59
  # Mark as consumed only if manual offset management is not on
48
- mark_as_consumed(messages.last) unless topic.manual_offset_management?
49
-
50
- # We check it twice as marking could change this state
51
- return if revoked?
60
+ mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?
52
61
 
53
62
  # If this is not a long running job there is nothing for us to do here
54
63
  return unless topic.long_running_job?
@@ -60,12 +69,12 @@ module Karafka
60
69
  # interesting (yet valid) corner case, where with manual offset management on and no
61
70
  # marking as consumed, we end up with an infinite loop processing same messages over and
62
71
  # over again
63
- seek(@seek_offset || messages.first.offset)
72
+ seek(@seek_offset || first_message.offset)
64
73
 
65
74
  resume
66
75
  else
67
76
  # If processing failed, we need to pause
68
- pause(@seek_offset || messages.first.offset)
77
+ pause(@seek_offset || first_message.offset)
69
78
  end
70
79
  end
71
80
  end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Namespace for Karafka Pro related contracts
15
+ module Contracts
16
+ # Base contract for Pro components contracts
17
+ class Base < ::Karafka::Contracts::Base
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Contracts
15
+ # Contract for validating correct Pro components setup on the consumer group and topic levels
16
+ class ConsumerGroup < Base
17
+ virtual do |data, errors|
18
+ next unless errors.empty?
19
+ next unless data.key?(:topics)
20
+
21
+ fetched_errors = []
22
+
23
+ data.fetch(:topics).each do |topic|
24
+ ConsumerGroupTopic.new.call(topic).errors.each do |key, value|
25
+ fetched_errors << [[topic, key].flatten, value]
26
+ end
27
+ end
28
+
29
+ fetched_errors
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Contracts
15
+ # Contract for validating correct Pro components setup on the topic level
16
+ class ConsumerGroupTopic < Base
17
+ configure do |config|
18
+ config.error_messages = YAML.safe_load(
19
+ File.read(
20
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
21
+ )
22
+ ).fetch('en').fetch('validations').fetch('pro_consumer_group_topic')
23
+ end
24
+
25
+ virtual do |data|
26
+ next if data[:consumer] < Karafka::Pro::BaseConsumer
27
+
28
+ [[%i[consumer], :consumer_format]]
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -21,7 +21,12 @@ module Karafka
21
21
  processing/jobs/consume_non_blocking
22
22
  processing/jobs_builder
23
23
  processing/coordinator
24
- routing/extensions
24
+ processing/partitioner
25
+ contracts/base
26
+ contracts/consumer_group
27
+ contracts/consumer_group_topic
28
+ routing/topic_extensions
29
+ routing/builder_extensions
25
30
  active_job/consumer
26
31
  active_job/dispatcher
27
32
  active_job/job_options_contract
@@ -36,9 +41,20 @@ module Karafka
36
41
  def setup(config)
37
42
  COMPONENTS.each { |component| require_relative(component) }
38
43
 
44
+ reconfigure(config)
45
+
46
+ load_routing_extensions
47
+ end
48
+
49
+ private
50
+
51
+ # Sets proper config options to use pro components
52
+ # @param config [WaterDrop::Configurable::Node] root config node
53
+ def reconfigure(config)
39
54
  icfg = config.internal
40
55
 
41
56
  icfg.processing.coordinator_class = Processing::Coordinator
57
+ icfg.processing.partitioner_class = Processing::Partitioner
42
58
  icfg.processing.scheduler = Processing::Scheduler.new
43
59
  icfg.processing.jobs_builder = Processing::JobsBuilder.new
44
60
 
@@ -46,10 +62,14 @@ module Karafka
46
62
  icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
47
63
  icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
48
64
 
49
- ::Karafka::Routing::Topic.include(Routing::Extensions)
50
-
51
65
  config.monitor.subscribe(PerformanceTracker.instance)
52
66
  end
67
+
68
+ # Loads routing extensions
69
+ def load_routing_extensions
70
+ ::Karafka::Routing::Topic.include(Routing::TopicExtensions)
71
+ ::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
72
+ end
53
73
  end
54
74
  end
55
75
  end
@@ -6,6 +6,57 @@ module Karafka
6
6
  # Pro coordinator that provides extra orchestration methods useful for parallel processing
7
7
  # within the same partition
8
8
  class Coordinator < ::Karafka::Processing::Coordinator
9
+ # @param args [Object] anything the base coordinator accepts
10
+ def initialize(*args)
11
+ super
12
+ @on_started_invoked = false
13
+ @on_finished_invoked = false
14
+ @flow_lock = Mutex.new
15
+ end
16
+
17
+ # Starts the coordination process
18
+ # @param messages [Array<Karafka::Messages::Message>] messages for which processing we are
19
+ # going to coordinate.
20
+ def start(messages)
21
+ super
22
+
23
+ @mutex.synchronize do
24
+ @on_started_invoked = false
25
+ @on_finished_invoked = false
26
+ @first_message = messages.first
27
+ @last_message = messages.last
28
+ end
29
+ end
30
+
31
+ # @return [Boolean] is the coordinated work finished or not
32
+ def finished?
33
+ @running_jobs.zero?
34
+ end
35
+
36
+ # Runs given code only once per all the coordinated jobs upon starting first of them
37
+ def on_started
38
+ @flow_lock.synchronize do
39
+ return if @on_started_invoked
40
+
41
+ @on_started_invoked = true
42
+
43
+ yield(@first_message, @last_message)
44
+ end
45
+ end
46
+
47
+ # Runs once when all the work that is supposed to be coordinated is finished
48
+ # It runs once per all the coordinated jobs and should be used to run any type of post
49
+ # jobs coordination processing execution
50
+ def on_finished
51
+ @flow_lock.synchronize do
52
+ return unless finished?
53
+ return if @on_finished_invoked
54
+
55
+ @on_finished_invoked = true
56
+
57
+ yield(@first_message, @last_message)
58
+ end
59
+ end
9
60
  end
10
61
  end
11
62
  end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Processing
15
+ # Pro partitioner that can distribute work based on the virtual partitioner settings
16
+ class Partitioner < ::Karafka::Processing::Partitioner
17
+ # @param topic [String] topic name
18
+ # @param messages [Array<Karafka::Messages::Message>] karafka messages
19
+ # @yieldparam [Integer] group id
20
+ # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
21
+ def call(topic, messages)
22
+ ktopic = @subscription_group.topics.find(topic)
23
+
24
+ @concurrency ||= ::Karafka::App.config.concurrency
25
+
26
+ # We only partition work if we have a virtual partitioner and more than one thread to
27
+ # process the data. With one thread it is not worth partitioning the work as the work
28
+ # itself will be assigned to one thread (pointless work)
29
+ if ktopic.virtual_partitioner? && @concurrency > 1
30
+ # We need to reduce it to number of threads, so the group_id is not a direct effect
31
+ # of the end user action. Otherwise the persistence layer for consumers would cache
32
+ # it forever and it would cause memory leaks
33
+ groupings = messages
34
+ .group_by { |msg| ktopic.virtual_partitioner.call(msg) }
35
+ .values
36
+
37
+ # Reduce the number of groups to a size that matches the concurrency
38
+ # As mentioned above we cannot use the partitioning keys directly as it could cause
39
+ # memory leaks
40
+ #
41
+ # The algorithm here is simple, we assume that the most costly in terms of processing,
42
+ # will be processing of the biggest group and we reduce the smallest ones to have
43
+ # max of groups equal to concurrency
44
+ while groupings.size > @concurrency
45
+ groupings.sort_by! { |grouping| -grouping.size }
46
+
47
+ # Offset order needs to be maintained for virtual partitions
48
+ groupings << (groupings.pop + groupings.pop).sort_by!(&:offset)
49
+ end
50
+
51
+ groupings.each_with_index { |messages_group, index| yield(index, messages_group) }
52
+ else
53
+ # When no virtual partitioner, works as regular one
54
+ yield(0, messages)
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Pro routing components
15
+ module Routing
16
+ # Routing extensions for builder to be able to validate Pro components correct usage
17
+ module BuilderExtensions
18
+ # Validate consumer groups with pro contracts
19
+ # @param block [Proc] routing defining block
20
+ def draw(&block)
21
+ super
22
+
23
+ each do |consumer_group|
24
+ ::Karafka::Pro::Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -14,14 +14,20 @@ module Karafka
14
14
  # Pro routing components
15
15
  module Routing
16
16
  # Routing extensions that allow to configure some extra PRO routing options
17
- module Extensions
17
+ module TopicExtensions
18
18
  class << self
19
19
  # @param base [Class] class we extend
20
20
  def included(base)
21
21
  base.attr_accessor :long_running_job
22
+ base.attr_accessor :virtual_partitioner
22
23
  end
23
24
  end
24
25
 
26
+ # @return [Boolean] true if virtual partitioner is defined, false otherwise
27
+ def virtual_partitioner?
28
+ virtual_partitioner != nil
29
+ end
30
+
25
31
  # @return [Boolean] is a given job on a topic a long running one
26
32
  def long_running_job?
27
33
  @long_running_job || false
@@ -23,7 +23,9 @@ module Karafka
23
23
  end
24
24
 
25
25
  # Starts the coordinator for given consumption jobs
26
- def start
26
+ # @param _messages [Array<Karafka::Messages::Message>] batch of messages for which we are
27
+ # going to coordinate work. Not used with regular coordinator.
28
+ def start(_messages)
27
29
  @mutex.synchronize do
28
30
  @running_jobs = 0
29
31
  # We need to clear the consumption results hash here, otherwise we could end up storing
@@ -44,7 +46,9 @@ module Karafka
44
46
 
45
47
  return @running_jobs unless @running_jobs.negative?
46
48
 
47
- raise Karafka::Errors::InvalidCoordinatorState, @running_jobs
49
+ # This should never happen. If it does, something is heavily out of sync. Please reach
50
+ # out to us if you encounter this
51
+ raise Karafka::Errors::InvalidCoordinatorState, 'Was zero before decrementation'
48
52
  end
49
53
  end
50
54