karafka 1.4.13 → 2.0.0
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +3 -3
- data/.github/workflows/ci.yml +85 -30
- data/.ruby-version +1 -1
- data/CHANGELOG.md +268 -7
- data/CONTRIBUTING.md +10 -19
- data/Gemfile +6 -0
- data/Gemfile.lock +44 -87
- data/LICENSE +17 -0
- data/LICENSE-COMM +89 -0
- data/LICENSE-LGPL +165 -0
- data/README.md +44 -48
- data/bin/benchmarks +85 -0
- data/bin/create_token +22 -0
- data/bin/integrations +237 -0
- data/bin/karafka +4 -0
- data/bin/scenario +29 -0
- data/bin/stress_many +13 -0
- data/bin/stress_one +13 -0
- data/bin/wait_for_kafka +20 -0
- data/certs/karafka-pro.pem +11 -0
- data/config/errors.yml +55 -40
- data/docker-compose.yml +39 -3
- data/karafka.gemspec +11 -17
- data/lib/active_job/karafka.rb +21 -0
- data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
- data/lib/karafka/active_job/consumer.rb +26 -0
- data/lib/karafka/active_job/dispatcher.rb +38 -0
- data/lib/karafka/active_job/job_extensions.rb +34 -0
- data/lib/karafka/active_job/job_options_contract.rb +21 -0
- data/lib/karafka/active_job/routing/extensions.rb +31 -0
- data/lib/karafka/app.rb +15 -20
- data/lib/karafka/base_consumer.rb +181 -31
- data/lib/karafka/cli/base.rb +4 -4
- data/lib/karafka/cli/info.rb +43 -9
- data/lib/karafka/cli/install.rb +19 -10
- data/lib/karafka/cli/server.rb +17 -42
- data/lib/karafka/cli.rb +4 -11
- data/lib/karafka/connection/client.rb +385 -90
- data/lib/karafka/connection/listener.rb +246 -38
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +84 -0
- data/lib/karafka/connection/pauses_manager.rb +46 -0
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/connection/rebalance_manager.rb +78 -0
- data/lib/karafka/contracts/base.rb +17 -0
- data/lib/karafka/contracts/config.rb +88 -11
- data/lib/karafka/contracts/consumer_group.rb +21 -189
- data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
- data/lib/karafka/contracts/server_cli_options.rb +19 -18
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/env.rb +46 -0
- data/lib/karafka/errors.rb +21 -21
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/helpers/colorize.rb +20 -0
- data/lib/karafka/helpers/multi_delegator.rb +2 -2
- data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
- data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
- data/lib/karafka/instrumentation/logger_listener.rb +164 -0
- data/lib/karafka/instrumentation/monitor.rb +13 -61
- data/lib/karafka/instrumentation/notifications.rb +52 -0
- data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
- data/lib/karafka/instrumentation.rb +21 -0
- data/lib/karafka/licenser.rb +75 -0
- data/lib/karafka/messages/batch_metadata.rb +45 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
- data/lib/karafka/messages/builders/message.rb +39 -0
- data/lib/karafka/messages/builders/messages.rb +32 -0
- data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
- data/lib/karafka/messages/messages.rb +64 -0
- data/lib/karafka/{params → messages}/metadata.rb +4 -6
- data/lib/karafka/messages/seek.rb +9 -0
- data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
- data/lib/karafka/pro/active_job/consumer.rb +46 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
- data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
- data/lib/karafka/pro/base_consumer.rb +82 -0
- data/lib/karafka/pro/contracts/base.rb +21 -0
- data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
- data/lib/karafka/pro/loader.rb +76 -0
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/pro/processing/coordinator.rb +72 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
- data/lib/karafka/pro/processing/partitioner.rb +60 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
- data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
- data/lib/karafka/pro.rb +13 -0
- data/lib/karafka/process.rb +1 -0
- data/lib/karafka/processing/coordinator.rb +88 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +118 -0
- data/lib/karafka/processing/executors_buffer.rb +88 -0
- data/lib/karafka/processing/jobs/base.rb +51 -0
- data/lib/karafka/processing/jobs/consume.rb +42 -0
- data/lib/karafka/processing/jobs/revoked.rb +22 -0
- data/lib/karafka/processing/jobs/shutdown.rb +23 -0
- data/lib/karafka/processing/jobs_builder.rb +29 -0
- data/lib/karafka/processing/jobs_queue.rb +144 -0
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +29 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +88 -0
- data/lib/karafka/processing/workers_batch.rb +27 -0
- data/lib/karafka/railtie.rb +113 -0
- data/lib/karafka/routing/builder.rb +15 -24
- data/lib/karafka/routing/consumer_group.rb +11 -19
- data/lib/karafka/routing/consumer_mapper.rb +1 -2
- data/lib/karafka/routing/router.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +53 -0
- data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
- data/lib/karafka/routing/topic.rb +61 -24
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +51 -0
- data/lib/karafka/serialization/json/deserializer.rb +6 -15
- data/lib/karafka/server.rb +67 -26
- data/lib/karafka/setup/config.rb +147 -175
- data/lib/karafka/status.rb +14 -5
- data/lib/karafka/templates/example_consumer.rb.erb +16 -0
- data/lib/karafka/templates/karafka.rb.erb +15 -51
- data/lib/karafka/time_trackers/base.rb +19 -0
- data/lib/karafka/time_trackers/pause.rb +92 -0
- data/lib/karafka/time_trackers/poll.rb +65 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +38 -17
- data.tar.gz.sig +0 -0
- metadata +118 -120
- metadata.gz.sig +0 -0
- data/MIT-LICENCE +0 -18
- data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
- data/lib/karafka/attributes_map.rb +0 -63
- data/lib/karafka/backends/inline.rb +0 -16
- data/lib/karafka/base_responder.rb +0 -226
- data/lib/karafka/cli/flow.rb +0 -48
- data/lib/karafka/cli/missingno.rb +0 -19
- data/lib/karafka/code_reloader.rb +0 -67
- data/lib/karafka/connection/api_adapter.rb +0 -158
- data/lib/karafka/connection/batch_delegator.rb +0 -55
- data/lib/karafka/connection/builder.rb +0 -23
- data/lib/karafka/connection/message_delegator.rb +0 -36
- data/lib/karafka/consumers/batch_metadata.rb +0 -10
- data/lib/karafka/consumers/callbacks.rb +0 -71
- data/lib/karafka/consumers/includer.rb +0 -64
- data/lib/karafka/consumers/responders.rb +0 -24
- data/lib/karafka/consumers/single_params.rb +0 -15
- data/lib/karafka/contracts/responder_usage.rb +0 -54
- data/lib/karafka/fetcher.rb +0 -42
- data/lib/karafka/helpers/class_matcher.rb +0 -88
- data/lib/karafka/helpers/config_retriever.rb +0 -46
- data/lib/karafka/helpers/inflector.rb +0 -26
- data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
- data/lib/karafka/params/batch_metadata.rb +0 -26
- data/lib/karafka/params/builders/batch_metadata.rb +0 -30
- data/lib/karafka/params/builders/params.rb +0 -38
- data/lib/karafka/params/builders/params_batch.rb +0 -25
- data/lib/karafka/params/params_batch.rb +0 -60
- data/lib/karafka/patches/ruby_kafka.rb +0 -47
- data/lib/karafka/persistence/client.rb +0 -29
- data/lib/karafka/persistence/consumers.rb +0 -45
- data/lib/karafka/persistence/topics.rb +0 -48
- data/lib/karafka/responders/builder.rb +0 -36
- data/lib/karafka/responders/topic.rb +0 -55
- data/lib/karafka/routing/topic_mapper.rb +0 -53
- data/lib/karafka/serialization/json/serializer.rb +0 -31
- data/lib/karafka/setup/configurators/water_drop.rb +0 -36
- data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Contracts
+      # Contract for validating correct Pro components setup on a consumer group and topic levels
+      class ConsumerGroup < Base
+        virtual do |data, errors|
+          next unless errors.empty?
+          next unless data.key?(:topics)
+
+          fetched_errors = []
+
+          data.fetch(:topics).each do |topic|
+            ConsumerGroupTopic.new.call(topic).errors.each do |key, value|
+              fetched_errors << [[topic, key].flatten, value]
+            end
+          end
+
+          fetched_errors
+        end
+      end
+    end
+  end
+end
data/lib/karafka/pro/contracts/consumer_group_topic.rb
ADDED
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Contracts
+      # Contract for validating correct Pro components setup on a topic levels
+      class ConsumerGroupTopic < Base
+        configure do |config|
+          config.error_messages = YAML.safe_load(
+            File.read(
+              File.join(Karafka.gem_root, 'config', 'errors.yml')
+            )
+          ).fetch('en').fetch('validations').fetch('pro_consumer_group_topic')
+        end
+
+        virtual do |data|
+          next if data[:consumer] < Karafka::Pro::BaseConsumer
+
+          [[%i[consumer], :consumer_format]]
+        end
+      end
+    end
+  end
+end
data/lib/karafka/pro/loader.rb
ADDED
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Loader requires and loads all the pro components only when they are needed
+    class Loader
+      # All the pro components that need to be loaded
+      COMPONENTS = %w[
+        base_consumer
+        performance_tracker
+        processing/scheduler
+        processing/jobs/consume_non_blocking
+        processing/jobs_builder
+        processing/coordinator
+        processing/partitioner
+        contracts/base
+        contracts/consumer_group
+        contracts/consumer_group_topic
+        routing/topic_extensions
+        routing/builder_extensions
+        active_job/consumer
+        active_job/dispatcher
+        active_job/job_options_contract
+      ].freeze
+
+      private_constant :COMPONENTS
+
+      class << self
+        # Loads all the pro components and configures them wherever it is expected
+        # @param config [Karafka::Core::Configurable::Node] app config that we can alter with pro
+        #   components
+        def setup(config)
+          COMPONENTS.each { |component| require_relative(component) }
+
+          reconfigure(config)
+
+          load_routing_extensions
+        end
+
+        private
+
+        # Sets proper config options to use pro components
+        # @param config [WaterDrop::Configurable::Node] root config node
+        def reconfigure(config)
+          icfg = config.internal
+
+          icfg.processing.coordinator_class = Processing::Coordinator
+          icfg.processing.partitioner_class = Processing::Partitioner
+          icfg.processing.scheduler = Processing::Scheduler.new
+          icfg.processing.jobs_builder = Processing::JobsBuilder.new
+
+          icfg.active_job.consumer_class = ActiveJob::Consumer
+          icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
+          icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
+
+          config.monitor.subscribe(PerformanceTracker.instance)
+        end
+
+        # Loads routing extensions
+        def load_routing_extensions
+          ::Karafka::Routing::Topic.include(Routing::TopicExtensions)
+          ::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
+        end
+      end
+    end
+  end
+end
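For orientation, `Karafka::Pro::Loader.setup` is the single entry point that requires the files listed in COMPONENTS and swaps the stock processing and ActiveJob pieces for their Pro counterparts. Below is a minimal sketch of driving it by hand; in the gem itself the loader is only invoked internally once a valid Pro license has been verified, so treat this purely as an illustration of what `setup` changes.

# Illustration only: what Loader.setup rewires (normally triggered internally
# after Pro license verification, not called by application code).
require 'karafka'

Karafka::App.setup do |config|
  config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
end

Karafka::Pro::Loader.setup(Karafka::App.config)

# The internal config now points at the Pro implementations and the
# PerformanceTracker is subscribed to the monitor:
Karafka::App.config.internal.processing.scheduler # => a Karafka::Pro::Processing::Scheduler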
data/lib/karafka/pro/performance_tracker.rb
ADDED
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Tracker used to keep track of performance metrics
+    # It provides insights that can be used to optimize processing flow
+    class PerformanceTracker
+      include Singleton
+
+      # How many samples do we collect per topic partition
+      SAMPLES_COUNT = 200
+
+      private_constant :SAMPLES_COUNT
+
+      # Builds up nested concurrent hash for data tracking
+      def initialize
+        @processing_times = Concurrent::Hash.new do |topics_hash, topic|
+          topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
+            # This array does not have to be concurrent because we always access single partition
+            # data via instrumentation that operates in a single thread via consumer
+            partitions_hash[partition] = []
+          end
+        end
+      end
+
+      # @param topic [String]
+      # @param partition [Integer]
+      # @return [Float] p95 processing time of a single message from a single topic partition
+      def processing_time_p95(topic, partition)
+        values = @processing_times[topic][partition]
+
+        return 0 if values.empty?
+        return values.first if values.size == 1
+
+        percentile(0.95, values)
+      end
+
+      # @private
+      # @param event [Karafka::Core::Monitoring::Event] event details
+      # Tracks time taken to process a single message of a given topic partition
+      def on_consumer_consumed(event)
+        consumer = event[:caller]
+        messages = consumer.messages
+        topic = messages.metadata.topic
+        partition = messages.metadata.partition
+
+        samples = @processing_times[topic][partition]
+        samples << event[:time] / messages.count
+
+        return unless samples.size > SAMPLES_COUNT
+
+        samples.shift
+      end
+
+      private
+
+      # Computers the requested percentile out of provided values
+      # @param percentile [Float]
+      # @param values [Array<String>] all the values based on which we should
+      # @return [Float] computed percentile
+      def percentile(percentile, values)
+        values_sorted = values.sort
+
+        floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
+        mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
+
+        values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
+      end
+    end
+  end
+end
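The `percentile` helper above is plain linear interpolation between the two nearest ranks. A standalone copy of the same arithmetic (not part of the gem) makes the p95 behaviour easy to verify:

# Standalone copy of the interpolation used by PerformanceTracker#percentile,
# shown only to illustrate the arithmetic.
def percentile(percentile, values)
  values_sorted = values.sort

  floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
  mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)

  values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
end

# With five samples the p95 rank is 0.95 * 4 + 1 = 4.8, i.e. 80% of the way
# between the 4th and 5th sorted values:
percentile(0.95, [10, 20, 30, 40, 50]) # => 48.0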
data/lib/karafka/pro/processing/coordinator.rb
ADDED
@@ -0,0 +1,72 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Pro coordinator that provides extra orchestration methods useful for parallel processing
+      # within the same partition
+      class Coordinator < ::Karafka::Processing::Coordinator
+        # @param args [Object] anything the base coordinator accepts
+        def initialize(*args)
+          super
+          @on_started_invoked = false
+          @on_finished_invoked = false
+          @flow_lock = Mutex.new
+        end
+
+        # Starts the coordination process
+        # @param messages [Array<Karafka::Messages::Message>] messages for which processing we are
+        #   going to coordinate.
+        def start(messages)
+          super
+
+          @mutex.synchronize do
+            @on_started_invoked = false
+            @on_finished_invoked = false
+            @first_message = messages.first
+            @last_message = messages.last
+          end
+        end
+
+        # @return [Boolean] is the coordinated work finished or not
+        def finished?
+          @running_jobs.zero?
+        end
+
+        # Runs given code only once per all the coordinated jobs upon starting first of them
+        def on_started
+          @flow_lock.synchronize do
+            return if @on_started_invoked
+
+            @on_started_invoked = true
+
+            yield(@first_message, @last_message)
+          end
+        end
+
+        # Runs once when all the work that is suppose to be coordinated is finished
+        # It runs once per all the coordinated jobs and should be used to run any type of post
+        # jobs coordination processing execution
+        def on_finished
+          @flow_lock.synchronize do
+            return unless finished?
+            return if @on_finished_invoked
+
+            @on_finished_invoked = true
+
+            yield(@first_message, @last_message)
+          end
+        end
+      end
+    end
+  end
+end
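The coordinator's job is to make `on_started` and `on_finished` fire exactly once per coordinated batch, no matter how many parallel jobs the batch was split into. A hedged sketch of that flow follows; `pause_tracker` and `messages` are placeholders, and real consumers interact with the coordinator through the framework rather than directly.

# Hypothetical once-per-batch flow (placeholders, not framework wiring).
coordinator = Karafka::Pro::Processing::Coordinator.new(pause_tracker)
coordinator.start(messages)

# Every job derived from this batch runs something like:
coordinator.increment
coordinator.on_started { |first, last| puts "batch #{first.offset}..#{last.offset} started" } # runs once
# ... consume this job's slice of the batch ...
coordinator.decrement
coordinator.on_finished { |_first, _last| puts 'whole batch done' } # runs once, only when no jobs remain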
data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb
ADDED
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Pro components related to processing part of Karafka
+    module Processing
+      # Pro jobs
+      module Jobs
+        # The main job type in a non-blocking variant.
+        # This variant works "like" the regular consumption but pauses the partition for as long
+        # as it is needed until a job is done.
+        #
+        # It can be useful when having long lasting jobs that would exceed `max.poll.interval`
+        # if would block.
+        #
+        # @note It needs to be working with a proper consumer that will handle the partition
+        #   management. This layer of the framework knows nothing about Kafka messages consumption.
+        class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
+          # Releases the blocking lock after it is done with the preparation phase for this job
+          def before_call
+            super
+            @non_blocking = true
+          end
+        end
+      end
+    end
+  end
+end
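The only behavioural difference from the regular consume job is the `@non_blocking` flag flipped during the preparation phase; once set, the jobs queue stops blocking polling on this job while the partition itself stays paused. A sketch, assuming the base job exposes a `non_blocking?` predicate (that file is not part of this excerpt) and that `executor`, `messages` and `coordinator` already exist:

# Hypothetical: executor, messages and coordinator are assumed placeholders.
job = Karafka::Pro::Processing::Jobs::ConsumeNonBlocking.new(executor, messages, coordinator)

job.non_blocking? # => false until the preparation phase runs (assumed predicate)
job.before_call   # regular preparation plus flipping the flag
job.non_blocking? # => true - polling may continue while this job is still running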
data/lib/karafka/pro/processing/jobs_builder.rb
ADDED
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Pro jobs builder that supports lrj
+      class JobsBuilder < ::Karafka::Processing::JobsBuilder
+        # @param executor [Karafka::Processing::Executor]
+        # @param messages [Karafka::Messages::Messages] messages batch to be consumed
+        # @param coordinator [Karafka::Processing::Coordinator]
+        # @return [Karafka::Processing::Jobs::Consume] blocking job
+        # @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
+        def consume(executor, messages, coordinator)
+          if executor.topic.long_running_job?
+            Jobs::ConsumeNonBlocking.new(executor, messages, coordinator)
+          else
+            super
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/pro/processing/partitioner.rb
ADDED
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Pro partitioner that can distribute work based on the virtual partitioner settings
+      class Partitioner < ::Karafka::Processing::Partitioner
+        # @param topic [String] topic name
+        # @param messages [Array<Karafka::Messages::Message>] karafka messages
+        # @yieldparam [Integer] group id
+        # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
+        def call(topic, messages)
+          ktopic = @subscription_group.topics.find(topic)
+
+          @concurrency ||= ::Karafka::App.config.concurrency
+
+          # We only partition work if we have a virtual partitioner and more than one thread to
+          # process the data. With one thread it is not worth partitioning the work as the work
+          # itself will be assigned to one thread (pointless work)
+          if ktopic.virtual_partitioner? && @concurrency > 1
+            # We need to reduce it to number of threads, so the group_id is not a direct effect
+            # of the end user action. Otherwise the persistence layer for consumers would cache
+            # it forever and it would cause memory leaks
+            groupings = messages
+                        .group_by { |msg| ktopic.virtual_partitioner.call(msg) }
+                        .values
+
+            # Reduce the max concurrency to a size that matches the concurrency
+            # As mentioned above we cannot use the partitioning keys directly as it could cause
+            # memory leaks
+            #
+            # The algorithm here is simple, we assume that the most costly in terms of processing,
+            # will be processing of the biggest group and we reduce the smallest once to have
+            # max of groups equal to concurrency
+            while groupings.size > @concurrency
+              groupings.sort_by! { |grouping| -grouping.size }
+
+              # Offset order needs to be maintained for virtual partitions
+              groupings << (groupings.pop + groupings.pop).sort_by!(&:offset)
+            end
+
+            groupings.each_with_index { |messages_group, index| yield(index, messages_group) }
+          else
+            # When no virtual partitioner, works as regular one
+            yield(0, messages)
+          end
+        end
+      end
+    end
+  end
+end
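The reduction loop above is easiest to follow with concrete sizes. Suppose the virtual partitioner produced five groups of 6, 4, 3, 2 and 1 messages and `concurrency` is 3: the two smallest groups keep getting merged until only three remain. A toy version using integer arrays instead of messages (so the `sort_by!(&:offset)` step is omitted):

concurrency = 3
groupings = [[1] * 6, [2] * 4, [3] * 3, [4] * 2, [5] * 1]

while groupings.size > concurrency
  groupings.sort_by! { |grouping| -grouping.size }
  # merge the two smallest groups (the real code also restores offset order)
  groupings << (groupings.pop + groupings.pop)
end

groupings.map(&:size) # => [6, 4, 6] - never more groups than threads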
data/lib/karafka/pro/processing/scheduler.rb
ADDED
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Optimizes scheduler that takes into consideration of execution time needed to process
+      # messages from given topics partitions. It uses the non-preemptive LJF algorithm
+      #
+      # This scheduler is designed to optimize execution times on jobs that perform IO operations
+      # as when taking IO into consideration, the can achieve optimized parallel processing.
+      #
+      # This scheduler can also work with virtual partitions.
+      #
+      # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
+      # default FIFO scheduler from the default Karafka scheduler
+      class Scheduler < ::Karafka::Processing::Scheduler
+        # Schedules jobs in the LJF order for consumption
+        #
+        # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
+        #
+        def schedule_consumption(queue, jobs_array)
+          pt = PerformanceTracker.instance
+
+          ordered = []
+
+          jobs_array.each do |job|
+            messages = job.messages
+            message = messages.first
+
+            cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
+
+            ordered << [job, cost]
+          end
+
+          ordered.sort_by!(&:last)
+          ordered.reverse!
+          ordered.map!(&:first)
+
+          ordered.each do |job|
+            queue << job
+          end
+        end
+      end
+    end
+  end
+end
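The effect of the LJF ordering is visible with made-up numbers: each job's cost is the tracked p95 time per message multiplied by its batch size, and the costliest job is enqueued first.

# Toy illustration of the ordering step with (job, cost) pairs.
ordered = [[:job_a, 120.0], [:job_b, 430.0], [:job_c, 80.0]]

ordered.sort_by!(&:last) # cheapest first: job_c, job_a, job_b
ordered.reverse!         # longest job first
ordered.map!(&:first)

ordered # => [:job_b, :job_a, :job_c]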
data/lib/karafka/pro/routing/builder_extensions.rb
ADDED
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Pro routing components
+    module Routing
+      # Routing extensions for builder to be able to validate Pro components correct usage
+      module BuilderExtensions
+        # Validate consumer groups with pro contracts
+        # @param block [Proc] routing defining block
+        def draw(&block)
+          super
+
+          each do |consumer_group|
+            ::Karafka::Pro::Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/pro/routing/topic_extensions.rb
ADDED
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Pro routing components
+    module Routing
+      # Routing extensions that allow to configure some extra PRO routing options
+      module TopicExtensions
+        class << self
+          # @param base [Class] class we extend
+          def included(base)
+            base.attr_accessor :long_running_job
+            base.attr_accessor :virtual_partitioner
+          end
+        end
+
+        # @return [Boolean] true if virtual partitioner is defined, false otherwise
+        def virtual_partitioner?
+          virtual_partitioner != nil
+        end
+
+        # @return [Boolean] is a given job on a topic a long-running one
+        def long_running_job?
+          @long_running_job || false
+        end
+      end
+    end
+  end
+end
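Once the loader mixes this module into `Karafka::Routing::Topic`, every topic carries the two Pro flags. The routing DSL surface is not part of this diff, so the sketch below just uses the generated writers directly; the `Topic.new(:payments, consumer_group)` construction is an assumption made for illustration.

# Hypothetical: exercising the writers and predicates added by TopicExtensions.
topic = Karafka::Routing::Topic.new(:payments, consumer_group) # consumer_group assumed to exist

topic.virtual_partitioner = ->(message) { message.key } # split work by message key
topic.long_running_job = true

topic.virtual_partitioner? # => true
topic.long_running_job?    # => true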
data/lib/karafka/pro.rb
ADDED
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this repository
+# and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+module Karafka
+  # Namespace for pro components, licensed under the commercial license agreement.
+  module Pro
+  end
+end
data/lib/karafka/process.rb
CHANGED
data/lib/karafka/processing/coordinator.rb
ADDED
@@ -0,0 +1,88 @@
+# frozen_string_literal: true
+
+module Karafka
  module Processing
+    # Basic coordinator that allows us to provide coordination objects into consumers.
+    #
+    # This is a wrapping layer to simplify management of work to be handled around consumption.
+    #
+    # @note This coordinator needs to be thread safe. Some operations are performed only in the
+    #   listener thread, but we go with thread-safe by default for all not to worry about potential
+    #   future mistakes.
+    class Coordinator
+      # @return [Karafka::TimeTrackers::Pause]
+      attr_reader :pause_tracker
+
+      # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic partition
+      def initialize(pause_tracker)
+        @pause_tracker = pause_tracker
+        @revoked = false
+        @consumptions = {}
+        @running_jobs = 0
+        @mutex = Mutex.new
+      end
+
+      # Starts the coordinator for given consumption jobs
+      # @param _messages [Array<Karafka::Messages::Message>] batch of message for which we are
+      #   going to coordinate work. Not used with regular coordinator.
+      def start(_messages)
+        @mutex.synchronize do
+          @running_jobs = 0
+          # We need to clear the consumption results hash here, otherwise we could end up storing
+          # consumption results of consumer instances we no longer control
+          @consumptions.clear
+        end
+      end
+
+      # Increases number of jobs that we handle with this coordinator
+      def increment
+        @mutex.synchronize { @running_jobs += 1 }
+      end
+
+      # Decrements number of jobs we handle at the moment
+      def decrement
+        @mutex.synchronize do
+          @running_jobs -= 1
+
+          return @running_jobs unless @running_jobs.negative?
+
+          # This should never happen. If it does, something is heavily out of sync. Please reach
+          # out to us if you encounter this
+          raise Karafka::Errors::InvalidCoordinatorState, 'Was zero before decrementation'
+        end
+      end
+
+      # @param consumer [Object] karafka consumer (normal or pro)
+      # @return [Karafka::Processing::Result] result object which we can use to indicate
+      #   consumption processing state.
+      def consumption(consumer)
+        @mutex.synchronize do
+          @consumptions[consumer] ||= Processing::Result.new
+        end
+      end
+
+      # Is all the consumption done and finished successfully for this coordinator
+      def success?
+        @mutex.synchronize { @running_jobs.zero? && @consumptions.values.all?(&:success?) }
+      end
+
+      # Marks given coordinator for processing group as revoked
+      #
+      # This is invoked in two places:
+      # - from the main listener loop when we detect revoked partitions
+      # - from the consumer in case checkpointing fails
+      #
+      # This means, we can end up having consumer being aware that it was revoked prior to the
+      #   listener loop dispatching the revocation job. It is ok, as effectively nothing will be
+      #   processed until revocation jobs are done.
+      def revoke
+        @mutex.synchronize { @revoked = true }
+      end
+
+      # @return [Boolean] is the partition we are processing revoked or not
+      def revoked?
+        @revoked
+      end
+    end
+  end
+end
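Taken together, the counters and per-consumer results form a small lifecycle: `start` resets the state for a new batch, each dispatched job increments, each completed job decrements, and `success?` only reports true when nothing is running and every tracked result succeeded. A hedged sketch follows; `success!` on the result object is assumed for illustration (only `success?` is visible in this diff) and the other objects are placeholders.

# Hypothetical lifecycle walk-through (placeholders throughout).
coordinator = Karafka::Processing::Coordinator.new(pause_tracker)

coordinator.start(messages)
coordinator.increment                            # a consume job was dispatched

result = coordinator.consumption(consumer_instance)
result.success!                                  # assumed: consumer marks its work as ok

coordinator.decrement                            # the job finished
coordinator.success? # => true - no running jobs and all tracked results succeeded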