karafka 1.4.13 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +3 -3
- data/.github/workflows/ci.yml +85 -30
- data/.ruby-version +1 -1
- data/CHANGELOG.md +268 -7
- data/CONTRIBUTING.md +10 -19
- data/Gemfile +6 -0
- data/Gemfile.lock +44 -87
- data/LICENSE +17 -0
- data/LICENSE-COMM +89 -0
- data/LICENSE-LGPL +165 -0
- data/README.md +44 -48
- data/bin/benchmarks +85 -0
- data/bin/create_token +22 -0
- data/bin/integrations +237 -0
- data/bin/karafka +4 -0
- data/bin/scenario +29 -0
- data/bin/stress_many +13 -0
- data/bin/stress_one +13 -0
- data/bin/wait_for_kafka +20 -0
- data/certs/karafka-pro.pem +11 -0
- data/config/errors.yml +55 -40
- data/docker-compose.yml +39 -3
- data/karafka.gemspec +11 -17
- data/lib/active_job/karafka.rb +21 -0
- data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
- data/lib/karafka/active_job/consumer.rb +26 -0
- data/lib/karafka/active_job/dispatcher.rb +38 -0
- data/lib/karafka/active_job/job_extensions.rb +34 -0
- data/lib/karafka/active_job/job_options_contract.rb +21 -0
- data/lib/karafka/active_job/routing/extensions.rb +31 -0
- data/lib/karafka/app.rb +15 -20
- data/lib/karafka/base_consumer.rb +181 -31
- data/lib/karafka/cli/base.rb +4 -4
- data/lib/karafka/cli/info.rb +43 -9
- data/lib/karafka/cli/install.rb +19 -10
- data/lib/karafka/cli/server.rb +17 -42
- data/lib/karafka/cli.rb +4 -11
- data/lib/karafka/connection/client.rb +385 -90
- data/lib/karafka/connection/listener.rb +246 -38
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +84 -0
- data/lib/karafka/connection/pauses_manager.rb +46 -0
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/connection/rebalance_manager.rb +78 -0
- data/lib/karafka/contracts/base.rb +17 -0
- data/lib/karafka/contracts/config.rb +88 -11
- data/lib/karafka/contracts/consumer_group.rb +21 -189
- data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
- data/lib/karafka/contracts/server_cli_options.rb +19 -18
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/env.rb +46 -0
- data/lib/karafka/errors.rb +21 -21
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/helpers/colorize.rb +20 -0
- data/lib/karafka/helpers/multi_delegator.rb +2 -2
- data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
- data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
- data/lib/karafka/instrumentation/logger_listener.rb +164 -0
- data/lib/karafka/instrumentation/monitor.rb +13 -61
- data/lib/karafka/instrumentation/notifications.rb +52 -0
- data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
- data/lib/karafka/instrumentation.rb +21 -0
- data/lib/karafka/licenser.rb +75 -0
- data/lib/karafka/messages/batch_metadata.rb +45 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
- data/lib/karafka/messages/builders/message.rb +39 -0
- data/lib/karafka/messages/builders/messages.rb +32 -0
- data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
- data/lib/karafka/messages/messages.rb +64 -0
- data/lib/karafka/{params → messages}/metadata.rb +4 -6
- data/lib/karafka/messages/seek.rb +9 -0
- data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
- data/lib/karafka/pro/active_job/consumer.rb +46 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
- data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
- data/lib/karafka/pro/base_consumer.rb +82 -0
- data/lib/karafka/pro/contracts/base.rb +21 -0
- data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
- data/lib/karafka/pro/loader.rb +76 -0
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/pro/processing/coordinator.rb +72 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
- data/lib/karafka/pro/processing/partitioner.rb +60 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
- data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
- data/lib/karafka/pro.rb +13 -0
- data/lib/karafka/process.rb +1 -0
- data/lib/karafka/processing/coordinator.rb +88 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +118 -0
- data/lib/karafka/processing/executors_buffer.rb +88 -0
- data/lib/karafka/processing/jobs/base.rb +51 -0
- data/lib/karafka/processing/jobs/consume.rb +42 -0
- data/lib/karafka/processing/jobs/revoked.rb +22 -0
- data/lib/karafka/processing/jobs/shutdown.rb +23 -0
- data/lib/karafka/processing/jobs_builder.rb +29 -0
- data/lib/karafka/processing/jobs_queue.rb +144 -0
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +29 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +88 -0
- data/lib/karafka/processing/workers_batch.rb +27 -0
- data/lib/karafka/railtie.rb +113 -0
- data/lib/karafka/routing/builder.rb +15 -24
- data/lib/karafka/routing/consumer_group.rb +11 -19
- data/lib/karafka/routing/consumer_mapper.rb +1 -2
- data/lib/karafka/routing/router.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +53 -0
- data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
- data/lib/karafka/routing/topic.rb +61 -24
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +51 -0
- data/lib/karafka/serialization/json/deserializer.rb +6 -15
- data/lib/karafka/server.rb +67 -26
- data/lib/karafka/setup/config.rb +147 -175
- data/lib/karafka/status.rb +14 -5
- data/lib/karafka/templates/example_consumer.rb.erb +16 -0
- data/lib/karafka/templates/karafka.rb.erb +15 -51
- data/lib/karafka/time_trackers/base.rb +19 -0
- data/lib/karafka/time_trackers/pause.rb +92 -0
- data/lib/karafka/time_trackers/poll.rb +65 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +38 -17
- data.tar.gz.sig +0 -0
- metadata +118 -120
- metadata.gz.sig +0 -0
- data/MIT-LICENCE +0 -18
- data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
- data/lib/karafka/attributes_map.rb +0 -63
- data/lib/karafka/backends/inline.rb +0 -16
- data/lib/karafka/base_responder.rb +0 -226
- data/lib/karafka/cli/flow.rb +0 -48
- data/lib/karafka/cli/missingno.rb +0 -19
- data/lib/karafka/code_reloader.rb +0 -67
- data/lib/karafka/connection/api_adapter.rb +0 -158
- data/lib/karafka/connection/batch_delegator.rb +0 -55
- data/lib/karafka/connection/builder.rb +0 -23
- data/lib/karafka/connection/message_delegator.rb +0 -36
- data/lib/karafka/consumers/batch_metadata.rb +0 -10
- data/lib/karafka/consumers/callbacks.rb +0 -71
- data/lib/karafka/consumers/includer.rb +0 -64
- data/lib/karafka/consumers/responders.rb +0 -24
- data/lib/karafka/consumers/single_params.rb +0 -15
- data/lib/karafka/contracts/responder_usage.rb +0 -54
- data/lib/karafka/fetcher.rb +0 -42
- data/lib/karafka/helpers/class_matcher.rb +0 -88
- data/lib/karafka/helpers/config_retriever.rb +0 -46
- data/lib/karafka/helpers/inflector.rb +0 -26
- data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
- data/lib/karafka/params/batch_metadata.rb +0 -26
- data/lib/karafka/params/builders/batch_metadata.rb +0 -30
- data/lib/karafka/params/builders/params.rb +0 -38
- data/lib/karafka/params/builders/params_batch.rb +0 -25
- data/lib/karafka/params/params_batch.rb +0 -60
- data/lib/karafka/patches/ruby_kafka.rb +0 -47
- data/lib/karafka/persistence/client.rb +0 -29
- data/lib/karafka/persistence/consumers.rb +0 -45
- data/lib/karafka/persistence/topics.rb +0 -48
- data/lib/karafka/responders/builder.rb +0 -36
- data/lib/karafka/responders/topic.rb +0 -55
- data/lib/karafka/routing/topic_mapper.rb +0 -53
- data/lib/karafka/serialization/json/serializer.rb +0 -31
- data/lib/karafka/setup/configurators/water_drop.rb +0 -36
- data/lib/karafka/templates/application_responder.rb.erb +0 -11
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
  module Pro
    module Contracts
      # Pro contract for a consumer group. It runs the Pro topic contract against every topic
      # of the group and reports all the topic level errors
      class ConsumerGroup < Base
        # Does nothing when `errors` is not empty or when the data has no `:topics` key.
        # Otherwise returns a list of `[path, error]` pairs, where each path is the validated
        # topic followed by the key under which the topic contract reported the error.
        virtual do |data, errors|
          next unless errors.empty? && data.key?(:topics)

          data.fetch(:topics).flat_map do |topic|
            ConsumerGroupTopic.new.call(topic).errors.map do |key, value|
              [[topic, key].flatten, value]
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
  module Pro
    module Contracts
      # Contract for validating correct Pro components setup on the topic level
      class ConsumerGroupTopic < Base
        configure do |config|
          # Error messages are taken from the `en.validations.pro_consumer_group_topic` section
          # of the `config/errors.yml` file located in the gem root
          config.error_messages = YAML.safe_load(
            File.read(
              File.join(Karafka.gem_root, 'config', 'errors.yml')
            )
          ).fetch('en').fetch('validations').fetch('pro_consumer_group_topic')
        end

        # Reports the `consumer_format` error unless the topic consumer is a subclass of the
        # Pro base consumer
        virtual do |data|
          consumer = data[:consumer]

          # Only classes can be safely compared with `<`. Without the class check a consumer
          # that is not a class (nil, a String, etc) would raise from within the contract
          # instead of being reported as a validation error
          next if consumer.is_a?(Class) && consumer < Karafka::Pro::BaseConsumer

          [[%i[consumer], :consumer_format]]
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
  module Pro
    # Loader requires and loads all the pro components only when they are needed
    class Loader
      # Paths (relative to this file) of all the pro components, in the order in which they
      # are required
      COMPONENTS = %w[
        base_consumer
        performance_tracker
        processing/scheduler
        processing/jobs/consume_non_blocking
        processing/jobs_builder
        processing/coordinator
        processing/partitioner
        contracts/base
        contracts/consumer_group
        contracts/consumer_group_topic
        routing/topic_extensions
        routing/builder_extensions
        active_job/consumer
        active_job/dispatcher
        active_job/job_options_contract
      ].freeze

      private_constant :COMPONENTS

      class << self
        # Requires all the pro components, replaces the relevant config entries with their pro
        # counterparts and plugs in the pro routing extensions
        # @param config [Karafka::Core::Configurable::Node] app config that we can alter with pro
        #   components
        def setup(config)
          COMPONENTS.each do |component|
            require_relative(component)
          end

          reconfigure(config)
          load_routing_extensions
        end

        private

        # Replaces the internal processing and ActiveJob components with the pro ones and
        # subscribes the performance tracker to the monitor
        # @param config [Karafka::Core::Configurable::Node] root config node
        def reconfigure(config)
          internal = config.internal

          processing = internal.processing
          processing.coordinator_class = Processing::Coordinator
          processing.partitioner_class = Processing::Partitioner
          processing.scheduler = Processing::Scheduler.new
          processing.jobs_builder = Processing::JobsBuilder.new

          active_job = internal.active_job
          active_job.consumer_class = ActiveJob::Consumer
          active_job.dispatcher = ActiveJob::Dispatcher.new
          active_job.job_options_contract = ActiveJob::JobOptionsContract.new

          config.monitor.subscribe(PerformanceTracker.instance)
        end

        # Includes the pro topic extensions into the routing topic and prepends the pro builder
        # extensions to the routing builder
        def load_routing_extensions
          ::Karafka::Routing::Topic.include(Routing::TopicExtensions)
          ::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
  module Pro
    # Tracker used to keep track of performance metrics
    # It provides insights that can be used to optimize processing flow
    class PerformanceTracker
      include Singleton

      # How many samples do we collect per topic partition
      SAMPLES_COUNT = 200

      private_constant :SAMPLES_COUNT

      # Builds up nested concurrent hash for data tracking
      def initialize
        @processing_times = Concurrent::Hash.new do |topics_hash, topic|
          topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
            # This array does not have to be concurrent because we always access single partition
            # data via instrumentation that operates in a single thread via consumer
            partitions_hash[partition] = []
          end
        end
      end

      # @param topic [String]
      # @param partition [Integer]
      # @return [Numeric] p95 processing time of a single message from a single topic partition.
      #   It is `0` when there are no samples yet and the only sample when there is just one.
      def processing_time_p95(topic, partition)
        values = @processing_times[topic][partition]

        return 0 if values.empty?
        return values.first if values.size == 1

        percentile(0.95, values)
      end

      # @private
      # @param event [Karafka::Core::Monitoring::Event] event details
      # Tracks time taken to process a single message of a given topic partition
      def on_consumer_consumed(event)
        messages = event[:caller].messages
        metadata = messages.metadata

        samples = @processing_times[metadata.topic][metadata.partition]
        # Whole batch time divided by the batch size gives us the per message average
        samples << event[:time] / messages.count

        # We keep only the most recent samples, dropping the oldest one when over the limit
        samples.shift if samples.size > SAMPLES_COUNT
      end

      private

      # Computes the requested percentile out of provided values using linear interpolation
      # between the two closest ranks
      # @param percentile [Float] percentile we want expressed as a fraction (0.95 for p95)
      # @param values [Array<Numeric>] all the values based on which we should compute the
      #   percentile. Cannot be empty.
      # @return [Numeric] computed percentile
      def percentile(percentile, values)
        values_sorted = values.sort

        # One-based, fractional position of the percentile in the sorted values
        rank = percentile * (values_sorted.length - 1) + 1
        floor = rank.floor - 1
        mod = rank.modulo(1)

        lower = values_sorted[floor]
        upper = values_sorted[floor + 1]

        # At the very top (for example percentile 1.0) there is no upper neighbour we could
        # interpolate with, so the value itself is the result
        return lower if upper.nil?

        lower + (mod * (upper - lower))
      end
    end
  end
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
  module Pro
    module Processing
      # Pro coordinator that provides extra orchestration methods useful for parallel processing
      # within the same partition
      class Coordinator < ::Karafka::Processing::Coordinator
        # @param args [Object] anything the base coordinator accepts
        def initialize(*args)
          super
          @on_started_invoked = @on_finished_invoked = false
          # Separate lock that guards the "run once" callbacks
          @flow_lock = Mutex.new
        end

        # Starts the coordination process, resetting the "run once" flags and remembering the
        # first and the last message of the batch
        # @param messages [Array<Karafka::Messages::Message>] messages for which processing we are
        #   going to coordinate.
        def start(messages)
          super

          @mutex.synchronize do
            @on_started_invoked = @on_finished_invoked = false
            @first_message = messages.first
            @last_message = messages.last
          end
        end

        # @return [Boolean] is the coordinated work finished or not
        def finished?
          @running_jobs.zero?
        end

        # Runs given code only once per all the coordinated jobs upon starting first of them
        # @yieldparam [Object] first message of the coordinated batch
        # @yieldparam [Object] last message of the coordinated batch
        def on_started
          @flow_lock.synchronize do
            unless @on_started_invoked
              @on_started_invoked = true

              yield(@first_message, @last_message)
            end
          end
        end

        # Runs once when all the work that is supposed to be coordinated is finished
        # It runs once per all the coordinated jobs and should be used to run any type of post
        # jobs coordination processing execution
        # @yieldparam [Object] first message of the coordinated batch
        # @yieldparam [Object] last message of the coordinated batch
        def on_finished
          @flow_lock.synchronize do
            if finished? && !@on_finished_invoked
              @on_finished_invoked = true

              yield(@first_message, @last_message)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
  module Pro
    # Pro components related to processing part of Karafka
    module Processing
      # Pro jobs
      module Jobs
        # Non-blocking variant of the main consumption job.
        # It works "like" the regular consumption job, but the partition is paused for as long
        # as it is needed until the job is done.
        #
        # Useful for long lasting jobs that would exceed `max.poll.interval` if they were
        # blocking.
        #
        # @note It needs to be working with a proper consumer that will handle the partition
        #   management. This layer of the framework knows nothing about Kafka messages consumption.
        class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
          # Runs the regular preparation phase and, once it is done, flags this job as a
          # non-blocking one
          def before_call
            super

            @non_blocking = true
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
  module Pro
    module Processing
      # Pro jobs builder that supports long-running jobs (lrj)
      class JobsBuilder < ::Karafka::Processing::JobsBuilder
        # Builds a non-blocking consumption job for topics flagged as long-running and falls
        # back to the regular builder for all the other topics
        # @param executor [Karafka::Processing::Executor]
        # @param messages [Karafka::Messages::Messages] messages batch to be consumed
        # @param coordinator [Karafka::Processing::Coordinator]
        # @return [Karafka::Processing::Jobs::Consume,
        #   Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] blocking job or a non blocking
        #   one for lrj
        def consume(executor, messages, coordinator)
          return super unless executor.topic.long_running_job?

          Jobs::ConsumeNonBlocking.new(executor, messages, coordinator)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
  module Pro
    module Processing
      # Pro partitioner that can distribute work based on the virtual partitioner settings
      class Partitioner < ::Karafka::Processing::Partitioner
        # @param topic [String] topic name
        # @param messages [Array<Karafka::Messages::Message>] karafka messages
        # @yieldparam [Integer] group id
        # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
        def call(topic, messages)
          ktopic = @subscription_group.topics.find(topic)

          @concurrency ||= ::Karafka::App.config.concurrency

          # Work is partitioned only with a virtual partitioner and more than one thread to
          # process the data. With a single thread all of it would end up in that one thread
          # anyhow, so we act as the regular partitioner
          return yield(0, messages) unless ktopic.virtual_partitioner? && @concurrency > 1

          # Group ids are not taken from the partitioner results directly, so the group_id is
          # not a direct effect of the end user action. Otherwise the persistence layer for
          # consumers would cache it forever and it would cause memory leaks
          groupings = messages.group_by { |msg| ktopic.virtual_partitioner.call(msg) }.values

          # We assume that processing of the biggest group is the most costly one, so we keep
          # merging the two smallest groups until the number of groups fits the concurrency
          until groupings.size <= @concurrency
            groupings.sort_by! { |grouping| -grouping.size }

            smallest = groupings.pop
            second_smallest = groupings.pop

            # Offset order needs to be maintained for virtual partitions
            groupings << (smallest + second_smallest).sort_by!(&:offset)
          end

          groupings.each_with_index { |messages_group, index| yield(index, messages_group) }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
  module Pro
    module Processing
      # Optimized scheduler that takes into consideration the execution time needed to process
      # messages from given topics partitions. It uses the non-preemptive LJF algorithm
      #
      # This scheduler is designed to optimize execution times on jobs that perform IO operations
      # as when taking IO into consideration, it can achieve optimized parallel processing.
      #
      # This scheduler can also work with virtual partitions.
      #
      # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
      # default FIFO scheduler from the default Karafka scheduler
      class Scheduler < ::Karafka::Processing::Scheduler
        # Schedules jobs in the LJF order for consumption
        #
        # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
        #
        def schedule_consumption(queue, jobs_array)
          tracker = PerformanceTracker.instance

          # Estimated job cost is the p95 time of a single message times the batch size
          jobs_with_costs = jobs_array.map do |job|
            batch = job.messages
            head = batch.first

            [job, tracker.processing_time_p95(head.topic, head.partition) * batch.size]
          end

          # The most expensive jobs go to the queue first
          jobs_with_costs
            .sort_by(&:last)
            .reverse
            .map(&:first)
            .each { |job| queue << job }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
  module Pro
    # Pro routing components
    module Routing
      # Routing extensions for builder to be able to validate Pro components correct usage
      module BuilderExtensions
        # Draws the routes the regular way and then validates each of the consumer groups with
        # the pro consumer group contract
        # @param block [Proc] routing defining block
        def draw(&block)
          super

          each { |group| ::Karafka::Pro::Contracts::ConsumerGroup.new.validate!(group.to_h) }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
  module Pro
    # Pro routing components
    module Routing
      # Routing extensions that allow to configure some extra PRO routing options
      module TopicExtensions
        # Defines the accessors of the pro options on the class that includes this module
        # @param base [Class] class we extend
        def self.included(base)
          base.attr_accessor(:long_running_job)
          base.attr_accessor(:virtual_partitioner)
        end

        # @return [Boolean] true if virtual partitioner is defined, false otherwise
        def virtual_partitioner?
          !virtual_partitioner.nil?
        end

        # @return [Boolean] is a given job on a topic a long-running one. False when the option
        #   was not set.
        def long_running_job?
          @long_running_job || false
        end
      end
    end
  end
end
|
data/lib/karafka/pro.rb
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this repository
|
|
5
|
+
# and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
module Karafka
  # Namespace for all the pro components. Their usage requires the commercial license agreement.
  module Pro
  end
end
|
data/lib/karafka/process.rb
CHANGED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
  module Processing
    # Basic coordinator that allows us to provide coordination objects into consumers.
    #
    # It is a wrapping layer that simplifies management of the work to be handled around
    # consumption.
    #
    # @note This coordinator needs to be thread safe. Some operations are performed only in the
    #   listener thread, but we go with thread-safe by default for all not to worry about potential
    #   future mistakes.
    class Coordinator
      # @return [Karafka::TimeTrackers::Pause]
      attr_reader :pause_tracker

      # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic partition
      def initialize(pause_tracker)
        @mutex = Mutex.new
        @pause_tracker = pause_tracker
        @running_jobs = 0
        @consumptions = {}
        @revoked = false
      end

      # Starts the coordinator for given consumption jobs
      # @param _messages [Array<Karafka::Messages::Message>] batch of message for which we are
      #   going to coordinate work. Not used with regular coordinator.
      def start(_messages)
        @mutex.synchronize do
          @running_jobs = 0
          # Consumption results need to be dropped here, otherwise we could end up storing
          # results of consumer instances we no longer control
          @consumptions.clear
        end
      end

      # Increases number of jobs that we handle with this coordinator
      # @return [Integer] number of running jobs after the increase
      def increment
        @mutex.synchronize do
          @running_jobs += 1
        end
      end

      # Decrements number of jobs we handle at the moment
      # @return [Integer] number of running jobs after the decrease
      # @raise [Karafka::Errors::InvalidCoordinatorState] when there were no running jobs
      def decrement
        @mutex.synchronize do
          @running_jobs -= 1

          # This should never happen. If it does, something is heavily out of sync. Please reach
          # out to us if you encounter this
          if @running_jobs.negative?
            raise Karafka::Errors::InvalidCoordinatorState, 'Was zero before decrementation'
          end

          @running_jobs
        end
      end

      # @param consumer [Object] karafka consumer (normal or pro)
      # @return [Karafka::Processing::Result] result object which we can use to indicate
      #   consumption processing state. The same object is returned for the same consumer until
      #   the coordinator is started again.
      def consumption(consumer)
        @mutex.synchronize { @consumptions[consumer] ||= Processing::Result.new }
      end

      # Is all the consumption done and finished successfully for this coordinator
      # @return [Boolean] true when no jobs are running and all the results are successful
      def success?
        @mutex.synchronize do
          @running_jobs.zero? && @consumptions.each_value.all?(&:success?)
        end
      end

      # Marks given coordinator for processing group as revoked
      #
      # This is invoked in two places:
      #   - from the main listener loop when we detect revoked partitions
      #   - from the consumer in case checkpointing fails
      #
      # This means, we can end up having consumer being aware that it was revoked prior to the
      # listener loop dispatching the revocation job. It is ok, as effectively nothing will be
      # processed until revocation jobs are done.
      def revoke
        @mutex.synchronize do
          @revoked = true
        end
      end

      # @return [Boolean] is the partition we are processing revoked or not
      def revoked?
        @revoked
      end
    end
  end
end
|