karafka 1.4.10 → 2.0.0.alpha2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/FUNDING.yml +3 -0
- data/.github/workflows/ci.yml +79 -26
- data/.ruby-version +1 -1
- data/CHANGELOG.md +46 -0
- data/CONTRIBUTING.md +6 -6
- data/Gemfile +6 -0
- data/Gemfile.lock +45 -53
- data/LICENSE +17 -0
- data/LICENSE-COMM +89 -0
- data/LICENSE-LGPL +165 -0
- data/README.md +16 -48
- data/bin/benchmarks +85 -0
- data/bin/create_token +28 -0
- data/bin/integrations +160 -0
- data/bin/karafka +4 -0
- data/bin/stress +13 -0
- data/certs/karafka-pro.pem +11 -0
- data/config/errors.yml +5 -38
- data/docker-compose.yml +12 -3
- data/karafka.gemspec +14 -14
- data/lib/active_job/karafka.rb +20 -0
- data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
- data/lib/karafka/active_job/consumer.rb +24 -0
- data/lib/karafka/active_job/dispatcher.rb +38 -0
- data/lib/karafka/active_job/job_extensions.rb +34 -0
- data/lib/karafka/active_job/job_options_contract.rb +15 -0
- data/lib/karafka/active_job/routing_extensions.rb +18 -0
- data/lib/karafka/app.rb +14 -20
- data/lib/karafka/base_consumer.rb +103 -34
- data/lib/karafka/cli/base.rb +4 -4
- data/lib/karafka/cli/info.rb +44 -9
- data/lib/karafka/cli/install.rb +3 -8
- data/lib/karafka/cli/server.rb +16 -43
- data/lib/karafka/cli.rb +4 -11
- data/lib/karafka/connection/client.rb +279 -93
- data/lib/karafka/connection/listener.rb +137 -38
- data/lib/karafka/connection/messages_buffer.rb +57 -0
- data/lib/karafka/connection/pauses_manager.rb +46 -0
- data/lib/karafka/connection/rebalance_manager.rb +62 -0
- data/lib/karafka/contracts/base.rb +23 -0
- data/lib/karafka/contracts/config.rb +44 -8
- data/lib/karafka/contracts/consumer_group.rb +1 -176
- data/lib/karafka/contracts/consumer_group_topic.rb +16 -8
- data/lib/karafka/contracts/server_cli_options.rb +2 -12
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/env.rb +46 -0
- data/lib/karafka/errors.rb +18 -18
- data/lib/karafka/helpers/multi_delegator.rb +2 -2
- data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
- data/lib/karafka/instrumentation/callbacks/statistics.rb +42 -0
- data/lib/karafka/instrumentation/monitor.rb +14 -21
- data/lib/karafka/instrumentation/stdout_listener.rb +67 -91
- data/lib/karafka/instrumentation.rb +21 -0
- data/lib/karafka/licenser.rb +76 -0
- data/lib/karafka/{params → messages}/batch_metadata.rb +9 -13
- data/lib/karafka/messages/builders/batch_metadata.rb +52 -0
- data/lib/karafka/messages/builders/message.rb +38 -0
- data/lib/karafka/messages/builders/messages.rb +40 -0
- data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
- data/lib/karafka/messages/messages.rb +64 -0
- data/lib/karafka/{params → messages}/metadata.rb +4 -6
- data/lib/karafka/messages/seek.rb +9 -0
- data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +58 -0
- data/lib/karafka/pro/active_job/job_options_contract.rb +27 -0
- data/lib/karafka/pro/loader.rb +29 -0
- data/lib/karafka/pro.rb +13 -0
- data/lib/karafka/processing/executor.rb +96 -0
- data/lib/karafka/processing/executors_buffer.rb +49 -0
- data/lib/karafka/processing/jobs/base.rb +18 -0
- data/lib/karafka/processing/jobs/consume.rb +28 -0
- data/lib/karafka/processing/jobs/revoked.rb +22 -0
- data/lib/karafka/processing/jobs/shutdown.rb +23 -0
- data/lib/karafka/processing/jobs_queue.rb +121 -0
- data/lib/karafka/processing/worker.rb +57 -0
- data/lib/karafka/processing/workers_batch.rb +22 -0
- data/lib/karafka/railtie.rb +75 -0
- data/lib/karafka/routing/builder.rb +15 -24
- data/lib/karafka/routing/consumer_group.rb +10 -18
- data/lib/karafka/routing/consumer_mapper.rb +1 -2
- data/lib/karafka/routing/router.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +53 -0
- data/lib/karafka/routing/subscription_groups_builder.rb +51 -0
- data/lib/karafka/routing/topic.rb +47 -25
- data/lib/karafka/runner.rb +59 -0
- data/lib/karafka/serialization/json/deserializer.rb +6 -15
- data/lib/karafka/server.rb +62 -25
- data/lib/karafka/setup/config.rb +98 -171
- data/lib/karafka/status.rb +13 -3
- data/lib/karafka/templates/example_consumer.rb.erb +16 -0
- data/lib/karafka/templates/karafka.rb.erb +14 -50
- data/lib/karafka/time_trackers/base.rb +19 -0
- data/lib/karafka/time_trackers/pause.rb +84 -0
- data/lib/karafka/time_trackers/poll.rb +65 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +35 -13
- data.tar.gz.sig +0 -0
- metadata +82 -104
- metadata.gz.sig +0 -0
- data/MIT-LICENCE +0 -18
- data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
- data/lib/karafka/attributes_map.rb +0 -63
- data/lib/karafka/backends/inline.rb +0 -16
- data/lib/karafka/base_responder.rb +0 -226
- data/lib/karafka/cli/flow.rb +0 -48
- data/lib/karafka/cli/missingno.rb +0 -19
- data/lib/karafka/code_reloader.rb +0 -67
- data/lib/karafka/connection/api_adapter.rb +0 -158
- data/lib/karafka/connection/batch_delegator.rb +0 -55
- data/lib/karafka/connection/builder.rb +0 -23
- data/lib/karafka/connection/message_delegator.rb +0 -36
- data/lib/karafka/consumers/batch_metadata.rb +0 -10
- data/lib/karafka/consumers/callbacks.rb +0 -71
- data/lib/karafka/consumers/includer.rb +0 -64
- data/lib/karafka/consumers/responders.rb +0 -24
- data/lib/karafka/consumers/single_params.rb +0 -15
- data/lib/karafka/contracts/responder_usage.rb +0 -54
- data/lib/karafka/fetcher.rb +0 -42
- data/lib/karafka/helpers/class_matcher.rb +0 -88
- data/lib/karafka/helpers/config_retriever.rb +0 -46
- data/lib/karafka/helpers/inflector.rb +0 -26
- data/lib/karafka/params/builders/batch_metadata.rb +0 -30
- data/lib/karafka/params/builders/params.rb +0 -38
- data/lib/karafka/params/builders/params_batch.rb +0 -25
- data/lib/karafka/params/params_batch.rb +0 -60
- data/lib/karafka/patches/ruby_kafka.rb +0 -47
- data/lib/karafka/persistence/client.rb +0 -29
- data/lib/karafka/persistence/consumers.rb +0 -45
- data/lib/karafka/persistence/topics.rb +0 -48
- data/lib/karafka/responders/builder.rb +0 -36
- data/lib/karafka/responders/topic.rb +0 -55
- data/lib/karafka/routing/topic_mapper.rb +0 -53
- data/lib/karafka/serialization/json/serializer.rb +0 -31
- data/lib/karafka/setup/configurators/water_drop.rb +0 -36
- data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -1,18 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Karafka
|
4
|
-
module
|
5
|
-
# Single message
|
6
|
-
# payload deserialization
|
4
|
+
module Messages
|
5
|
+
# Single message metadata details that can be accessed without the need of deserialization.
|
7
6
|
Metadata = Struct.new(
|
8
|
-
:
|
7
|
+
:timestamp,
|
9
8
|
:headers,
|
10
|
-
:is_control_record,
|
11
9
|
:key,
|
12
10
|
:offset,
|
13
11
|
:deserializer,
|
14
12
|
:partition,
|
15
|
-
:
|
13
|
+
:received_at,
|
16
14
|
:topic,
|
17
15
|
keyword_init: true
|
18
16
|
)
|
# frozen_string_literal: true

module Karafka
  module Messages
    # "Fake" message that we use as an abstraction layer when seeking back.
    # This allows us to encapsulate a seek with a simple abstraction.
    #
    # @note It only carries the coordinates (topic, partition, offset) needed to reposition
    #   a consumer - it is not a real consumed message.
    Seek = Struct.new(:topic, :partition, :offset)
  end
end
# frozen_string_literal: true

module Karafka
  # Patches to external components
  module Patches
    # Rdkafka related patches
    module Rdkafka
      # Rdkafka::Consumer patches
      module Consumer
        # A method that allows us to get the native kafka consumer name
        #
        # @return [String] consumer instance name
        # @note We need this to make sure that we allocate proper dispatched events only to
        #   callback listeners that should publish them
        def name
          ::Rdkafka::Bindings.rd_kafka_name(@native_kafka)
        end
      end
    end
  end
end

::Rdkafka::Consumer.include ::Karafka::Patches::Rdkafka::Consumer
# frozen_string_literal: true

# This Karafka component is a Pro component.
# All of the commercial components are present in the lib/karafka/pro directory of this repository
# and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    # Karafka Pro ActiveJob components
    module ActiveJob
      # Pro dispatcher that sends the ActiveJob job to a proper topic based on the queue name
      # and that allows to inject additional options into the producer, effectively allowing for a
      # much better and more granular control over the dispatch and consumption process.
      class Dispatcher < ::Karafka::ActiveJob::Dispatcher
        # Defaults for dispatching
        # They can be updated by using `#karafka_options` on the job
        DEFAULTS = {
          dispatch_method: :produce_async,
          # We don't create a dummy proc based partitioner as we would have to evaluate it with
          # each job.
          partitioner: nil
        }.freeze

        private_constant :DEFAULTS

        # Dispatches the job to the producer using the per-job or default dispatch method
        # @param job [ActiveJob::Base] job
        def call(job)
          ::Karafka.producer.public_send(
            fetch_option(job, :dispatch_method, DEFAULTS),
            dispatch_details(job).merge!(
              topic: job.queue_name,
              payload: ::ActiveSupport::JSON.encode(job.serialize)
            )
          )
        end

        private

        # @param job [ActiveJob::Base] job instance
        # @return [Hash] hash with dispatch details to which we merge topic and payload
        def dispatch_details(job)
          partitioner = fetch_option(job, :partitioner, DEFAULTS)

          return {} unless partitioner

          {
            partition_key: partitioner.call(job)
          }
        end
      end
    end
  end
end
# frozen_string_literal: true

# This Karafka component is a Pro component.
# All of the commercial components are present in the lib/karafka/pro directory of this repository
# and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module ActiveJob
      # Contract for validating the options that can be altered with `#karafka_options` per job
      # class that works with Pro features.
      class JobOptionsContract < ::Karafka::ActiveJob::JobOptionsContract
        # Dry types
        Types = include Dry.Types()

        params do
          # The partitioner only needs to respond to `#call` - duck typing via Dry interface type
          optional(:partitioner).value(Types.Interface(:call))
        end
      end
    end
  end
end
# frozen_string_literal: true

# This Karafka component is a Pro component.
# All of the commercial components are present in the lib/karafka/pro directory of this repository
# and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.
module Karafka
  module Pro
    # Loader requires and loads all the pro components only when they are needed
    class Loader
      class << self
        # Loads all the pro components and configures them wherever it is expected
        # @param config [Dry::Configurable::Config] whole app config that we can alter with pro
        #   components
        def setup(config)
          # Lazy requires so the commercial code is only ever loaded for licensed setups
          require_relative 'active_job/dispatcher'
          require_relative 'active_job/job_options_contract'

          config.internal.active_job.dispatcher = ActiveJob::Dispatcher.new
          config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
        end
      end
    end
  end
end
# frozen_string_literal: true

# This Karafka component is a Pro component.
# All of the commercial components are present in the lib/karafka/pro directory of this repository
# and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
module Karafka
  # Namespace for pro components, licensed under the commercial license agreement.
  module Pro
  end
end
# frozen_string_literal: true

module Karafka
  # Namespace that encapsulates all the logic related to processing data.
  module Processing
    # Executors:
    # - run consumers code with provided messages batch (for `#call`) or run given teardown
    #   operations when needed from separate threads.
    # - they re-create consumer instances in case of partitions that were revoked and
    #   assigned back.
    #
    # @note Executors are not removed after partition is revoked. They are not that big and will
    #   be re-used in case of a re-claim
    class Executor
      # @return [String] unique id that we use for state tracking
      attr_reader :id

      # @return [String] subscription group id to which a given executor belongs
      attr_reader :group_id

      # @param group_id [String] id of the subscription group to which the executor belongs
      # @param client [Karafka::Connection::Client] kafka client
      # @param topic [Karafka::Routing::Topic] topic for which this executor will run
      # @param pause [Karafka::TimeTrackers::Pause] fetch pause object for crash pausing
      def initialize(group_id, client, topic, pause)
        @id = SecureRandom.uuid
        @group_id = group_id
        @client = client
        @topic = topic
        @pause = pause
      end

      # Runs consumer data processing against given batch and handles failures and errors.
      #
      # @param messages [Array<Rdkafka::Consumer::Message>] raw rdkafka messages
      # @param received_at [Time] the moment we've received the batch (actually the moment we've
      #   enqueued it, but good enough)
      def consume(messages, received_at)
        # Recreate consumer with each batch if persistence is not enabled
        # We reload the consumers with each batch instead of relying on some external signals
        # when needed for consistency. That way devs may have it on or off and not in this
        # middle state, where re-creation of a consumer instance would occur only sometimes
        @consumer = nil unless ::Karafka::App.config.consumer_persistence

        # First we build messages batch...
        consumer.messages = Messages::Builders::Messages.call(
          messages,
          @topic,
          received_at
        )

        # We run the consumer client logic...
        consumer.on_consume
      end

      # Runs the controller `#revoked` method that should be triggered when a given consumer is
      # no longer needed due to partitions reassignment.
      #
      # @note Clearing the consumer will ensure, that if we get the partition back, it will be
      #   handled with a consumer with a clean state.
      #
      # @note We run it only when consumer was present, because presence indicates, that at least
      #   a single message has been consumed.
      def revoked
        consumer.on_revoked if @consumer
        @consumer = nil
      end

      # Runs the controller `#shutdown` method that should be triggered when a given consumer is
      # no longer needed as we're closing the process.
      #
      # @note While we do not need to clear the consumer here, it's a good habit to clean after
      #   work is done.
      def shutdown
        # There is a case, where the consumer no longer exists because it was revoked, in case
        # like that we do not build a new instance and shutdown should not be triggered.
        consumer.on_shutdown if @consumer
        @consumer = nil
      end

      private

      # @return [Object] cached consumer instance built lazily from the topic's consumer class
      def consumer
        @consumer ||= begin
          consumer = @topic.consumer.new
          consumer.topic = @topic
          consumer.client = @client
          consumer.pause = @pause
          consumer.producer = ::Karafka::App.producer
          consumer
        end
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Processing
    # Buffer for executors of a given subscription group. It wraps around the concept of building
    # and caching them, so we can re-use them instead of creating new each time.
    class ExecutorsBuffer
      # @param client [Connection::Client]
      # @param subscription_group [Routing::SubscriptionGroup]
      # @return [ExecutorsBuffer]
      def initialize(client, subscription_group)
        @subscription_group = subscription_group
        @client = client
        # topic => { partition => executor }
        @buffer = Hash.new { |h, k| h[k] = {} }
      end

      # @param topic [String] topic name
      # @param partition [Integer] partition number
      # @param pause [TimeTrackers::Pause] pause corresponding with provided topic and partition
      # @return [Executor] consumer executor
      # @raise [Errors::TopicNotFoundError] when the topic is not part of the subscription group
      def fetch(topic, partition, pause)
        # Keep the requested name in its own variable so a failed lookup raises with the topic
        # name and not nil (the original reassigned `topic` with the `find` result before raising)
        ktopic = @subscription_group.topics.find { |candidate| candidate.name == topic }

        ktopic || raise(Errors::TopicNotFoundError, topic)

        @buffer[ktopic][partition] ||= Executor.new(
          @subscription_group.id,
          @client,
          ktopic,
          pause
        )
      end

      # Runs the shutdown on all active executors.
      def shutdown
        @buffer.values.flat_map(&:values).each(&:shutdown)
      end

      # Clears the executors buffer. Useful for critical errors recovery.
      def clear
        @buffer.clear
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Processing
    # Namespace for all the jobs that are supposed to run in workers.
    module Jobs
      # Base class for all the job types that are supposed to run in workers threads.
      class Base
        extend Forwardable

        # @note Since one job always has one executor, we use the executor's id and group id as
        #   the job's own reference
        def_delegators :executor, :id, :group_id

        # @return [Karafka::Processing::Executor, nil] executor assigned to this job
        attr_reader :executor
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Processing
    module Jobs
      # The main job type. It runs the executor that triggers given topic partition messages
      # processing in an underlying consumer instance.
      class Consume < Base
        # @param executor [Karafka::Processing::Executor] executor that is supposed to run a
        #   given job
        # @param messages [Array<Rdkafka::Consumer::Message>] array with raw rdkafka messages
        #   with which we are supposed to work
        # @return [Consume]
        def initialize(executor, messages)
          @executor = executor
          @messages = messages
          # Remembered so the executor can use the enqueue moment as the batch received_at
          @created_at = Time.now
          super()
        end

        # Runs the given executor.
        def call
          executor.consume(@messages, @created_at)
        end
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Processing
    module Jobs
      # Job that runs the revoked operation when we lose a partition on a consumer that had it.
      class Revoked < Base
        # @param executor [Karafka::Processing::Executor] executor that is supposed to run the job
        # @return [Revoked]
        def initialize(executor)
          @executor = executor
          super()
        end

        # Runs the revoking job via an executor.
        def call
          executor.revoked
        end
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Processing
    module Jobs
      # Job that runs on each active consumer upon process shutdown (one job per consumer).
      class Shutdown < Base
        # @param executor [Karafka::Processing::Executor] executor that is supposed to run a
        #   given job on an active consumer
        # @return [Shutdown]
        def initialize(executor)
          @executor = executor
          super()
        end

        # Runs the shutdown job via an executor.
        def call
          executor.shutdown
        end
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Processing
    # This is the key work component for Karafka jobs distribution. It provides API for running
    # jobs in parallel while operating within more than one subscription group.
    #
    # We need to take into consideration fact, that more than one subscription group can operate
    # on this queue, that's why internally we keep track of processing per group.
    #
    # We work with the assumption, that partitions data is evenly distributed.
    class JobsQueue
      # @return [Karafka::Processing::JobsQueue]
      def initialize
        @queue = ::Queue.new
        # Those queues will act as semaphores internally. Since we need an indicator for waiting
        # we could use Thread.pass but this is expensive. Instead we can just lock until any
        # of the workers finishes their work and we can re-check. This means that in the worst
        # scenario, we will context switch 10 times per poll instead of getting this thread
        # scheduled by Ruby hundreds of thousands of times per group.
        # We cannot use a single semaphore as it could potentially block in listeners that should
        # process with their data and also could unlock when a given group needs to remain locked
        @semaphores = Hash.new { |h, k| h[k] = Queue.new }
        @in_processing = Hash.new { |h, k| h[k] = {} }
        @mutex = Mutex.new
      end

      # Returns number of jobs that are either enqueued or in processing (but not finished)
      # @return [Integer] number of elements in the queue
      # @note Using `#pop` won't decrease this number as only marking job as completed does this
      def size
        @in_processing.values.sum(&:size)
      end

      # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
      # this job as in processing pipeline.
      #
      # @param job [Jobs::Base] job that we want to run
      # @raise [Errors::JobsQueueSynchronizationError] when the same job id is already in flight
      #   within its group
      def <<(job)
        # We do not push the job if the queue is closed as it means that it would anyhow not be
        # executed
        return if @queue.closed?

        @mutex.synchronize do
          group = @in_processing[job.group_id]

          raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.key?(job.id)

          group[job.id] = true
        end

        @queue << job
      end

      # @return [Jobs::Base, nil] waits for a job from the main queue and returns it once available
      #   or returns nil if the queue has been stopped and there won't be anything more to process
      #   ever.
      # @note This command is blocking and will wait until any job is available on the main queue
      def pop
        @queue.pop
      end

      # Marks a given job from a given group as completed. When there are no more jobs from a given
      # group to be executed, we won't wait.
      #
      # @param job [Jobs::Base] job that was completed
      def complete(job)
        @mutex.synchronize do
          @in_processing[job.group_id].delete(job.id)
          # Signal any waiter of this group so it can re-check its wait condition
          @semaphores[job.group_id] << true
        end
      end

      # Clears the processing states for a provided group. Useful when a recovery happens and we
      # need to clean up state but only for a given subscription group.
      #
      # @param group_id [String]
      def clear(group_id)
        @mutex.synchronize do
          @in_processing[group_id].clear
          # We unlock it just in case it was blocked when clearing started
          @semaphores[group_id] << true
        end
      end

      # Stops the whole processing queue.
      def close
        @mutex.synchronize do
          return if @queue.closed?

          @queue.close
          @semaphores.each_value(&:close)
        end
      end

      # Blocks when there are things in the queue in a given group and waits until all the jobs
      # from a given group are completed
      # @param group_id [String] id of the group in which jobs we're interested.
      # @note This method is blocking.
      def wait(group_id)
        # Go doing other things while we cannot process and wait for anyone to finish their work
        # and re-check the wait status
        @semaphores[group_id].pop while wait?(group_id)
      end

      private

      # @param group_id [String] id of the group in which jobs we're interested.
      # @return [Boolean] should we keep waiting or not
      def wait?(group_id)
        # If it is stopping, all the previous messages that are processed at the moment need to
        # finish. Otherwise we may risk closing the client and committing offsets afterwards
        return false if Karafka::App.stopping? && @in_processing[group_id].empty?
        return false if @queue.closed?
        return false if @in_processing[group_id].empty?

        true
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Processing
    # Workers are used to run jobs in separate threads.
    # Workers are the main processing units of the Karafka framework.
    class Worker
      extend Forwardable

      def_delegators :@thread, :join, :terminate, :alive?

      # @param jobs_queue [JobsQueue]
      # @return [Worker]
      def initialize(jobs_queue)
        @jobs_queue = jobs_queue
        @thread = Thread.new do
          # If anything goes wrong in this worker thread, it means something went really wrong and
          # we should terminate.
          Thread.current.abort_on_exception = true
          loop { break unless process }
        end
      end

      private

      # Fetches a single job, processes it and marks as completed.
      #
      # @return [Boolean] true when a job was processed, false when the queue was closed
      #
      # @note We do not have error handling here, as no errors should propagate this far. If they
      #   do, it is a critical error and should bubble up.
      #
      # @note Upon closing the jobs queue, worker will close its thread
      def process
        job = @jobs_queue.pop

        if job
          job.call
          true
        else
          false
        end
      # We signal critical exceptions, notify and do not allow worker to fail
      # rubocop:disable Lint/RescueException
      rescue Exception => e
        # rubocop:enable Lint/RescueException
        Karafka.monitor.instrument(
          'error.occurred',
          caller: self,
          error: e,
          type: 'worker.process.error'
        )
      ensure
        # job can be nil when the queue is being closed
        @jobs_queue.complete(job) if job
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Processing
    # Abstraction layer around workers batch.
    class WorkersBatch
      include Enumerable

      # @param jobs_queue [JobsQueue]
      # @return [WorkersBatch]
      def initialize(jobs_queue)
        # One worker thread per configured concurrency slot, all consuming the same queue
        @batch = Array.new(App.config.concurrency) { Processing::Worker.new(jobs_queue) }
      end

      # Iterates over available workers and yields each worker
      # @param block [Proc] block we want to run
      def each(&block)
        @batch.each(&block)
      end
    end
  end
end