karafka 1.4.0 → 2.0.10
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +89 -18
- data/.ruby-version +1 -1
- data/CHANGELOG.md +365 -1
- data/CONTRIBUTING.md +10 -19
- data/Gemfile +6 -0
- data/Gemfile.lock +56 -112
- data/LICENSE +17 -0
- data/LICENSE-COMM +89 -0
- data/LICENSE-LGPL +165 -0
- data/README.md +61 -68
- data/bin/benchmarks +85 -0
- data/bin/create_token +22 -0
- data/bin/integrations +272 -0
- data/bin/karafka +10 -0
- data/bin/scenario +29 -0
- data/bin/stress_many +13 -0
- data/bin/stress_one +13 -0
- data/certs/cert_chain.pem +26 -0
- data/certs/karafka-pro.pem +11 -0
- data/config/errors.yml +59 -38
- data/docker-compose.yml +10 -3
- data/karafka.gemspec +18 -21
- data/lib/active_job/karafka.rb +21 -0
- data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
- data/lib/karafka/active_job/consumer.rb +26 -0
- data/lib/karafka/active_job/dispatcher.rb +38 -0
- data/lib/karafka/active_job/job_extensions.rb +34 -0
- data/lib/karafka/active_job/job_options_contract.rb +21 -0
- data/lib/karafka/active_job/routing/extensions.rb +33 -0
- data/lib/karafka/admin.rb +63 -0
- data/lib/karafka/app.rb +15 -20
- data/lib/karafka/base_consumer.rb +197 -31
- data/lib/karafka/cli/info.rb +44 -10
- data/lib/karafka/cli/install.rb +22 -12
- data/lib/karafka/cli/server.rb +17 -42
- data/lib/karafka/cli.rb +4 -3
- data/lib/karafka/connection/client.rb +379 -89
- data/lib/karafka/connection/listener.rb +250 -38
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +84 -0
- data/lib/karafka/connection/pauses_manager.rb +46 -0
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/connection/rebalance_manager.rb +78 -0
- data/lib/karafka/contracts/base.rb +17 -0
- data/lib/karafka/contracts/config.rb +88 -11
- data/lib/karafka/contracts/consumer_group.rb +21 -184
- data/lib/karafka/contracts/consumer_group_topic.rb +35 -11
- data/lib/karafka/contracts/server_cli_options.rb +19 -18
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/env.rb +46 -0
- data/lib/karafka/errors.rb +21 -21
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/helpers/colorize.rb +20 -0
- data/lib/karafka/helpers/multi_delegator.rb +2 -2
- data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
- data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
- data/lib/karafka/instrumentation/logger.rb +6 -10
- data/lib/karafka/instrumentation/logger_listener.rb +174 -0
- data/lib/karafka/instrumentation/monitor.rb +13 -61
- data/lib/karafka/instrumentation/notifications.rb +53 -0
- data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
- data/lib/karafka/instrumentation.rb +21 -0
- data/lib/karafka/licenser.rb +75 -0
- data/lib/karafka/messages/batch_metadata.rb +45 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +39 -0
- data/lib/karafka/messages/builders/message.rb +39 -0
- data/lib/karafka/messages/builders/messages.rb +34 -0
- data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
- data/lib/karafka/messages/messages.rb +64 -0
- data/lib/karafka/{params → messages}/metadata.rb +4 -6
- data/lib/karafka/messages/seek.rb +9 -0
- data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
- data/lib/karafka/pro/active_job/consumer.rb +46 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
- data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
- data/lib/karafka/pro/base_consumer.rb +107 -0
- data/lib/karafka/pro/contracts/base.rb +21 -0
- data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +69 -0
- data/lib/karafka/pro/loader.rb +76 -0
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/pro/processing/coordinator.rb +85 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
- data/lib/karafka/pro/processing/partitioner.rb +58 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
- data/lib/karafka/pro/routing/topic_extensions.rb +74 -0
- data/lib/karafka/pro.rb +13 -0
- data/lib/karafka/process.rb +1 -0
- data/lib/karafka/processing/coordinator.rb +103 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +126 -0
- data/lib/karafka/processing/executors_buffer.rb +88 -0
- data/lib/karafka/processing/jobs/base.rb +55 -0
- data/lib/karafka/processing/jobs/consume.rb +47 -0
- data/lib/karafka/processing/jobs/revoked.rb +22 -0
- data/lib/karafka/processing/jobs/shutdown.rb +23 -0
- data/lib/karafka/processing/jobs_builder.rb +29 -0
- data/lib/karafka/processing/jobs_queue.rb +144 -0
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +37 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +91 -0
- data/lib/karafka/processing/workers_batch.rb +27 -0
- data/lib/karafka/railtie.rb +127 -0
- data/lib/karafka/routing/builder.rb +26 -23
- data/lib/karafka/routing/consumer_group.rb +37 -17
- data/lib/karafka/routing/consumer_mapper.rb +1 -2
- data/lib/karafka/routing/proxy.rb +9 -16
- data/lib/karafka/routing/router.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +53 -0
- data/lib/karafka/routing/subscription_groups_builder.rb +54 -0
- data/lib/karafka/routing/topic.rb +65 -24
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +51 -0
- data/lib/karafka/serialization/json/deserializer.rb +6 -15
- data/lib/karafka/server.rb +67 -26
- data/lib/karafka/setup/config.rb +153 -175
- data/lib/karafka/status.rb +14 -5
- data/lib/karafka/templates/example_consumer.rb.erb +16 -0
- data/lib/karafka/templates/karafka.rb.erb +17 -55
- data/lib/karafka/time_trackers/base.rb +19 -0
- data/lib/karafka/time_trackers/pause.rb +92 -0
- data/lib/karafka/time_trackers/poll.rb +65 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +46 -16
- data.tar.gz.sig +0 -0
- metadata +145 -171
- metadata.gz.sig +0 -0
- data/.github/FUNDING.yml +0 -3
- data/MIT-LICENCE +0 -18
- data/certs/mensfeld.pem +0 -25
- data/lib/karafka/attributes_map.rb +0 -62
- data/lib/karafka/backends/inline.rb +0 -16
- data/lib/karafka/base_responder.rb +0 -226
- data/lib/karafka/cli/flow.rb +0 -48
- data/lib/karafka/code_reloader.rb +0 -67
- data/lib/karafka/connection/api_adapter.rb +0 -161
- data/lib/karafka/connection/batch_delegator.rb +0 -55
- data/lib/karafka/connection/builder.rb +0 -18
- data/lib/karafka/connection/message_delegator.rb +0 -36
- data/lib/karafka/consumers/batch_metadata.rb +0 -10
- data/lib/karafka/consumers/callbacks.rb +0 -71
- data/lib/karafka/consumers/includer.rb +0 -64
- data/lib/karafka/consumers/responders.rb +0 -24
- data/lib/karafka/consumers/single_params.rb +0 -15
- data/lib/karafka/contracts/responder_usage.rb +0 -54
- data/lib/karafka/fetcher.rb +0 -42
- data/lib/karafka/helpers/class_matcher.rb +0 -88
- data/lib/karafka/helpers/config_retriever.rb +0 -46
- data/lib/karafka/helpers/inflector.rb +0 -26
- data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
- data/lib/karafka/params/batch_metadata.rb +0 -26
- data/lib/karafka/params/builders/batch_metadata.rb +0 -30
- data/lib/karafka/params/builders/params.rb +0 -38
- data/lib/karafka/params/builders/params_batch.rb +0 -25
- data/lib/karafka/params/params_batch.rb +0 -60
- data/lib/karafka/patches/ruby_kafka.rb +0 -47
- data/lib/karafka/persistence/client.rb +0 -29
- data/lib/karafka/persistence/consumers.rb +0 -45
- data/lib/karafka/persistence/topics.rb +0 -48
- data/lib/karafka/responders/builder.rb +0 -36
- data/lib/karafka/responders/topic.rb +0 -55
- data/lib/karafka/routing/topic_mapper.rb +0 -53
- data/lib/karafka/serialization/json/serializer.rb +0 -31
- data/lib/karafka/setup/configurators/water_drop.rb +0 -36
- data/lib/karafka/templates/application_responder.rb.erb +0 -11
data/lib/karafka/pro/processing/partitioner.rb
ADDED
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Pro partitioner that can distribute work based on the virtual partitioner settings
+      class Partitioner < ::Karafka::Processing::Partitioner
+        # @param topic [String] topic name
+        # @param messages [Array<Karafka::Messages::Message>] karafka messages
+        # @yieldparam [Integer] group id
+        # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
+        def call(topic, messages)
+          ktopic = @subscription_group.topics.find(topic)
+
+          # We only partition work if we have a virtual partitioner and more than one thread to
+          # process the data. With one thread it is not worth partitioning the work as the work
+          # itself will be assigned to one thread (pointless work)
+          if ktopic.virtual_partitions? && ktopic.virtual_partitions.max_partitions > 1
+            # We need to reduce it to number of threads, so the group_id is not a direct effect
+            # of the end user action. Otherwise the persistence layer for consumers would cache
+            # it forever and it would cause memory leaks
+            groupings = messages
+                        .group_by { |msg| ktopic.virtual_partitions.partitioner.call(msg) }
+                        .values
+
+            # Reduce the number of virtual partitions to a size that matches the max_partitions
+            # As mentioned above we cannot use the partitioning keys directly as it could cause
+            # memory leaks
+            #
+            # The algorithm here is simple, we assume that the most costly in terms of processing,
+            # will be processing of the biggest group and we reduce the smallest once to have
+            # max of groups equal to max_partitions
+            while groupings.size > ktopic.virtual_partitions.max_partitions
+              groupings.sort_by! { |grouping| -grouping.size }
+
+              # Offset order needs to be maintained for virtual partitions
+              groupings << (groupings.pop + groupings.pop).sort_by!(&:offset)
+            end
+
+            groupings.each_with_index { |messages_group, index| yield(index, messages_group) }
+          else
+            # When no virtual partitioner, works as regular one
+            yield(0, messages)
+          end
+        end
+      end
+    end
+  end
+end
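For orientation, a minimal sketch of the grouping and reduction behaviour above, run outside Karafka on plain hashes (the message data, keys and max_partitions value are made up for illustration):

# Illustration only: messages are grouped by the partitioner result, then the
# smallest groups are merged (keeping offset order) until at most max_partitions remain.
messages = [
  { key: 'a', offset: 0 }, { key: 'b', offset: 1 },
  { key: 'c', offset: 2 }, { key: 'a', offset: 3 }
]
max_partitions = 2

groupings = messages.group_by { |msg| msg[:key] }.values

while groupings.size > max_partitions
  groupings.sort_by! { |grouping| -grouping.size }
  groupings << (groupings.pop + groupings.pop).sort_by! { |msg| msg[:offset] }
end

groupings.each_with_index { |group, index| puts "virtual partition #{index}: #{group.inspect}" }
# virtual partition 0: two 'a' messages; virtual partition 1: the merged 'b' and 'c' messages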
data/lib/karafka/pro/processing/scheduler.rb
ADDED
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Optimizes scheduler that takes into consideration of execution time needed to process
+      # messages from given topics partitions. It uses the non-preemptive LJF algorithm
+      #
+      # This scheduler is designed to optimize execution times on jobs that perform IO operations
+      # as when taking IO into consideration, the can achieve optimized parallel processing.
+      #
+      # This scheduler can also work with virtual partitions.
+      #
+      # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
+      # default FIFO scheduler from the default Karafka scheduler
+      class Scheduler < ::Karafka::Processing::Scheduler
+        # Schedules jobs in the LJF order for consumption
+        #
+        # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
+        #
+        def schedule_consumption(queue, jobs_array)
+          pt = PerformanceTracker.instance
+
+          ordered = []
+
+          jobs_array.each do |job|
+            messages = job.messages
+            message = messages.first
+
+            cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
+
+            ordered << [job, cost]
+          end
+
+          ordered.sort_by!(&:last)
+          ordered.reverse!
+          ordered.map!(&:first)
+
+          ordered.each do |job|
+            queue << job
+          end
+        end
+      end
+    end
+  end
+end
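The LJF (longest job first) ordering itself is simple to picture; the sketch below reproduces it with plain arrays and fabricated p95-based costs (job names and numbers are illustrative, not taken from the diff):

# Each pair is [job, estimated cost = p95 processing time * batch size].
# Sorting by cost and reversing yields the most expensive job first.
jobs_with_costs = [['job_a', 12.5], ['job_b', 80.0], ['job_c', 3.2]]

ordered = jobs_with_costs.sort_by(&:last).reverse.map(&:first)
# => ["job_b", "job_a", "job_c"]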
data/lib/karafka/pro/routing/builder_extensions.rb
ADDED
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Pro routing components
+    module Routing
+      # Routing extensions for builder to be able to validate Pro components correct usage
+      module BuilderExtensions
+        # Validate consumer groups with pro contracts
+        # @param block [Proc] routing defining block
+        def draw(&block)
+          super
+
+          each do |consumer_group|
+            ::Karafka::Pro::Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
+          end
+        end
+      end
+    end
+  end
+end
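This module is meant to be prepended to the regular routing builder so that `draw` first runs the standard routing setup and then the Pro contract validation. A hedged sketch of how such wiring could look; the actual hook lives in the Pro loader, which is not shown in this excerpt:

# Assumption for illustration: prepending the extension makes every `draw` call
# also validate consumer groups with the Pro contracts.
::Karafka::Routing::Builder.prepend(::Karafka::Pro::Routing::BuilderExtensions)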
data/lib/karafka/pro/routing/topic_extensions.rb
ADDED
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Pro routing components
+    module Routing
+      # Routing extensions that allow to configure some extra PRO routing options
+      module TopicExtensions
+        # Internal representation of the virtual partitions settings and configuration
+        # This allows us to abstract away things in a nice manner
+        #
+        # For features with more options than just on/off we use this approach as it simplifies
+        # the code. We do not use it for all not to create unneeded complexity
+        VirtualPartitions = Struct.new(
+          :active,
+          :partitioner,
+          :max_partitions,
+          keyword_init: true
+        ) { alias_method :active?, :active }
+
+        class << self
+          # @param base [Class] class we extend
+          def prepended(base)
+            base.attr_accessor :long_running_job
+          end
+        end
+
+        # @param max_partitions [Integer] max number of virtual partitions that can come out of the
+        #   single distribution flow. When set to more than the Karafka threading, will create
+        #   more work than workers. When less, can ensure we have spare resources to process other
+        #   things in parallel.
+        # @param partitioner [nil, #call] nil or callable partitioner
+        # @return [VirtualPartitions] method that allows to set the virtual partitions details
+        #   during the routing configuration and then allows to retrieve it
+        def virtual_partitions(
+          max_partitions: Karafka::App.config.concurrency,
+          partitioner: nil
+        )
+          @virtual_partitions ||= VirtualPartitions.new(
+            active: !partitioner.nil?,
+            max_partitions: max_partitions,
+            partitioner: partitioner
+          )
+        end
+
+        # @return [Boolean] are virtual partitions enabled for given topic
+        def virtual_partitions?
+          virtual_partitions.active?
+        end
+
+        # @return [Boolean] is a given job on a topic a long-running one
+        def long_running_job?
+          @long_running_job || false
+        end
+
+        # @return [Hash] hash with topic details and the extensions details
+        def to_h
+          super.merge(
+            virtual_partitions: virtual_partitions.to_h
+          )
+        end
+      end
+    end
+  end
+end
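With these extensions in place, virtual partitions are enabled per topic from the routing DSL. A minimal routing sketch matching the `virtual_partitions` signature above; the topic, consumer class and header name are illustrative placeholders:

class KarafkaApp < Karafka::App
  routes.draw do
    topic :orders_states do
      consumer OrdersStatesConsumer

      # The partitioner decides the virtual grouping key for each message;
      # max_partitions caps how many parallel groups one batch may be split into.
      virtual_partitions(
        partitioner: ->(message) { message.headers['order_id'] },
        max_partitions: 5
      )
    end
  end
end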
data/lib/karafka/pro.rb
ADDED
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this repository
+# and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+module Karafka
+  # Namespace for pro components, licensed under the commercial license agreement.
+  module Pro
+  end
+end
data/lib/karafka/process.rb
CHANGED
data/lib/karafka/processing/coordinator.rb
ADDED
@@ -0,0 +1,103 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Processing
+    # Basic coordinator that allows us to provide coordination objects into consumers.
+    #
+    # This is a wrapping layer to simplify management of work to be handled around consumption.
+    #
+    # @note This coordinator needs to be thread safe. Some operations are performed only in the
+    #   listener thread, but we go with thread-safe by default for all not to worry about potential
+    #   future mistakes.
+    class Coordinator
+      # @return [Karafka::TimeTrackers::Pause]
+      attr_reader :pause_tracker
+
+      attr_reader :seek_offset
+
+      # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic partition
+      def initialize(pause_tracker)
+        @pause_tracker = pause_tracker
+        @revoked = false
+        @consumptions = {}
+        @running_jobs = 0
+        @mutex = Mutex.new
+      end
+
+      # Starts the coordinator for given consumption jobs
+      # @param messages [Array<Karafka::Messages::Message>] batch of message for which we are
+      #   going to coordinate work. Not used with regular coordinator.
+      def start(messages)
+        @mutex.synchronize do
+          @running_jobs = 0
+          # We need to clear the consumption results hash here, otherwise we could end up storing
+          # consumption results of consumer instances we no longer control
+          @consumptions.clear
+
+          # We set it on the first encounter and never again, because then the offset setting
+          # should be up to the consumers logic (our or the end user)
+          # Seek offset needs to be always initialized as for case where manual offset management
+          # is turned on, we need to have reference to the first offset even in case of running
+          # multiple batches without marking any messages as consumed. Rollback needs to happen to
+          # the last place we know of or the last message + 1 that was marked
+          @seek_offset ||= messages.first.offset
+        end
+      end
+
+      # @param offset [Integer] message offset
+      def seek_offset=(offset)
+        @mutex.synchronize { @seek_offset = offset }
+      end
+
+      # Increases number of jobs that we handle with this coordinator
+      def increment
+        @mutex.synchronize { @running_jobs += 1 }
+      end
+
+      # Decrements number of jobs we handle at the moment
+      def decrement
+        @mutex.synchronize do
+          @running_jobs -= 1
+
+          return @running_jobs unless @running_jobs.negative?
+
+          # This should never happen. If it does, something is heavily out of sync. Please reach
+          # out to us if you encounter this
+          raise Karafka::Errors::InvalidCoordinatorState, 'Was zero before decrementation'
+        end
+      end
+
+      # @param consumer [Object] karafka consumer (normal or pro)
+      # @return [Karafka::Processing::Result] result object which we can use to indicate
+      #   consumption processing state.
+      def consumption(consumer)
+        @mutex.synchronize do
+          @consumptions[consumer] ||= Processing::Result.new
+        end
+      end
+
+      # Is all the consumption done and finished successfully for this coordinator
+      def success?
+        @mutex.synchronize { @running_jobs.zero? && @consumptions.values.all?(&:success?) }
+      end
+
+      # Marks given coordinator for processing group as revoked
+      #
+      # This is invoked in two places:
+      #   - from the main listener loop when we detect revoked partitions
+      #   - from the consumer in case checkpointing fails
+      #
+      # This means, we can end up having consumer being aware that it was revoked prior to the
+      # listener loop dispatching the revocation job. It is ok, as effectively nothing will be
+      # processed until revocation jobs are done.
+      def revoke
+        @mutex.synchronize { @revoked = true }
+      end
+
+      # @return [Boolean] is the partition we are processing revoked or not
+      def revoked?
+        @revoked
+      end
+    end
+  end
+end
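To make the coordinator's job-counting contract concrete, here is a rough sketch of the flow the listener and workers follow, assuming `coordinator`, `messages` and `jobs` are already in scope (this is simplified pseudocode of the interaction, not the actual listener code):

# Listener: start the coordinator for a batch and bump the counter once per dispatched job.
coordinator.start(messages)
jobs.each { coordinator.increment }

# Worker: after each job finishes, release its slot.
coordinator.decrement

# Listener: once every job finished and no consumption failed, the batch may be checkpointed.
puts 'batch fully processed' if coordinator.success?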
data/lib/karafka/processing/coordinators_buffer.rb
ADDED
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Processing
+    # Coordinators builder used to build coordinators per topic partition
+    #
+    # It provides direct pauses access for revocation
+    #
+    # @note This buffer operates only from the listener loop, thus we do not have to make it
+    #   thread-safe.
+    class CoordinatorsBuffer
+      def initialize
+        @pauses_manager = Connection::PausesManager.new
+        @coordinator_class = ::Karafka::App.config.internal.processing.coordinator_class
+        @coordinators = Hash.new { |h, k| h[k] = {} }
+      end
+
+      # @param topic [String] topic name
+      # @param partition [Integer] partition number
+      def find_or_create(topic, partition)
+        @coordinators[topic][partition] ||= @coordinator_class.new(
+          @pauses_manager.fetch(topic, partition)
+        )
+      end
+
+      # Resumes processing of partitions for which pause time has ended.
+      # @param block we want to run for resumed topic partitions
+      # @yieldparam [String] topic name
+      # @yieldparam [Integer] partition number
+      def resume(&block)
+        @pauses_manager.resume(&block)
+      end
+
+      # @param topic [String] topic name
+      # @param partition [Integer] partition number
+      def revoke(topic, partition)
+        return unless @coordinators[topic].key?(partition)
+
+        # The fact that we delete here does not change the fact that the executor still holds the
+        # reference to this coordinator. We delete it here, as we will no longer process any
+        # new stuff with it and we may need a new coordinator if we regain this partition, but the
+        # coordinator may still be in use
+        @coordinators[topic].delete(partition).revoke
+      end
+
+      # Clears coordinators and re-created the pauses manager
+      # This should be used only for critical errors recovery
+      def reset
+        @pauses_manager = Connection::PausesManager.new
+        @coordinators.clear
+      end
+    end
+  end
+end
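Access is keyed by topic name and partition number. A short usage sketch, assuming it runs inside a booted Karafka process (the topic name is illustrative):

buffer = Karafka::Processing::CoordinatorsBuffer.new

# One coordinator per topic partition, re-used across batches
coordinator = buffer.find_or_create('orders_states', 0)

# On rebalance, drop the coordinator for a lost partition; on critical errors, reset everything
buffer.revoke('orders_states', 0)
buffer.reset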
data/lib/karafka/processing/executor.rb
ADDED
@@ -0,0 +1,126 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Namespace that encapsulates all the logic related to processing data.
+  module Processing
+    # Executors:
+    # - run consumers code (for `#call`) or run given preparation / teardown operations when needed
+    #   from separate threads.
+    # - they re-create consumer instances in case of partitions that were revoked and assigned
+    #   back.
+    #
+    # @note Executors are not removed after partition is revoked. They are not that big and will
+    #   be re-used in case of a re-claim
+    class Executor
+      # @return [String] unique id that we use to ensure, that we use for state tracking
+      attr_reader :id
+
+      # @return [String] subscription group id to which a given executor belongs
+      attr_reader :group_id
+
+      # @return [Karafka::Messages::Messages] messages batch
+      attr_reader :messages
+
+      # Topic accessibility may be needed for the jobs builder to be able to build a proper job
+      # based on the topic settings defined by the end user
+      #
+      # @return [Karafka::Routing::Topic] topic of this executor
+      attr_reader :topic
+
+      # @param group_id [String] id of the subscription group to which the executor belongs
+      # @param client [Karafka::Connection::Client] kafka client
+      # @param topic [Karafka::Routing::Topic] topic for which this executor will run
+      def initialize(group_id, client, topic)
+        @id = SecureRandom.uuid
+        @group_id = group_id
+        @client = client
+        @topic = topic
+      end
+
+      # Allows us to prepare the consumer in the listener thread prior to the job being send to
+      # the queue. It also allows to run some code that is time sensitive and cannot wait in the
+      # queue as it could cause starvation.
+      #
+      # @param messages [Array<Karafka::Messages::Message>]
+      # @param coordinator [Karafka::Processing::Coordinator] coordinator for processing management
+      def before_enqueue(messages, coordinator)
+        # the moment we've received the batch or actually the moment we've enqueued it,
+        # but good enough
+        @enqueued_at = Time.now
+
+        # Recreate consumer with each batch if persistence is not enabled
+        # We reload the consumers with each batch instead of relying on some external signals
+        # when needed for consistency. That way devs may have it on or off and not in this
+        # middle state, where re-creation of a consumer instance would occur only sometimes
+        @consumer = nil unless ::Karafka::App.config.consumer_persistence
+
+        consumer.coordinator = coordinator
+
+        # First we build messages batch...
+        consumer.messages = Messages::Builders::Messages.call(
+          messages,
+          @topic,
+          @enqueued_at
+        )
+
+        consumer.on_before_enqueue
+      end
+
+      # Runs setup and warm-up code in the worker prior to running the consumption
+      def before_consume
+        consumer.on_before_consume
+      end
+
+      # Runs consumer data processing against given batch and handles failures and errors.
+      def consume
+        # We run the consumer client logic...
+        consumer.on_consume
+      end
+
+      # Runs consumer after consumption code
+      def after_consume
+        consumer.on_after_consume
+      end
+
+      # Runs the controller `#revoked` method that should be triggered when a given consumer is
+      # no longer needed due to partitions reassignment.
+      #
+      # @note Clearing the consumer will ensure, that if we get the partition back, it will be
+      #   handled with a consumer with a clean state.
+      #
+      # @note We run it only when consumer was present, because presence indicates, that at least
+      #   a single message has been consumed.
+      #
+      # @note We do not reset the consumer but we indicate need for recreation instead, because
+      #   after the revocation, there still may be `#after_consume` running that needs a given
+      #   consumer instance.
+      def revoked
+        consumer.on_revoked if @consumer
+      end
+
+      # Runs the controller `#shutdown` method that should be triggered when a given consumer is
+      # no longer needed as we're closing the process.
+      #
+      # @note While we do not need to clear the consumer here, it's a good habit to clean after
+      #   work is done.
+      def shutdown
+        # There is a case, where the consumer no longer exists because it was revoked, in case like
+        # that we do not build a new instance and shutdown should not be triggered.
+        consumer.on_shutdown if @consumer
+      end
+
+      private
+
+      # @return [Object] cached consumer instance
+      def consumer
+        @consumer ||= begin
+          consumer = @topic.consumer_class.new
+          consumer.topic = @topic
+          consumer.client = @client
+          consumer.producer = ::Karafka::App.producer
+          consumer
+        end
+      end
+    end
+  end
+end
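The executor methods map directly onto the job lifecycle. Roughly, the call order per batch looks like the sketch below; it assumes `executor`, `messages` and `coordinator` already exist and is a simplified picture of what the jobs and worker do, not the actual worker code:

# Listener thread: time-sensitive setup and building of the consumer messages batch
executor.before_enqueue(messages, coordinator)

# Worker thread: warm-up, the actual consumption and the after-consumption handling
executor.before_consume
executor.consume
executor.after_consume

# On partition reassignment or process stop respectively
executor.revoked
executor.shutdown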
data/lib/karafka/processing/executors_buffer.rb
ADDED
@@ -0,0 +1,88 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Processing
+    # Buffer for executors of a given subscription group. It wraps around the concept of building
+    # and caching them, so we can re-use them instead of creating new each time.
+    class ExecutorsBuffer
+      # @param client [Connection::Client]
+      # @param subscription_group [Routing::SubscriptionGroup]
+      # @return [ExecutorsBuffer]
+      def initialize(client, subscription_group)
+        @subscription_group = subscription_group
+        @client = client
+        # We need two layers here to keep track of topics, partitions and processing groups
+        @buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
+      end
+
+      # Finds or creates an executor based on the provided details
+      #
+      # @param topic [String] topic name
+      # @param partition [Integer] partition number
+      # @param parallel_key [String] parallel group key
+      # @return [Executor] consumer executor
+      def find_or_create(topic, partition, parallel_key)
+        ktopic = find_topic(topic)
+
+        @buffer[ktopic][partition][parallel_key] ||= Executor.new(
+          @subscription_group.id,
+          @client,
+          ktopic
+        )
+      end
+
+      # Revokes executors of a given topic partition, so they won't be used anymore for incoming
+      # messages
+      #
+      # @param topic [String] topic name
+      # @param partition [Integer] partition number
+      def revoke(topic, partition)
+        ktopic = find_topic(topic)
+
+        @buffer[ktopic][partition].clear
+      end
+
+      # Finds all the executors available for a given topic partition
+      #
+      # @param topic [String] topic name
+      # @param partition [Integer] partition number
+      # @return [Array<Executor>] executors in use for this topic + partition
+      def find_all(topic, partition)
+        ktopic = find_topic(topic)
+
+        @buffer[ktopic][partition].values
+      end
+
+      # Iterates over all available executors and yields them together with topic and partition
+      # info
+      # @yieldparam [Routing::Topic] karafka routing topic object
+      # @yieldparam [Integer] partition number
+      # @yieldparam [Executor] given executor
+      def each
+        @buffer.each do |ktopic, partitions|
+          partitions.each do |partition, executors|
+            executors.each do |_parallel_key, executor|
+              # We skip the parallel key here as it does not serve any value when iterating
+              yield(ktopic, partition, executor)
+            end
+          end
+        end
+      end
+
+      # Clears the executors buffer. Useful for critical errors recovery.
+      def clear
+        @buffer.clear
+      end
+
+      private
+
+      # Finds topic based on its name
+      #
+      # @param topic [String] topic we're looking for
+      # @return [Karafka::Routing::Topic] topic we're interested in
+      def find_topic(topic)
+        @subscription_group.topics.find(topic) || raise(Errors::TopicNotFoundError, topic)
+      end
+    end
+  end
+end
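A short usage sketch, assuming a `client` and a `subscription_group` that already routes the illustrative topic; the parallel key is whatever group id the partitioner yielded (here a plain group index):

buffer = Karafka::Processing::ExecutorsBuffer.new(client, subscription_group)

# One executor per topic + partition + parallel group; cached and re-used across batches
executor = buffer.find_or_create('orders_states', 0, 0)

# Iterate all cached executors, e.g. to trigger shutdown on each of them
buffer.each { |_topic, _partition, ex| ex.shutdown }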
data/lib/karafka/processing/jobs/base.rb
ADDED
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Processing
+    # Namespace for all the jobs that are supposed to run in workers.
+    module Jobs
+      # Base class for all the jobs types that are suppose to run in workers threads.
+      # Each job can have 3 main entry-points: `#before_call`, `#call` and `#after_call`
+      # Only `#call` is required.
+      class Base
+        extend Forwardable
+
+        # @note Since one job has always one executor, we use the jobs id and group id as reference
+        def_delegators :executor, :id, :group_id
+
+        attr_reader :executor
+
+        # Creates a new job instance
+        def initialize
+          # All jobs are blocking by default and they can release the lock when blocking operations
+          # are done (if needed)
+          @non_blocking = false
+        end
+
+        # When redefined can run any code prior to the job being enqueued
+        # @note This will run in the listener thread and not in the worker
+        def before_enqueue; end
+
+        # When redefined can run any code that should run before executing the proper code
+        def before_call; end
+
+        # The main entry-point of a job
+        def call
+          raise NotImplementedError, 'Please implement in a subclass'
+        end
+
+        # When redefined can run any code that should run after executing the proper code
+        def after_call; end
+
+        # @return [Boolean] is this a non-blocking job
+        #
+        # @note Blocking job is a job, that will cause the job queue to wait until it is finished
+        #   before removing the lock on new jobs being added
+        #
+        # @note All the jobs are blocking by default
+        #
+        # @note Job **needs** to mark itself as non-blocking only **after** it is done with all
+        #   the blocking things (pausing partition, etc).
+        def non_blocking?
+          @non_blocking
+        end
+      end
+    end
+  end
+end
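A subclass only has to implement `#call`. A minimal, hypothetical job built on this base class could look like the sketch below; the real jobs in this release (Consume, Revoked, Shutdown) follow the same pattern of wrapping an executor and delegating to it:

# Hypothetical example job; not part of the diff.
class CleanupJob < Karafka::Processing::Jobs::Base
  # @param executor [Karafka::Processing::Executor] executor this job delegates to
  def initialize(executor)
    super()
    @executor = executor
  end

  # The required entry-point that the worker thread will run
  def call
    @executor.shutdown
  end
end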