karafka 1.4.13 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +3 -3
- data/.github/workflows/ci.yml +85 -30
- data/.ruby-version +1 -1
- data/CHANGELOG.md +268 -7
- data/CONTRIBUTING.md +10 -19
- data/Gemfile +6 -0
- data/Gemfile.lock +44 -87
- data/LICENSE +17 -0
- data/LICENSE-COMM +89 -0
- data/LICENSE-LGPL +165 -0
- data/README.md +44 -48
- data/bin/benchmarks +85 -0
- data/bin/create_token +22 -0
- data/bin/integrations +237 -0
- data/bin/karafka +4 -0
- data/bin/scenario +29 -0
- data/bin/stress_many +13 -0
- data/bin/stress_one +13 -0
- data/bin/wait_for_kafka +20 -0
- data/certs/karafka-pro.pem +11 -0
- data/config/errors.yml +55 -40
- data/docker-compose.yml +39 -3
- data/karafka.gemspec +11 -17
- data/lib/active_job/karafka.rb +21 -0
- data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
- data/lib/karafka/active_job/consumer.rb +26 -0
- data/lib/karafka/active_job/dispatcher.rb +38 -0
- data/lib/karafka/active_job/job_extensions.rb +34 -0
- data/lib/karafka/active_job/job_options_contract.rb +21 -0
- data/lib/karafka/active_job/routing/extensions.rb +31 -0
- data/lib/karafka/app.rb +15 -20
- data/lib/karafka/base_consumer.rb +181 -31
- data/lib/karafka/cli/base.rb +4 -4
- data/lib/karafka/cli/info.rb +43 -9
- data/lib/karafka/cli/install.rb +19 -10
- data/lib/karafka/cli/server.rb +17 -42
- data/lib/karafka/cli.rb +4 -11
- data/lib/karafka/connection/client.rb +385 -90
- data/lib/karafka/connection/listener.rb +246 -38
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +84 -0
- data/lib/karafka/connection/pauses_manager.rb +46 -0
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/connection/rebalance_manager.rb +78 -0
- data/lib/karafka/contracts/base.rb +17 -0
- data/lib/karafka/contracts/config.rb +88 -11
- data/lib/karafka/contracts/consumer_group.rb +21 -189
- data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
- data/lib/karafka/contracts/server_cli_options.rb +19 -18
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/env.rb +46 -0
- data/lib/karafka/errors.rb +21 -21
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/helpers/colorize.rb +20 -0
- data/lib/karafka/helpers/multi_delegator.rb +2 -2
- data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
- data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
- data/lib/karafka/instrumentation/logger_listener.rb +164 -0
- data/lib/karafka/instrumentation/monitor.rb +13 -61
- data/lib/karafka/instrumentation/notifications.rb +52 -0
- data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
- data/lib/karafka/instrumentation.rb +21 -0
- data/lib/karafka/licenser.rb +75 -0
- data/lib/karafka/messages/batch_metadata.rb +45 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
- data/lib/karafka/messages/builders/message.rb +39 -0
- data/lib/karafka/messages/builders/messages.rb +32 -0
- data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
- data/lib/karafka/messages/messages.rb +64 -0
- data/lib/karafka/{params → messages}/metadata.rb +4 -6
- data/lib/karafka/messages/seek.rb +9 -0
- data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
- data/lib/karafka/pro/active_job/consumer.rb +46 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
- data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
- data/lib/karafka/pro/base_consumer.rb +82 -0
- data/lib/karafka/pro/contracts/base.rb +21 -0
- data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
- data/lib/karafka/pro/loader.rb +76 -0
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/pro/processing/coordinator.rb +72 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
- data/lib/karafka/pro/processing/partitioner.rb +60 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
- data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
- data/lib/karafka/pro.rb +13 -0
- data/lib/karafka/process.rb +1 -0
- data/lib/karafka/processing/coordinator.rb +88 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +118 -0
- data/lib/karafka/processing/executors_buffer.rb +88 -0
- data/lib/karafka/processing/jobs/base.rb +51 -0
- data/lib/karafka/processing/jobs/consume.rb +42 -0
- data/lib/karafka/processing/jobs/revoked.rb +22 -0
- data/lib/karafka/processing/jobs/shutdown.rb +23 -0
- data/lib/karafka/processing/jobs_builder.rb +29 -0
- data/lib/karafka/processing/jobs_queue.rb +144 -0
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +29 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +88 -0
- data/lib/karafka/processing/workers_batch.rb +27 -0
- data/lib/karafka/railtie.rb +113 -0
- data/lib/karafka/routing/builder.rb +15 -24
- data/lib/karafka/routing/consumer_group.rb +11 -19
- data/lib/karafka/routing/consumer_mapper.rb +1 -2
- data/lib/karafka/routing/router.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +53 -0
- data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
- data/lib/karafka/routing/topic.rb +61 -24
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +51 -0
- data/lib/karafka/serialization/json/deserializer.rb +6 -15
- data/lib/karafka/server.rb +67 -26
- data/lib/karafka/setup/config.rb +147 -175
- data/lib/karafka/status.rb +14 -5
- data/lib/karafka/templates/example_consumer.rb.erb +16 -0
- data/lib/karafka/templates/karafka.rb.erb +15 -51
- data/lib/karafka/time_trackers/base.rb +19 -0
- data/lib/karafka/time_trackers/pause.rb +92 -0
- data/lib/karafka/time_trackers/poll.rb +65 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +38 -17
- data.tar.gz.sig +0 -0
- metadata +118 -120
- metadata.gz.sig +0 -0
- data/MIT-LICENCE +0 -18
- data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
- data/lib/karafka/attributes_map.rb +0 -63
- data/lib/karafka/backends/inline.rb +0 -16
- data/lib/karafka/base_responder.rb +0 -226
- data/lib/karafka/cli/flow.rb +0 -48
- data/lib/karafka/cli/missingno.rb +0 -19
- data/lib/karafka/code_reloader.rb +0 -67
- data/lib/karafka/connection/api_adapter.rb +0 -158
- data/lib/karafka/connection/batch_delegator.rb +0 -55
- data/lib/karafka/connection/builder.rb +0 -23
- data/lib/karafka/connection/message_delegator.rb +0 -36
- data/lib/karafka/consumers/batch_metadata.rb +0 -10
- data/lib/karafka/consumers/callbacks.rb +0 -71
- data/lib/karafka/consumers/includer.rb +0 -64
- data/lib/karafka/consumers/responders.rb +0 -24
- data/lib/karafka/consumers/single_params.rb +0 -15
- data/lib/karafka/contracts/responder_usage.rb +0 -54
- data/lib/karafka/fetcher.rb +0 -42
- data/lib/karafka/helpers/class_matcher.rb +0 -88
- data/lib/karafka/helpers/config_retriever.rb +0 -46
- data/lib/karafka/helpers/inflector.rb +0 -26
- data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
- data/lib/karafka/params/batch_metadata.rb +0 -26
- data/lib/karafka/params/builders/batch_metadata.rb +0 -30
- data/lib/karafka/params/builders/params.rb +0 -38
- data/lib/karafka/params/builders/params_batch.rb +0 -25
- data/lib/karafka/params/params_batch.rb +0 -60
- data/lib/karafka/patches/ruby_kafka.rb +0 -47
- data/lib/karafka/persistence/client.rb +0 -29
- data/lib/karafka/persistence/consumers.rb +0 -45
- data/lib/karafka/persistence/topics.rb +0 -48
- data/lib/karafka/responders/builder.rb +0 -36
- data/lib/karafka/responders/topic.rb +0 -55
- data/lib/karafka/routing/topic_mapper.rb +0 -53
- data/lib/karafka/serialization/json/serializer.rb +0 -31
- data/lib/karafka/setup/configurators/water_drop.rb +0 -36
- data/lib/karafka/templates/application_responder.rb.erb +0 -11
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
# Coordinators buffer used to build and cache coordinators per topic partition
|
|
6
|
+
#
|
|
7
|
+
# It provides direct pauses access for revocation
|
|
8
|
+
#
|
|
9
|
+
# @note This buffer operates only from the listener loop, thus we do not have to make it
|
|
10
|
+
# thread-safe.
|
|
11
|
+
class CoordinatorsBuffer
|
|
12
|
+
def initialize
|
|
13
|
+
@pauses_manager = Connection::PausesManager.new
|
|
14
|
+
@coordinator_class = ::Karafka::App.config.internal.processing.coordinator_class
|
|
15
|
+
@coordinators = Hash.new { |h, k| h[k] = {} }
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# @param topic [String] topic name
|
|
19
|
+
# @param partition [Integer] partition number
|
|
20
|
+
def find_or_create(topic, partition)
|
|
21
|
+
@coordinators[topic][partition] ||= @coordinator_class.new(
|
|
22
|
+
@pauses_manager.fetch(topic, partition)
|
|
23
|
+
)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Resumes processing of partitions for which pause time has ended.
|
|
27
|
+
# @param block [Proc] block we want to run for resumed topic partitions
|
|
28
|
+
# @yieldparam [String] topic name
|
|
29
|
+
# @yieldparam [Integer] partition number
|
|
30
|
+
def resume(&block)
|
|
31
|
+
@pauses_manager.resume(&block)
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# @param topic [String] topic name
|
|
35
|
+
# @param partition [Integer] partition number
|
|
36
|
+
def revoke(topic, partition)
|
|
37
|
+
return unless @coordinators[topic].key?(partition)
|
|
38
|
+
|
|
39
|
+
# The fact that we delete here does not change the fact that the executor still holds the
|
|
40
|
+
# reference to this coordinator. We delete it here, as we will no longer process any
|
|
41
|
+
# new stuff with it and we may need a new coordinator if we regain this partition, but the
|
|
42
|
+
# coordinator may still be in use
|
|
43
|
+
@coordinators[topic].delete(partition).revoke
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Clears coordinators and re-creates the pauses manager
|
|
47
|
+
# This should be used only for critical errors recovery
|
|
48
|
+
def reset
|
|
49
|
+
@pauses_manager = Connection::PausesManager.new
|
|
50
|
+
@coordinators.clear
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
# Namespace that encapsulates all the logic related to processing data.
|
|
5
|
+
module Processing
|
|
6
|
+
# Executors:
|
|
7
|
+
# - run consumers code (for `#call`) or run given preparation / teardown operations when needed
|
|
8
|
+
# from separate threads.
|
|
9
|
+
# - they re-create consumer instances in case of partitions that were revoked and assigned
|
|
10
|
+
# back.
|
|
11
|
+
#
|
|
12
|
+
# @note Executors are not removed after partition is revoked. They are not that big and will
|
|
13
|
+
# be re-used in case of a re-claim
|
|
14
|
+
class Executor
|
|
15
|
+
# @return [String] unique id that we use for state tracking
|
|
16
|
+
attr_reader :id
|
|
17
|
+
|
|
18
|
+
# @return [String] subscription group id to which a given executor belongs
|
|
19
|
+
attr_reader :group_id
|
|
20
|
+
|
|
21
|
+
# @return [Karafka::Messages::Messages] messages batch
|
|
22
|
+
attr_reader :messages
|
|
23
|
+
|
|
24
|
+
# Topic accessibility may be needed for the jobs builder to be able to build a proper job
|
|
25
|
+
# based on the topic settings defined by the end user
|
|
26
|
+
#
|
|
27
|
+
# @return [Karafka::Routing::Topic] topic of this executor
|
|
28
|
+
attr_reader :topic
|
|
29
|
+
|
|
30
|
+
# @param group_id [String] id of the subscription group to which the executor belongs
|
|
31
|
+
# @param client [Karafka::Connection::Client] kafka client
|
|
32
|
+
# @param topic [Karafka::Routing::Topic] topic for which this executor will run
|
|
33
|
+
def initialize(group_id, client, topic)
|
|
34
|
+
@id = SecureRandom.uuid
|
|
35
|
+
@group_id = group_id
|
|
36
|
+
@client = client
|
|
37
|
+
@topic = topic
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Builds the consumer instance, builds messages batch and sets all that is needed to run the
|
|
41
|
+
# user consumption logic
|
|
42
|
+
#
|
|
43
|
+
# @param messages [Array<Karafka::Messages::Message>]
|
|
44
|
+
# @param received_at [Time] the moment we've received the batch (actually the moment we've
|
|
45
|
+
# enqueued it, but good enough)
|
|
46
|
+
# @param coordinator [Karafka::Processing::Coordinator] coordinator for processing management
|
|
47
|
+
def before_consume(messages, received_at, coordinator)
|
|
48
|
+
# Recreate consumer with each batch if persistence is not enabled
|
|
49
|
+
# We reload the consumers with each batch instead of relying on some external signals
|
|
50
|
+
# when needed for consistency. That way devs may have it on or off and not in this
|
|
51
|
+
# middle state, where re-creation of a consumer instance would occur only sometimes
|
|
52
|
+
@consumer = nil unless ::Karafka::App.config.consumer_persistence
|
|
53
|
+
|
|
54
|
+
consumer.coordinator = coordinator
|
|
55
|
+
|
|
56
|
+
# First we build messages batch...
|
|
57
|
+
consumer.messages = Messages::Builders::Messages.call(
|
|
58
|
+
messages,
|
|
59
|
+
@topic,
|
|
60
|
+
received_at
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
consumer.on_before_consume
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# Runs consumer data processing against given batch and handles failures and errors.
|
|
67
|
+
def consume
|
|
68
|
+
# We run the consumer client logic...
|
|
69
|
+
consumer.on_consume
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Runs consumer after consumption code
|
|
73
|
+
def after_consume
|
|
74
|
+
consumer.on_after_consume
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Runs the consumer `#on_revoked` method that should be triggered when a given consumer is
|
|
78
|
+
# no longer needed due to partitions reassignment.
|
|
79
|
+
#
|
|
80
|
+
# @note Clearing the consumer will ensure, that if we get the partition back, it will be
|
|
81
|
+
# handled with a consumer with a clean state.
|
|
82
|
+
#
|
|
83
|
+
# @note We run it only when consumer was present, because presence indicates, that at least
|
|
84
|
+
# a single message has been consumed.
|
|
85
|
+
#
|
|
86
|
+
# @note We do not reset the consumer but we indicate need for recreation instead, because
|
|
87
|
+
# after the revocation, there still may be `#after_consume` running that needs a given
|
|
88
|
+
# consumer instance.
|
|
89
|
+
def revoked
|
|
90
|
+
consumer.on_revoked if @consumer
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Runs the consumer `#on_shutdown` method that should be triggered when a given consumer is
|
|
94
|
+
# no longer needed as we're closing the process.
|
|
95
|
+
#
|
|
96
|
+
# @note While we do not need to clear the consumer here, it's a good habit to clean after
|
|
97
|
+
# work is done.
|
|
98
|
+
def shutdown
|
|
99
|
+
# There is a case, where the consumer no longer exists because it was revoked, in case like
|
|
100
|
+
# that we do not build a new instance and shutdown should not be triggered.
|
|
101
|
+
consumer.on_shutdown if @consumer
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
private
|
|
105
|
+
|
|
106
|
+
# @return [Object] cached consumer instance
|
|
107
|
+
def consumer
|
|
108
|
+
@consumer ||= begin
|
|
109
|
+
consumer = @topic.consumer_class.new
|
|
110
|
+
consumer.topic = @topic
|
|
111
|
+
consumer.client = @client
|
|
112
|
+
consumer.producer = ::Karafka::App.producer
|
|
113
|
+
consumer
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
# Buffer for executors of a given subscription group. It wraps around the concept of building
|
|
6
|
+
# and caching them, so we can re-use them instead of creating new each time.
|
|
7
|
+
class ExecutorsBuffer
|
|
8
|
+
# @param client [Connection::Client]
|
|
9
|
+
# @param subscription_group [Routing::SubscriptionGroup]
|
|
10
|
+
# @return [ExecutorsBuffer]
|
|
11
|
+
def initialize(client, subscription_group)
|
|
12
|
+
@subscription_group = subscription_group
|
|
13
|
+
@client = client
|
|
14
|
+
# We need two layers here to keep track of topics, partitions and processing groups
|
|
15
|
+
@buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# Finds or creates an executor based on the provided details
|
|
19
|
+
#
|
|
20
|
+
# @param topic [String] topic name
|
|
21
|
+
# @param partition [Integer] partition number
|
|
22
|
+
# @param parallel_key [String] parallel group key
|
|
23
|
+
# @return [Executor] consumer executor
|
|
24
|
+
def find_or_create(topic, partition, parallel_key)
|
|
25
|
+
ktopic = find_topic(topic)
|
|
26
|
+
|
|
27
|
+
@buffer[ktopic][partition][parallel_key] ||= Executor.new(
|
|
28
|
+
@subscription_group.id,
|
|
29
|
+
@client,
|
|
30
|
+
ktopic
|
|
31
|
+
)
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Revokes executors of a given topic partition, so they won't be used anymore for incoming
|
|
35
|
+
# messages
|
|
36
|
+
#
|
|
37
|
+
# @param topic [String] topic name
|
|
38
|
+
# @param partition [Integer] partition number
|
|
39
|
+
def revoke(topic, partition)
|
|
40
|
+
ktopic = find_topic(topic)
|
|
41
|
+
|
|
42
|
+
@buffer[ktopic][partition].clear
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Finds all the executors available for a given topic partition
|
|
46
|
+
#
|
|
47
|
+
# @param topic [String] topic name
|
|
48
|
+
# @param partition [Integer] partition number
|
|
49
|
+
# @return [Array<Executor>] executors in use for this topic + partition
|
|
50
|
+
def find_all(topic, partition)
|
|
51
|
+
ktopic = find_topic(topic)
|
|
52
|
+
|
|
53
|
+
@buffer[ktopic][partition].values
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Iterates over all available executors and yields them together with topic and partition
|
|
57
|
+
# info
|
|
58
|
+
# @yieldparam [Routing::Topic] karafka routing topic object
|
|
59
|
+
# @yieldparam [Integer] partition number
|
|
60
|
+
# @yieldparam [Executor] given executor
|
|
61
|
+
def each
|
|
62
|
+
@buffer.each do |ktopic, partitions|
|
|
63
|
+
partitions.each do |partition, executors|
|
|
64
|
+
executors.each do |_parallel_key, executor|
|
|
65
|
+
# We skip the parallel key here as it does not serve any value when iterating
|
|
66
|
+
yield(ktopic, partition, executor)
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Clears the executors buffer. Useful for critical errors recovery.
|
|
73
|
+
def clear
|
|
74
|
+
@buffer.clear
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
private
|
|
78
|
+
|
|
79
|
+
# Finds topic based on its name
|
|
80
|
+
#
|
|
81
|
+
# @param topic [String] topic we're looking for
|
|
82
|
+
# @return [Karafka::Routing::Topic] topic we're interested in
|
|
83
|
+
def find_topic(topic)
|
|
84
|
+
@subscription_group.topics.find(topic) || raise(Errors::TopicNotFoundError, topic)
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
# Namespace for all the jobs that are supposed to run in workers.
|
|
6
|
+
module Jobs
|
|
7
|
+
# Base class for all the job types that are supposed to run in worker threads.
|
|
8
|
+
# Each job can have 3 main entry-points: `#before_call`, `#call` and `#after_call`
|
|
9
|
+
# Only `#call` is required.
|
|
10
|
+
class Base
|
|
11
|
+
extend Forwardable
|
|
12
|
+
|
|
13
|
+
# @note Since one job always has exactly one executor, we use the executor's id and group id as reference
|
|
14
|
+
def_delegators :executor, :id, :group_id
|
|
15
|
+
|
|
16
|
+
attr_reader :executor
|
|
17
|
+
|
|
18
|
+
# Creates a new job instance
|
|
19
|
+
def initialize
|
|
20
|
+
# All jobs are blocking by default and they can release the lock when blocking operations
|
|
21
|
+
# are done (if needed)
|
|
22
|
+
@non_blocking = false
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# When redefined can run any code that should run before executing the proper code
|
|
26
|
+
def before_call; end
|
|
27
|
+
|
|
28
|
+
# The main entry-point of a job
|
|
29
|
+
def call
|
|
30
|
+
raise NotImplementedError, 'Please implement in a subclass'
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# When redefined can run any code that should run after executing the proper code
|
|
34
|
+
def after_call; end
|
|
35
|
+
|
|
36
|
+
# @return [Boolean] is this a non-blocking job
|
|
37
|
+
#
|
|
38
|
+
# @note Blocking job is a job, that will cause the job queue to wait until it is finished
|
|
39
|
+
# before removing the lock on new jobs being added
|
|
40
|
+
#
|
|
41
|
+
# @note All the jobs are blocking by default
|
|
42
|
+
#
|
|
43
|
+
# @note Job **needs** to mark itself as non-blocking only **after** it is done with all
|
|
44
|
+
# the blocking things (pausing partition, etc).
|
|
45
|
+
def non_blocking?
|
|
46
|
+
@non_blocking
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
module Jobs
|
|
6
|
+
# The main job type. It runs the executor that triggers given topic partition messages
|
|
7
|
+
# processing in an underlying consumer instance.
|
|
8
|
+
class Consume < Base
|
|
9
|
+
# @return [Array<Rdkafka::Consumer::Message>] array with messages
|
|
10
|
+
attr_reader :messages
|
|
11
|
+
|
|
12
|
+
# @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
|
|
13
|
+
# job
|
|
14
|
+
# @param messages [Karafka::Messages::Messages] karafka messages batch
|
|
15
|
+
# @param coordinator [Karafka::Processing::Coordinator] processing coordinator
|
|
16
|
+
# @return [Consume]
|
|
17
|
+
def initialize(executor, messages, coordinator)
|
|
18
|
+
@executor = executor
|
|
19
|
+
@messages = messages
|
|
20
|
+
@coordinator = coordinator
|
|
21
|
+
@created_at = Time.now
|
|
22
|
+
super()
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Runs the before consumption preparations on the executor
|
|
26
|
+
def before_call
|
|
27
|
+
executor.before_consume(@messages, @created_at, @coordinator)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Runs the given executor
|
|
31
|
+
def call
|
|
32
|
+
executor.consume
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Runs any error handling and other post-consumption stuff on the executor
|
|
36
|
+
def after_call
|
|
37
|
+
executor.after_consume
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
module Jobs
|
|
6
|
+
# Job that runs the revoked operation when we lose a partition on a consumer that lost it.
|
|
7
|
+
class Revoked < Base
|
|
8
|
+
# @param executor [Karafka::Processing::Executor] executor that is supposed to run the job
|
|
9
|
+
# @return [Revoked]
|
|
10
|
+
def initialize(executor)
|
|
11
|
+
@executor = executor
|
|
12
|
+
super()
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Runs the revoking job via an executor.
|
|
16
|
+
def call
|
|
17
|
+
executor.revoked
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
module Jobs
|
|
6
|
+
# Job that runs on each active consumer upon process shutdown (one job per consumer).
|
|
7
|
+
class Shutdown < Base
|
|
8
|
+
# @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
|
|
9
|
+
# job on an active consumer
|
|
10
|
+
# @return [Shutdown]
|
|
11
|
+
def initialize(executor)
|
|
12
|
+
@executor = executor
|
|
13
|
+
super()
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
# Runs the shutdown job via an executor.
|
|
17
|
+
def call
|
|
18
|
+
executor.shutdown
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
# Class responsible for deciding what type of job we should build to run a given command and
|
|
6
|
+
# for building a proper job for it.
|
|
7
|
+
class JobsBuilder
|
|
8
|
+
# @param executor [Karafka::Processing::Executor]
|
|
9
|
+
# @param messages [Karafka::Messages::Messages] messages batch to be consumed
|
|
10
|
+
# @param coordinator [Karafka::Processing::Coordinator]
|
|
11
|
+
# @return [Karafka::Processing::Jobs::Consume] consumption job
|
|
12
|
+
def consume(executor, messages, coordinator)
|
|
13
|
+
Jobs::Consume.new(executor, messages, coordinator)
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
# @param executor [Karafka::Processing::Executor]
|
|
17
|
+
# @return [Karafka::Processing::Jobs::Revoked] revocation job
|
|
18
|
+
def revoked(executor)
|
|
19
|
+
Jobs::Revoked.new(executor)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
# @param executor [Karafka::Processing::Executor]
|
|
23
|
+
# @return [Karafka::Processing::Jobs::Shutdown] shutdown job
|
|
24
|
+
def shutdown(executor)
|
|
25
|
+
Jobs::Shutdown.new(executor)
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
# This is the key work component for Karafka jobs distribution. It provides API for running
|
|
6
|
+
# jobs in parallel while operating within more than one subscription group.
|
|
7
|
+
#
|
|
8
|
+
# We need to take into consideration the fact that more than one subscription group can operate
|
|
9
|
+
# on this queue, that's why internally we keep track of processing per group.
|
|
10
|
+
#
|
|
11
|
+
# We work with the assumption, that partitions data is evenly distributed.
|
|
12
|
+
class JobsQueue
|
|
13
|
+
# @return [Karafka::Processing::JobsQueue]
|
|
14
|
+
def initialize
|
|
15
|
+
@queue = Queue.new
|
|
16
|
+
# Those queues will act as semaphores internally. Since we need an indicator for waiting
|
|
17
|
+
# we could use Thread.pass but this is expensive. Instead we can just lock until any
|
|
18
|
+
# of the workers finishes their work and we can re-check. This means that in the worst
|
|
19
|
+
# scenario, we will context switch 10 times per poll instead of getting this thread
|
|
20
|
+
# scheduled by Ruby hundreds of thousands of times per group.
|
|
21
|
+
# We cannot use a single semaphore as it could potentially block in listeners that should
|
|
22
|
+
# process with their data and also could unlock when a given group needs to remain locked
|
|
23
|
+
@semaphores = Hash.new { |h, k| h[k] = Queue.new }
|
|
24
|
+
@in_processing = Hash.new { |h, k| h[k] = [] }
|
|
25
|
+
@mutex = Mutex.new
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Returns number of jobs that are either enqueued or in processing (but not finished)
|
|
29
|
+
# @return [Integer] number of elements in the queue
|
|
30
|
+
# @note Using `#pop` won't decrease this number as only marking job as completed does this
|
|
31
|
+
def size
|
|
32
|
+
@in_processing.values.map(&:size).sum
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Adds the job to the internal main queue, scheduling it for execution in a worker and marks
|
|
36
|
+
# this job as in processing pipeline.
|
|
37
|
+
#
|
|
38
|
+
# @param job [Jobs::Base] job that we want to run
|
|
39
|
+
def <<(job)
|
|
40
|
+
# We do not push the job if the queue is closed as it means that it would anyhow not be
|
|
41
|
+
# executed
|
|
42
|
+
return if @queue.closed?
|
|
43
|
+
|
|
44
|
+
@mutex.synchronize do
|
|
45
|
+
group = @in_processing[job.group_id]
|
|
46
|
+
|
|
47
|
+
raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
|
|
48
|
+
|
|
49
|
+
group << job
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
@queue << job
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# @return [Jobs::Base, nil] waits for a job from the main queue and returns it once available
|
|
56
|
+
# or returns nil if the queue has been stopped and there won't be anything more to process
|
|
57
|
+
# ever.
|
|
58
|
+
# @note This command is blocking and will wait until any job is available on the main queue
|
|
59
|
+
def pop
|
|
60
|
+
@queue.pop
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Causes the wait lock to re-check the lock conditions and potentially unlock.
|
|
64
|
+
# @param group_id [String] id of the group we want to unlock for one tick
|
|
65
|
+
# @note This does not release the wait lock. It just causes a conditions recheck
|
|
66
|
+
def tick(group_id)
|
|
67
|
+
@semaphores[group_id] << true
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Marks a given job from a given group as completed. When there are no more jobs from a given
|
|
71
|
+
# group to be executed, we won't wait.
|
|
72
|
+
#
|
|
73
|
+
# @param job [Jobs::Base] job that was completed
|
|
74
|
+
def complete(job)
|
|
75
|
+
@mutex.synchronize do
|
|
76
|
+
@in_processing[job.group_id].delete(job)
|
|
77
|
+
tick(job.group_id)
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# Clears the processing states for a provided group. Useful when a recovery happens and we
|
|
82
|
+
# need to clean up state but only for a given subscription group.
|
|
83
|
+
#
|
|
84
|
+
# @param group_id [String]
|
|
85
|
+
def clear(group_id)
|
|
86
|
+
@mutex.synchronize do
|
|
87
|
+
@in_processing[group_id].clear
|
|
88
|
+
# We unlock it just in case it was blocked when clearing started
|
|
89
|
+
tick(group_id)
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Stops the whole processing queue.
|
|
94
|
+
def close
|
|
95
|
+
@mutex.synchronize do
|
|
96
|
+
return if @queue.closed?
|
|
97
|
+
|
|
98
|
+
@queue.close
|
|
99
|
+
@semaphores.values.each(&:close)
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# @param group_id [String]
|
|
104
|
+
#
|
|
105
|
+
# @return [Boolean] tells us whether there is nothing in processing (or waiting for processing) from
|
|
106
|
+
# a given group.
|
|
107
|
+
def empty?(group_id)
|
|
108
|
+
@in_processing[group_id].empty?
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# Blocks when there are things in the queue in a given group and waits until all the blocking
|
|
112
|
+
# jobs from a given group are completed
|
|
113
|
+
#
|
|
114
|
+
# @param group_id [String] id of the group in which jobs we're interested.
|
|
115
|
+
# @note This method is blocking.
|
|
116
|
+
def wait(group_id)
|
|
117
|
+
# Go doing other things while we cannot process and wait for anyone to finish their work
|
|
118
|
+
# and re-check the wait status
|
|
119
|
+
@semaphores[group_id].pop while wait?(group_id)
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# - `processing` - number of jobs that are currently being processed (active work)
|
|
123
|
+
# - `enqueued` - number of jobs in the queue that are waiting to be picked up by a worker
|
|
124
|
+
#
|
|
125
|
+
# @return [Hash] hash with basic usage statistics of this queue.
|
|
126
|
+
def statistics
|
|
127
|
+
{
|
|
128
|
+
processing: size - @queue.size,
|
|
129
|
+
enqueued: @queue.size
|
|
130
|
+
}.freeze
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
private
|
|
134
|
+
|
|
135
|
+
# @param group_id [String] id of the group in which jobs we're interested.
|
|
136
|
+
# @return [Boolean] should we keep waiting or not
|
|
137
|
+
# @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
|
|
138
|
+
# as they may exceed `max.poll.interval`
|
|
139
|
+
def wait?(group_id)
|
|
140
|
+
!@in_processing[group_id].all?(&:non_blocking?)
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
end
|
|
144
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
# Basic partitioner for work division
|
|
6
|
+
# It does not divide any work.
|
|
7
|
+
class Partitioner
|
|
8
|
+
# @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
|
|
9
|
+
def initialize(subscription_group)
|
|
10
|
+
@subscription_group = subscription_group
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
# @param _topic [String] topic name
|
|
14
|
+
# @param messages [Array<Karafka::Messages::Message>] karafka messages
|
|
15
|
+
# @yieldparam [Integer] group id
|
|
16
|
+
# @yieldparam [Array<Karafka::Messages::Message>] karafka messages
|
|
17
|
+
def call(_topic, messages)
|
|
18
|
+
yield(0, messages)
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
# A simple object that allows us to keep track of processing state.
|
|
6
|
+
# It allows indicating whether a given thing moved from success to a failure or the other way around
|
|
7
|
+
# Useful for tracking consumption state
|
|
8
|
+
class Result
|
|
9
|
+
def initialize
|
|
10
|
+
@success = true
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
# @return [Boolean]
|
|
14
|
+
def success?
|
|
15
|
+
@success
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# Marks state as successful
|
|
19
|
+
def success!
|
|
20
|
+
@success = true
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Marks state as failure
|
|
24
|
+
def failure!
|
|
25
|
+
@success = false
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Processing
|
|
5
|
+
# FIFO scheduler for messages coming from various topics and partitions
|
|
6
|
+
class Scheduler
|
|
7
|
+
# Schedules jobs in the fifo order
|
|
8
|
+
#
|
|
9
|
+
# @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
|
|
10
|
+
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
|
11
|
+
def schedule_consumption(queue, jobs_array)
|
|
12
|
+
jobs_array.each do |job|
|
|
13
|
+
queue << job
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Both revocation and shutdown jobs can also run in fifo by default
|
|
18
|
+
alias schedule_revocation schedule_consumption
|
|
19
|
+
alias schedule_shutdown schedule_consumption
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|