karafka 1.4.13 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +3 -3
- data/.github/workflows/ci.yml +85 -30
- data/.ruby-version +1 -1
- data/CHANGELOG.md +268 -7
- data/CONTRIBUTING.md +10 -19
- data/Gemfile +6 -0
- data/Gemfile.lock +44 -87
- data/LICENSE +17 -0
- data/LICENSE-COMM +89 -0
- data/LICENSE-LGPL +165 -0
- data/README.md +44 -48
- data/bin/benchmarks +85 -0
- data/bin/create_token +22 -0
- data/bin/integrations +237 -0
- data/bin/karafka +4 -0
- data/bin/scenario +29 -0
- data/bin/stress_many +13 -0
- data/bin/stress_one +13 -0
- data/bin/wait_for_kafka +20 -0
- data/certs/karafka-pro.pem +11 -0
- data/config/errors.yml +55 -40
- data/docker-compose.yml +39 -3
- data/karafka.gemspec +11 -17
- data/lib/active_job/karafka.rb +21 -0
- data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
- data/lib/karafka/active_job/consumer.rb +26 -0
- data/lib/karafka/active_job/dispatcher.rb +38 -0
- data/lib/karafka/active_job/job_extensions.rb +34 -0
- data/lib/karafka/active_job/job_options_contract.rb +21 -0
- data/lib/karafka/active_job/routing/extensions.rb +31 -0
- data/lib/karafka/app.rb +15 -20
- data/lib/karafka/base_consumer.rb +181 -31
- data/lib/karafka/cli/base.rb +4 -4
- data/lib/karafka/cli/info.rb +43 -9
- data/lib/karafka/cli/install.rb +19 -10
- data/lib/karafka/cli/server.rb +17 -42
- data/lib/karafka/cli.rb +4 -11
- data/lib/karafka/connection/client.rb +385 -90
- data/lib/karafka/connection/listener.rb +246 -38
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +84 -0
- data/lib/karafka/connection/pauses_manager.rb +46 -0
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/connection/rebalance_manager.rb +78 -0
- data/lib/karafka/contracts/base.rb +17 -0
- data/lib/karafka/contracts/config.rb +88 -11
- data/lib/karafka/contracts/consumer_group.rb +21 -189
- data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
- data/lib/karafka/contracts/server_cli_options.rb +19 -18
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/env.rb +46 -0
- data/lib/karafka/errors.rb +21 -21
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/helpers/colorize.rb +20 -0
- data/lib/karafka/helpers/multi_delegator.rb +2 -2
- data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
- data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
- data/lib/karafka/instrumentation/logger_listener.rb +164 -0
- data/lib/karafka/instrumentation/monitor.rb +13 -61
- data/lib/karafka/instrumentation/notifications.rb +52 -0
- data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
- data/lib/karafka/instrumentation.rb +21 -0
- data/lib/karafka/licenser.rb +75 -0
- data/lib/karafka/messages/batch_metadata.rb +45 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
- data/lib/karafka/messages/builders/message.rb +39 -0
- data/lib/karafka/messages/builders/messages.rb +32 -0
- data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
- data/lib/karafka/messages/messages.rb +64 -0
- data/lib/karafka/{params → messages}/metadata.rb +4 -6
- data/lib/karafka/messages/seek.rb +9 -0
- data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
- data/lib/karafka/pro/active_job/consumer.rb +46 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
- data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
- data/lib/karafka/pro/base_consumer.rb +82 -0
- data/lib/karafka/pro/contracts/base.rb +21 -0
- data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
- data/lib/karafka/pro/loader.rb +76 -0
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/pro/processing/coordinator.rb +72 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
- data/lib/karafka/pro/processing/partitioner.rb +60 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
- data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
- data/lib/karafka/pro.rb +13 -0
- data/lib/karafka/process.rb +1 -0
- data/lib/karafka/processing/coordinator.rb +88 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +118 -0
- data/lib/karafka/processing/executors_buffer.rb +88 -0
- data/lib/karafka/processing/jobs/base.rb +51 -0
- data/lib/karafka/processing/jobs/consume.rb +42 -0
- data/lib/karafka/processing/jobs/revoked.rb +22 -0
- data/lib/karafka/processing/jobs/shutdown.rb +23 -0
- data/lib/karafka/processing/jobs_builder.rb +29 -0
- data/lib/karafka/processing/jobs_queue.rb +144 -0
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +29 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +88 -0
- data/lib/karafka/processing/workers_batch.rb +27 -0
- data/lib/karafka/railtie.rb +113 -0
- data/lib/karafka/routing/builder.rb +15 -24
- data/lib/karafka/routing/consumer_group.rb +11 -19
- data/lib/karafka/routing/consumer_mapper.rb +1 -2
- data/lib/karafka/routing/router.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +53 -0
- data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
- data/lib/karafka/routing/topic.rb +61 -24
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +51 -0
- data/lib/karafka/serialization/json/deserializer.rb +6 -15
- data/lib/karafka/server.rb +67 -26
- data/lib/karafka/setup/config.rb +147 -175
- data/lib/karafka/status.rb +14 -5
- data/lib/karafka/templates/example_consumer.rb.erb +16 -0
- data/lib/karafka/templates/karafka.rb.erb +15 -51
- data/lib/karafka/time_trackers/base.rb +19 -0
- data/lib/karafka/time_trackers/pause.rb +92 -0
- data/lib/karafka/time_trackers/poll.rb +65 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +38 -17
- data.tar.gz.sig +0 -0
- metadata +118 -120
- metadata.gz.sig +0 -0
- data/MIT-LICENCE +0 -18
- data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
- data/lib/karafka/attributes_map.rb +0 -63
- data/lib/karafka/backends/inline.rb +0 -16
- data/lib/karafka/base_responder.rb +0 -226
- data/lib/karafka/cli/flow.rb +0 -48
- data/lib/karafka/cli/missingno.rb +0 -19
- data/lib/karafka/code_reloader.rb +0 -67
- data/lib/karafka/connection/api_adapter.rb +0 -158
- data/lib/karafka/connection/batch_delegator.rb +0 -55
- data/lib/karafka/connection/builder.rb +0 -23
- data/lib/karafka/connection/message_delegator.rb +0 -36
- data/lib/karafka/consumers/batch_metadata.rb +0 -10
- data/lib/karafka/consumers/callbacks.rb +0 -71
- data/lib/karafka/consumers/includer.rb +0 -64
- data/lib/karafka/consumers/responders.rb +0 -24
- data/lib/karafka/consumers/single_params.rb +0 -15
- data/lib/karafka/contracts/responder_usage.rb +0 -54
- data/lib/karafka/fetcher.rb +0 -42
- data/lib/karafka/helpers/class_matcher.rb +0 -88
- data/lib/karafka/helpers/config_retriever.rb +0 -46
- data/lib/karafka/helpers/inflector.rb +0 -26
- data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
- data/lib/karafka/params/batch_metadata.rb +0 -26
- data/lib/karafka/params/builders/batch_metadata.rb +0 -30
- data/lib/karafka/params/builders/params.rb +0 -38
- data/lib/karafka/params/builders/params_batch.rb +0 -25
- data/lib/karafka/params/params_batch.rb +0 -60
- data/lib/karafka/patches/ruby_kafka.rb +0 -47
- data/lib/karafka/persistence/client.rb +0 -29
- data/lib/karafka/persistence/consumers.rb +0 -45
- data/lib/karafka/persistence/topics.rb +0 -48
- data/lib/karafka/responders/builder.rb +0 -36
- data/lib/karafka/responders/topic.rb +0 -55
- data/lib/karafka/routing/topic_mapper.rb +0 -53
- data/lib/karafka/serialization/json/serializer.rb +0 -31
- data/lib/karafka/setup/configurators/water_drop.rb +0 -36
- data/lib/karafka/templates/application_responder.rb.erb +0 -11
data/lib/karafka/cli/server.rb
CHANGED
|
@@ -5,27 +5,18 @@ module Karafka
|
|
|
5
5
|
class Cli < Thor
|
|
6
6
|
# Server Karafka Cli action
|
|
7
7
|
class Server < Base
|
|
8
|
-
|
|
9
|
-
CONTRACT = Contracts::ServerCliOptions.new.freeze
|
|
10
|
-
|
|
11
|
-
private_constant :CONTRACT
|
|
8
|
+
include Helpers::Colorize
|
|
12
9
|
|
|
13
10
|
desc 'Start the Karafka server (short-cut alias: "s")'
|
|
14
11
|
option aliases: 's'
|
|
15
|
-
option :daemon, default: false, type: :boolean, aliases: :d
|
|
16
|
-
option :pid, default: 'tmp/pids/karafka', type: :string, aliases: :p
|
|
17
12
|
option :consumer_groups, type: :array, default: nil, aliases: :g
|
|
18
13
|
|
|
19
14
|
# Start the Karafka server
|
|
20
15
|
def call
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
validate!
|
|
16
|
+
# Print our banner and info in the dev mode
|
|
17
|
+
print_marketing_info if Karafka::App.env.development?
|
|
24
18
|
|
|
25
|
-
|
|
26
|
-
FileUtils.mkdir_p File.dirname(cli.options[:pid])
|
|
27
|
-
daemonize
|
|
28
|
-
end
|
|
19
|
+
Contracts::ServerCliOptions.new.validate!(cli.options)
|
|
29
20
|
|
|
30
21
|
# We assign active topics on a server level, as only server is expected to listen on
|
|
31
22
|
# part of the topics
|
|
@@ -36,35 +27,19 @@ module Karafka
|
|
|
36
27
|
|
|
37
28
|
private
|
|
38
29
|
|
|
39
|
-
#
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
cli.options[:pid],
|
|
53
|
-
'w'
|
|
54
|
-
) { |file| file.write(::Process.pid) }
|
|
55
|
-
|
|
56
|
-
# Remove pidfile on stop, just before the server instance is going to be GCed
|
|
57
|
-
# We want to delay the moment in which the pidfile is removed as much as we can,
|
|
58
|
-
# so instead of removing it after the server stops running, we rely on the gc moment
|
|
59
|
-
# when this object gets removed (it is a bit later), so it is closer to the actual
|
|
60
|
-
# system process end. We do that, so monitoring and deployment tools that rely on a pid
|
|
61
|
-
# won't alarm or start new system process up until the current one is finished
|
|
62
|
-
ObjectSpace.define_finalizer(self, proc { send(:clean) })
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
# Removes a pidfile (if exist)
|
|
66
|
-
def clean
|
|
67
|
-
FileUtils.rm_f(cli.options[:pid]) if cli.options[:pid]
|
|
30
|
+
# Prints marketing info
|
|
31
|
+
def print_marketing_info
|
|
32
|
+
Karafka.logger.info Info::BANNER
|
|
33
|
+
|
|
34
|
+
if Karafka.pro?
|
|
35
|
+
Karafka.logger.info(
|
|
36
|
+
green('Thank you for investing in the Karafka Pro subscription!')
|
|
37
|
+
)
|
|
38
|
+
else
|
|
39
|
+
Karafka.logger.info(
|
|
40
|
+
red('You like Karafka? Please consider getting a Pro version!')
|
|
41
|
+
)
|
|
42
|
+
end
|
|
68
43
|
end
|
|
69
44
|
end
|
|
70
45
|
end
|
data/lib/karafka/cli.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
module Karafka
|
|
4
4
|
# Karafka framework Cli
|
|
5
|
+
#
|
|
5
6
|
# If you want to add/modify command that belongs to CLI, please review all commands
|
|
6
7
|
# available in cli/ directory inside Karafka source code.
|
|
7
8
|
#
|
|
@@ -10,24 +11,16 @@ module Karafka
|
|
|
10
11
|
class Cli < Thor
|
|
11
12
|
package_name 'Karafka'
|
|
12
13
|
|
|
13
|
-
default_task :missingno
|
|
14
|
-
|
|
15
14
|
class << self
|
|
16
|
-
# Loads all Cli commands into Thor framework
|
|
15
|
+
# Loads all Cli commands into Thor framework.
|
|
17
16
|
# This method should be executed before we run Karafka::Cli.start, otherwise we won't
|
|
18
|
-
# have any Cli commands available
|
|
17
|
+
# have any Cli commands available.
|
|
19
18
|
def prepare
|
|
20
19
|
cli_commands.each do |action|
|
|
21
20
|
action.bind_to(self)
|
|
22
21
|
end
|
|
23
22
|
end
|
|
24
23
|
|
|
25
|
-
# When there is a CLI crash, exit
|
|
26
|
-
# @return [true]
|
|
27
|
-
def exit_on_failure?
|
|
28
|
-
true
|
|
29
|
-
end
|
|
30
|
-
|
|
31
24
|
private
|
|
32
25
|
|
|
33
26
|
# @return [Array<Class>] Array with Cli action classes that can be used as commands
|
|
@@ -42,7 +35,7 @@ module Karafka
|
|
|
42
35
|
end
|
|
43
36
|
end
|
|
44
37
|
|
|
45
|
-
# This is kinda
|
|
38
|
+
# This is kinda tricky - since we don't have an autoload and other magic stuff
|
|
46
39
|
# like Rails does, so instead this method allows us to replace currently running
|
|
47
40
|
# console with a new one via Kernel.exec. It will start console with new code loaded
|
|
48
41
|
# Yes, we know that it is not turbo fast, however it is turbo convenient and small
|
|
@@ -1,119 +1,414 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Karafka
|
|
4
|
+
# Namespace for Kafka connection related logic
|
|
4
5
|
module Connection
|
|
5
|
-
#
|
|
6
|
-
#
|
|
6
|
+
# An abstraction layer on top of the rdkafka consumer.
|
|
7
|
+
#
|
|
8
|
+
# It is threadsafe and provides some security measures so we won't end up operating on a
|
|
9
|
+
# closed consumer instance as it causes Ruby VM process to crash.
|
|
7
10
|
class Client
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
#
|
|
20
|
-
#
|
|
21
|
-
# @
|
|
22
|
-
#
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
11
|
+
attr_reader :rebalance_manager
|
|
12
|
+
|
|
13
|
+
# @return [String] underlying consumer name
|
|
14
|
+
# @note Consumer name may change in case we regenerate it
|
|
15
|
+
attr_reader :name
|
|
16
|
+
|
|
17
|
+
# How many times should we retry polling in case of a failure
|
|
18
|
+
MAX_POLL_RETRIES = 10
|
|
19
|
+
|
|
20
|
+
private_constant :MAX_POLL_RETRIES
|
|
21
|
+
|
|
22
|
+
# Creates a new consumer instance.
|
|
23
|
+
#
|
|
24
|
+
# @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
|
|
25
|
+
# with all the configuration details needed for us to create a client
|
|
26
|
+
# @return [Karafka::Connection::Rdk::Consumer]
|
|
27
|
+
def initialize(subscription_group)
|
|
28
|
+
# Name is set when we build consumer
|
|
29
|
+
@name = ''
|
|
30
|
+
@mutex = Mutex.new
|
|
31
|
+
@closed = false
|
|
32
|
+
@subscription_group = subscription_group
|
|
33
|
+
@buffer = RawMessagesBuffer.new
|
|
34
|
+
@rebalance_manager = RebalanceManager.new
|
|
35
|
+
@kafka = build_consumer
|
|
36
|
+
# Marks if we need to offset. If we did not store offsets, we should not commit the offset
|
|
37
|
+
# position as it will crash rdkafka
|
|
38
|
+
@offsetting = false
|
|
39
|
+
# We need to keep track of what we have paused for resuming
|
|
40
|
+
# In case we lose a partition, we still need to resume it, otherwise it won't be fetched
|
|
41
|
+
# again if we get reassigned to it later on. We need to keep them as after revocation we
|
|
42
|
+
# no longer may be able to fetch them from Kafka. We could build them but it is easier
|
|
43
|
+
# to just keep them here and use if needed when cannot be obtained
|
|
44
|
+
@paused_tpls = Hash.new { |h, k| h[k] = {} }
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Fetches messages within boundaries defined by the settings (time, size, topics, etc).
|
|
48
|
+
#
|
|
49
|
+
# @return [Karafka::Connection::RawMessagesBuffer] messages buffer that holds messages per topic
|
|
50
|
+
# partition
|
|
51
|
+
# @note This method should not be executed from many threads at the same time
|
|
52
|
+
def batch_poll
|
|
53
|
+
time_poll = TimeTrackers::Poll.new(@subscription_group.max_wait_time)
|
|
54
|
+
|
|
55
|
+
@buffer.clear
|
|
56
|
+
@rebalance_manager.clear
|
|
57
|
+
|
|
58
|
+
loop do
|
|
59
|
+
time_poll.start
|
|
60
|
+
|
|
61
|
+
# Don't fetch more messages if we do not have any time left
|
|
62
|
+
break if time_poll.exceeded?
|
|
63
|
+
# Don't fetch more messages if we've fetched max as we've wanted
|
|
64
|
+
break if @buffer.size >= @subscription_group.max_messages
|
|
65
|
+
|
|
66
|
+
# Fetch message within our time boundaries
|
|
67
|
+
message = poll(time_poll.remaining)
|
|
68
|
+
|
|
69
|
+
# Put a message to the buffer if there is one
|
|
70
|
+
@buffer << message if message
|
|
71
|
+
|
|
72
|
+
# Upon polling rebalance manager might have been updated.
|
|
73
|
+
# If partition revocation happens, we need to remove messages from revoked partitions
|
|
74
|
+
# as well as ensure we do not have duplicates due to the offset reset for partitions
|
|
75
|
+
# that we got assigned
|
|
76
|
+
# We also do early break, so the information about rebalance is used as soon as possible
|
|
77
|
+
if @rebalance_manager.changed?
|
|
78
|
+
remove_revoked_and_duplicated_messages
|
|
79
|
+
break
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# Track time spent on all of the processing and polling
|
|
83
|
+
time_poll.checkpoint
|
|
84
|
+
|
|
85
|
+
# Finally once we've (potentially) removed revoked, etc, if no messages were returned
|
|
86
|
+
# we can break.
|
|
87
|
+
# Worth keeping in mind, that the rebalance manager might have been updated despite no
|
|
88
|
+
# messages being returned during a poll
|
|
89
|
+
break unless message
|
|
39
90
|
end
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
rescue Kafka::ProcessingError => e
|
|
43
|
-
# If there was an error during consumption, we have to log it, pause current partition
|
|
44
|
-
# and process other things
|
|
45
|
-
Karafka.monitor.instrument(
|
|
46
|
-
'connection.client.fetch_loop.error',
|
|
47
|
-
caller: self,
|
|
48
|
-
error: e.cause
|
|
49
|
-
)
|
|
50
|
-
pause(e.topic, e.partition)
|
|
51
|
-
retry
|
|
91
|
+
|
|
92
|
+
@buffer
|
|
52
93
|
end
|
|
53
94
|
|
|
54
|
-
#
|
|
95
|
+
# Stores offset for a given partition of a given topic based on the provided message.
|
|
96
|
+
#
|
|
97
|
+
# @param message [Karafka::Messages::Message]
|
|
98
|
+
def store_offset(message)
|
|
99
|
+
@mutex.synchronize do
|
|
100
|
+
internal_store_offset(message)
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Commits the offset on a current consumer in a non-blocking or blocking way.
|
|
105
|
+
# Ignoring a case where there would not be an offset (for example when rebalance occurs).
|
|
106
|
+
#
|
|
107
|
+
# @param async [Boolean] should the commit happen async or sync (async by default)
|
|
108
|
+
# @return [Boolean] whether committing was successful. It may not be when we no longer own
|
|
109
|
+
# given partition.
|
|
110
|
+
#
|
|
111
|
+
# @note This will commit all the offsets for the whole consumer. In order to achieve
|
|
112
|
+
# granular control over where the offset should be for particular topic partitions, the
|
|
113
|
+
# store_offset should be used to only store new offset when we want it to be flushed
|
|
114
|
+
def commit_offsets(async: true)
|
|
115
|
+
@mutex.lock
|
|
116
|
+
|
|
117
|
+
internal_commit_offsets(async: async)
|
|
118
|
+
ensure
|
|
119
|
+
@mutex.unlock
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Commits offset in a synchronous way.
|
|
123
|
+
#
|
|
124
|
+
# @see `#commit_offsets` for more details
|
|
125
|
+
def commit_offsets!
|
|
126
|
+
commit_offsets(async: false)
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# Seek to a particular message. The next poll on the topic/partition will return the
|
|
130
|
+
# message at the given offset.
|
|
131
|
+
#
|
|
132
|
+
# @param message [Messages::Message, Messages::Seek] message to which we want to seek to
|
|
133
|
+
def seek(message)
|
|
134
|
+
@mutex.lock
|
|
135
|
+
|
|
136
|
+
@kafka.seek(message)
|
|
137
|
+
ensure
|
|
138
|
+
@mutex.unlock
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
# Pauses given partition and moves back to last successful offset processed.
|
|
142
|
+
#
|
|
143
|
+
# @param topic [String] topic name
|
|
144
|
+
# @param partition [Integer] partition
|
|
145
|
+
# @param offset [Integer] offset of the message on which we want to pause (this message will
|
|
146
|
+
# be reprocessed after getting back to processing)
|
|
147
|
+
# @note This will pause indefinitely and requires manual `#resume`
|
|
148
|
+
def pause(topic, partition, offset)
|
|
149
|
+
@mutex.lock
|
|
150
|
+
|
|
151
|
+
# Do not pause if the client got closed, would not change anything
|
|
152
|
+
return if @closed
|
|
153
|
+
|
|
154
|
+
pause_msg = Messages::Seek.new(topic, partition, offset)
|
|
155
|
+
|
|
156
|
+
internal_commit_offsets(async: false)
|
|
157
|
+
|
|
158
|
+
# Here we do not use our cached tpls because we should not try to pause something we do
|
|
159
|
+
# not own anymore.
|
|
160
|
+
tpl = topic_partition_list(topic, partition)
|
|
161
|
+
|
|
162
|
+
return unless tpl
|
|
163
|
+
|
|
164
|
+
@paused_tpls[topic][partition] = tpl
|
|
165
|
+
|
|
166
|
+
@kafka.pause(tpl)
|
|
167
|
+
|
|
168
|
+
@kafka.seek(pause_msg)
|
|
169
|
+
ensure
|
|
170
|
+
@mutex.unlock
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
# Resumes processing of a given topic partition after it was paused.
|
|
174
|
+
#
|
|
175
|
+
# @param topic [String] topic name
|
|
176
|
+
# @param partition [Integer] partition
|
|
177
|
+
def resume(topic, partition)
|
|
178
|
+
@mutex.lock
|
|
179
|
+
|
|
180
|
+
return if @closed
|
|
181
|
+
|
|
182
|
+
# Always commit synchronously offsets if any when we resume
|
|
183
|
+
# This prevents resuming without offset in case it would not be committed prior
|
|
184
|
+
# We can skip performance penalty since resuming should not happen too often
|
|
185
|
+
internal_commit_offsets(async: false)
|
|
186
|
+
|
|
187
|
+
# If we were not able, let's try to reuse the one we have (if we have)
|
|
188
|
+
tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]
|
|
189
|
+
|
|
190
|
+
return unless tpl
|
|
191
|
+
# If we did not have it, it means we never paused this partition, thus no resume should
|
|
192
|
+
# happen in the first place
|
|
193
|
+
return unless @paused_tpls[topic].delete(partition)
|
|
194
|
+
|
|
195
|
+
@kafka.resume(tpl)
|
|
196
|
+
ensure
|
|
197
|
+
@mutex.unlock
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
# Gracefully stops topic consumption.
|
|
201
|
+
#
|
|
55
202
|
# @note Stopping running consumers without a really important reason is not recommended
|
|
56
203
|
# as until all the consumers are stopped, the server will keep running serving only
|
|
57
204
|
# part of the messages
|
|
58
205
|
def stop
|
|
59
|
-
|
|
60
|
-
@kafka_consumer = nil
|
|
206
|
+
close
|
|
61
207
|
end
|
|
62
208
|
|
|
63
|
-
#
|
|
64
|
-
#
|
|
65
|
-
# @param
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
209
|
+
# Marks given message as consumed.
|
|
210
|
+
#
|
|
211
|
+
# @param [Karafka::Messages::Message] message that we want to mark as processed
|
|
212
|
+
# @return [Boolean] true if successful. False if we no longer own given partition
|
|
213
|
+
# @note This method won't trigger automatic offsets commits, rather relying on the offset
|
|
214
|
+
# check-pointing trigger that happens with each batch processed
|
|
215
|
+
def mark_as_consumed(message)
|
|
216
|
+
store_offset(message)
|
|
69
217
|
end
|
|
70
218
|
|
|
71
|
-
# Marks given message as consumed
|
|
72
|
-
#
|
|
73
|
-
# @
|
|
74
|
-
#
|
|
75
|
-
def mark_as_consumed(
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
219
|
+
# Marks a given message as consumed and commits the offsets in a blocking way.
|
|
220
|
+
#
|
|
221
|
+
# @param [Karafka::Messages::Message] message that we want to mark as processed
|
|
222
|
+
# @return [Boolean] true if successful. False if we no longer own given partition
|
|
223
|
+
def mark_as_consumed!(message)
|
|
224
|
+
return false unless mark_as_consumed(message)
|
|
225
|
+
|
|
226
|
+
commit_offsets!
|
|
79
227
|
end
|
|
80
228
|
|
|
81
|
-
#
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
229
|
+
# Closes and resets the client completely.
|
|
230
|
+
def reset
|
|
231
|
+
close
|
|
232
|
+
|
|
233
|
+
@mutex.synchronize do
|
|
234
|
+
@closed = false
|
|
235
|
+
@offsetting = false
|
|
236
|
+
@paused_tpls.clear
|
|
237
|
+
@kafka = build_consumer
|
|
238
|
+
end
|
|
90
239
|
end
|
|
91
240
|
|
|
92
241
|
private
|
|
93
242
|
|
|
94
|
-
|
|
243
|
+
# When we cannot store an offset, it means we no longer own the partition
|
|
244
|
+
#
|
|
245
|
+
# Non thread-safe offset storing method
|
|
246
|
+
# @param message [Karafka::Messages::Message]
|
|
247
|
+
# @return [Boolean] true if we could store the offset (if we still own the partition)
|
|
248
|
+
def internal_store_offset(message)
|
|
249
|
+
@offsetting = true
|
|
250
|
+
@kafka.store_offset(message)
|
|
251
|
+
true
|
|
252
|
+
rescue Rdkafka::RdkafkaError => e
|
|
253
|
+
return false if e.code == :assignment_lost
|
|
254
|
+
return false if e.code == :state
|
|
255
|
+
|
|
256
|
+
raise e
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
# Non thread-safe message committing method
|
|
260
|
+
# @param async [Boolean] should the commit happen async or sync (async by default)
|
|
261
|
+
# @return [Boolean] true if offset commit worked, false if we've lost the assignment
|
|
262
|
+
def internal_commit_offsets(async: true)
|
|
263
|
+
return true unless @offsetting
|
|
264
|
+
|
|
265
|
+
@kafka.commit(nil, async)
|
|
266
|
+
@offsetting = false
|
|
267
|
+
|
|
268
|
+
true
|
|
269
|
+
rescue Rdkafka::RdkafkaError => e
|
|
270
|
+
return false if e.code == :assignment_lost
|
|
271
|
+
return true if e.code == :no_offset
|
|
272
|
+
|
|
273
|
+
raise e
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
# Commits the stored offsets in a sync way and closes the consumer.
|
|
277
|
+
def close
|
|
278
|
+
# Once client is closed, we should not close it again
|
|
279
|
+
# This could only happen in case of a race-condition when forceful shutdown happens
|
|
280
|
+
# and triggers this from a different thread
|
|
281
|
+
return if @closed
|
|
282
|
+
|
|
283
|
+
@mutex.synchronize do
|
|
284
|
+
internal_commit_offsets(async: false)
|
|
285
|
+
|
|
286
|
+
@closed = true
|
|
287
|
+
|
|
288
|
+
# Remove callbacks runners that were registered
|
|
289
|
+
::Karafka::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
|
|
290
|
+
::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
|
|
291
|
+
|
|
292
|
+
@kafka.close
|
|
293
|
+
@buffer.clear
|
|
294
|
+
# @note We do not clear rebalance manager here as we may still have revocation info here
|
|
295
|
+
# that we want to consider valid prior to running another reconnection
|
|
296
|
+
end
|
|
297
|
+
end
|
|
298
|
+
|
|
299
|
+
# @param topic [String]
|
|
300
|
+
# @param partition [Integer]
|
|
301
|
+
# @return [Rdkafka::Consumer::TopicPartitionList]
|
|
302
|
+
def topic_partition_list(topic, partition)
|
|
303
|
+
rdkafka_partition = @kafka
|
|
304
|
+
.assignment
|
|
305
|
+
.to_h[topic]
|
|
306
|
+
&.detect { |part| part.partition == partition }
|
|
307
|
+
|
|
308
|
+
return unless rdkafka_partition
|
|
309
|
+
|
|
310
|
+
Rdkafka::Consumer::TopicPartitionList.new({ topic => [rdkafka_partition] })
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
# Performs a single poll operation.
|
|
314
|
+
#
|
|
315
|
+
# @param timeout [Integer] timeout for a single poll
|
|
316
|
+
# @return [Rdkafka::Consumer::Message, nil] fetched message or nil if nothing polled
|
|
317
|
+
def poll(timeout)
|
|
318
|
+
time_poll ||= TimeTrackers::Poll.new(timeout)
|
|
319
|
+
|
|
320
|
+
return nil if time_poll.exceeded?
|
|
95
321
|
|
|
96
|
-
|
|
97
|
-
# that is set up to consume from topics of a given consumer group
|
|
98
|
-
def kafka_consumer
|
|
99
|
-
# @note We don't cache the connection internally because we cache kafka_consumer that uses
|
|
100
|
-
# kafka client object instance
|
|
101
|
-
@kafka_consumer ||= Builder.call(consumer_group).consumer(
|
|
102
|
-
**ApiAdapter.consumer(consumer_group)
|
|
103
|
-
).tap do |consumer|
|
|
104
|
-
consumer_group.topics.each do |topic|
|
|
105
|
-
settings = ApiAdapter.subscribe(topic)
|
|
322
|
+
time_poll.start
|
|
106
323
|
|
|
107
|
-
|
|
324
|
+
@kafka.poll(timeout)
|
|
325
|
+
rescue ::Rdkafka::RdkafkaError => e
|
|
326
|
+
# We return nil, so we do not restart until running the whole loop
|
|
327
|
+
# This allows us to run revocation jobs and other things and we will pick up new work
|
|
328
|
+
# next time after dispatching all the things that are needed
|
|
329
|
+
#
|
|
330
|
+
# If we would retry here, the client reset would become transparent and we would not have
|
|
331
|
+
# a chance to take any actions
|
|
332
|
+
case e.code
|
|
333
|
+
when :max_poll_exceeded # -147
|
|
334
|
+
reset
|
|
335
|
+
return nil
|
|
336
|
+
when :transport # -195
|
|
337
|
+
reset
|
|
338
|
+
return nil
|
|
339
|
+
when :rebalance_in_progress # -27
|
|
340
|
+
reset
|
|
341
|
+
return nil
|
|
342
|
+
when :not_coordinator # 16
|
|
343
|
+
reset
|
|
344
|
+
return nil
|
|
345
|
+
when :network_exception # 13
|
|
346
|
+
reset
|
|
347
|
+
return nil
|
|
348
|
+
when :unknown_topic_or_part
|
|
349
|
+
# This is expected and temporary until rdkafka catches up with metadata
|
|
350
|
+
return nil
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
raise if time_poll.attempts > MAX_POLL_RETRIES
|
|
354
|
+
raise unless time_poll.retryable?
|
|
355
|
+
|
|
356
|
+
time_poll.checkpoint
|
|
357
|
+
time_poll.backoff
|
|
358
|
+
|
|
359
|
+
# On unknown errors we do our best to retry and handle them before raising
|
|
360
|
+
retry
|
|
361
|
+
end
|
|
362
|
+
|
|
363
|
+
# Builds a new rdkafka consumer instance based on the subscription group configuration
|
|
364
|
+
# @return [Rdkafka::Consumer]
|
|
365
|
+
def build_consumer
|
|
366
|
+
::Rdkafka::Config.logger = ::Karafka::App.config.logger
|
|
367
|
+
config = ::Rdkafka::Config.new(@subscription_group.kafka)
|
|
368
|
+
config.consumer_rebalance_listener = @rebalance_manager
|
|
369
|
+
consumer = config.consumer
|
|
370
|
+
@name = consumer.name
|
|
371
|
+
|
|
372
|
+
# Register statistics runner for this particular type of callbacks
|
|
373
|
+
::Karafka::Instrumentation.statistics_callbacks.add(
|
|
374
|
+
@subscription_group.id,
|
|
375
|
+
Instrumentation::Callbacks::Statistics.new(
|
|
376
|
+
@subscription_group.id,
|
|
377
|
+
@subscription_group.consumer_group_id,
|
|
378
|
+
@name,
|
|
379
|
+
::Karafka::App.config.monitor
|
|
380
|
+
)
|
|
381
|
+
)
|
|
382
|
+
|
|
383
|
+
# Register error tracking callback
|
|
384
|
+
::Karafka::Instrumentation.error_callbacks.add(
|
|
385
|
+
@subscription_group.id,
|
|
386
|
+
Instrumentation::Callbacks::Error.new(
|
|
387
|
+
@subscription_group.id,
|
|
388
|
+
@subscription_group.consumer_group_id,
|
|
389
|
+
@name,
|
|
390
|
+
::Karafka::App.config.monitor
|
|
391
|
+
)
|
|
392
|
+
)
|
|
393
|
+
|
|
394
|
+
# Subscription needs to happen after we assigned the rebalance callbacks just in case of
|
|
395
|
+
# a race condition
|
|
396
|
+
consumer.subscribe(*@subscription_group.topics.map(&:name))
|
|
397
|
+
consumer
|
|
398
|
+
end
|
|
399
|
+
|
|
400
|
+
# We may have a case where in the middle of data polling, we've lost a partition.
|
|
401
|
+
# In a case like this we should remove all the pre-buffered messages from lost partitions as
|
|
402
|
+
# we are no longer responsible in a given process for processing those messages and they
|
|
403
|
+
# should have been picked up by a different process.
|
|
404
|
+
def remove_revoked_and_duplicated_messages
|
|
405
|
+
@rebalance_manager.lost_partitions.each do |topic, partitions|
|
|
406
|
+
partitions.each do |partition|
|
|
407
|
+
@buffer.delete(topic, partition)
|
|
108
408
|
end
|
|
109
409
|
end
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
# attempts if Kafka is down
|
|
113
|
-
sleep(consumer_group.reconnect_timeout)
|
|
114
|
-
# We don't log and just re-raise - this will be logged
|
|
115
|
-
# down the road
|
|
116
|
-
raise
|
|
410
|
+
|
|
411
|
+
@buffer.uniq!
|
|
117
412
|
end
|
|
118
413
|
end
|
|
119
414
|
end
|