karafka 1.4.13 → 2.0.0

Files changed (170)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
data/lib/karafka/cli/server.rb CHANGED
@@ -5,27 +5,18 @@ module Karafka
   class Cli < Thor
     # Server Karafka Cli action
     class Server < Base
-      # Server config settings contract
-      CONTRACT = Contracts::ServerCliOptions.new.freeze
-
-      private_constant :CONTRACT
+      include Helpers::Colorize
 
       desc 'Start the Karafka server (short-cut alias: "s")'
       option aliases: 's'
-      option :daemon, default: false, type: :boolean, aliases: :d
-      option :pid, default: 'tmp/pids/karafka', type: :string, aliases: :p
       option :consumer_groups, type: :array, default: nil, aliases: :g
 
       # Start the Karafka server
       def call
-        cli.info
-
-        validate!
+        # Print our banner and info in the dev mode
+        print_marketing_info if Karafka::App.env.development?
 
-        if cli.options[:daemon]
-          FileUtils.mkdir_p File.dirname(cli.options[:pid])
-          daemonize
-        end
+        Contracts::ServerCliOptions.new.validate!(cli.options)
 
         # We assign active topics on a server level, as only server is expected to listen on
         # part of the topics
@@ -36,35 +27,19 @@ module Karafka
 
       private
 
-      # Checks the server cli configuration
-      # options validations in terms of app setup (topics, pid existence, etc)
-      def validate!
-        result = CONTRACT.call(cli.options)
-        return if result.success?
-
-        raise Errors::InvalidConfigurationError, result.errors.to_h
-      end
-
-      # Detaches current process into background and writes its pidfile
-      def daemonize
-        ::Process.daemon(true)
-        File.open(
-          cli.options[:pid],
-          'w'
-        ) { |file| file.write(::Process.pid) }
-
-        # Remove pidfile on stop, just before the server instance is going to be GCed
-        # We want to delay the moment in which the pidfile is removed as much as we can,
-        # so instead of removing it after the server stops running, we rely on the gc moment
-        # when this object gets removed (it is a bit later), so it is closer to the actual
-        # system process end. We do that, so monitoring and deployment tools that rely on a pid
-        # won't alarm or start new system process up until the current one is finished
-        ObjectSpace.define_finalizer(self, proc { send(:clean) })
-      end
-
-      # Removes a pidfile (if exist)
-      def clean
-        FileUtils.rm_f(cli.options[:pid]) if cli.options[:pid]
+      # Prints marketing info
+      def print_marketing_info
+        Karafka.logger.info Info::BANNER
+
+        if Karafka.pro?
+          Karafka.logger.info(
+            green('Thank you for investing in the Karafka Pro subscription!')
+          )
+        else
+          Karafka.logger.info(
+            red('You like Karafka? Please consider getting a Pro version!')
+          )
+        end
       end
     end
   end
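
Two things disappear in this file: daemonization with its pidfile lifecycle, and the frozen CONTRACT constant with its call-and-check validation. The replacement style, a contract object whose validate! raises on bad input, reduces to something like the following sketch (class name and rule are illustrative, not Karafka's actual contract):

# Illustrative only: a contract whose validate! raises instead of
# returning a result object that callers must remember to inspect.
class CliOptionsContract
  # @param options [Hash] parsed CLI options
  # @raise [ArgumentError] when any rule is broken
  def validate!(options)
    groups = options[:consumer_groups]

    # Hypothetical rule mirroring the :consumer_groups option above:
    # when provided, it must be a non-empty array of group names
    if groups && !(groups.is_a?(Array) && groups.any?)
      raise ArgumentError, 'consumer_groups must be a non-empty array'
    end

    options
  end
end

CliOptionsContract.new.validate!(consumer_groups: %w[events payments])
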
data/lib/karafka/cli.rb CHANGED
@@ -2,6 +2,7 @@
 
 module Karafka
   # Karafka framework Cli
+  #
   # If you want to add/modify command that belongs to CLI, please review all commands
   # available in cli/ directory inside Karafka source code.
   #
@@ -10,24 +11,16 @@ module Karafka
   class Cli < Thor
     package_name 'Karafka'
 
-    default_task :missingno
-
     class << self
-      # Loads all Cli commands into Thor framework
+      # Loads all Cli commands into Thor framework.
       # This method should be executed before we run Karafka::Cli.start, otherwise we won't
-      # have any Cli commands available
+      # have any Cli commands available.
       def prepare
        cli_commands.each do |action|
          action.bind_to(self)
        end
      end
 
-      # When there is a CLI crash, exit
-      # @return [true]
-      def exit_on_failure?
-        true
-      end
-
      private
 
      # @return [Array<Class>] Array with Cli action classes that can be used as commands
@@ -42,7 +35,7 @@ module Karafka
     end
   end
 
-  # This is kinda trick - since we don't have a autoload and other magic stuff
+  # This is kinda tricky - since we don't have an autoload and other magic stuff
   # like Rails does, so instead this method allows us to replace currently running
   # console with a new one via Kernel.exec. It will start console with new code loaded
   # Yes, we know that it is not turbo fast, however it is turbo convenient and small
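
The prepare method that survives this diff leans on Thor's convention that public instance methods become commands, letting each command class graft itself onto the CLI. A rough, self-contained sketch of that binding idea (bind here is a stand-in for the bind_to defined in cli/base.rb, which this diff does not show):

require 'thor'

# Simplified illustration of binding commands into a Thor CLI at boot time.
# Thor registers a command for every public method defined after a `desc` call,
# so define_method is enough to turn a block into a runnable command.
class DemoCli < Thor
  def self.bind(name, description, &handler)
    desc(name, description)
    define_method(name, &handler)
  end
end

DemoCli.bind('info', 'Prints framework info') { puts 'demo 2.0.0' }

# With commands bound before start, `ruby demo_cli.rb info` prints the line above
DemoCli.start(ARGV)
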
data/lib/karafka/connection/client.rb CHANGED
@@ -1,119 +1,414 @@
 # frozen_string_literal: true
 
 module Karafka
+  # Namespace for Kafka connection related logic
   module Connection
-    # Class used as a wrapper around Ruby-Kafka client to simplify additional
-    # features that we provide/might provide in future and to hide the internal implementation
+    # An abstraction layer on top of the rdkafka consumer.
+    #
+    # It is threadsafe and provides some security measures so we won't end up operating on a
+    # closed consumer instance as it causes Ruby VM process to crash.
     class Client
-      extend Forwardable
-
-      %i[
-        seek
-        trigger_heartbeat
-        trigger_heartbeat!
-      ].each do |delegated_method|
-        def_delegator :kafka_consumer, delegated_method
-      end
-
-      # Creates a queue consumer client that will pull the data from Kafka
-      # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
-      #   we create a client
-      # @return [Karafka::Connection::Client] group consumer that can subscribe to
-      #   multiple topics
-      def initialize(consumer_group)
-        @consumer_group = consumer_group
-        Persistence::Client.write(self)
-      end
-
-      # Opens connection, gets messages and calls a block for each of the incoming messages
-      # @yieldparam [Array<Kafka::FetchedMessage>, Symbol] kafka response with an info about
-      #   the type of the fetcher that is being used
-      # @note This will yield with raw messages - no preprocessing or reformatting.
-      def fetch_loop
-        settings = ApiAdapter.consumption(consumer_group)
-
-        if consumer_group.batch_fetching
-          kafka_consumer.each_batch(**settings) { |batch| yield(batch, :batch) }
-        else
-          kafka_consumer.each_message(**settings) { |message| yield(message, :message) }
+      attr_reader :rebalance_manager
+
+      # @return [String] underlying consumer name
+      # @note Consumer name may change in case we regenerate it
+      attr_reader :name
+
+      # How many times should we retry polling in case of a failure
+      MAX_POLL_RETRIES = 10
+
+      private_constant :MAX_POLL_RETRIES
+
+      # Creates a new consumer instance.
+      #
+      # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
+      #   with all the configuration details needed for us to create a client
+      # @return [Karafka::Connection::Rdk::Consumer]
+      def initialize(subscription_group)
+        # Name is set when we build consumer
+        @name = ''
+        @mutex = Mutex.new
+        @closed = false
+        @subscription_group = subscription_group
+        @buffer = RawMessagesBuffer.new
+        @rebalance_manager = RebalanceManager.new
+        @kafka = build_consumer
+        # Marks if we need to offset. If we did not store offsets, we should not commit the offset
+        # position as it will crash rdkafka
+        @offsetting = false
+        # We need to keep track of what we have paused for resuming
+        # In case we loose partition, we still need to resume it, otherwise it won't be fetched
+        # again if we get reassigned to it later on. We need to keep them as after revocation we
+        # no longer may be able to fetch them from Kafka. We could build them but it is easier
+        # to just keep them here and use if needed when cannot be obtained
+        @paused_tpls = Hash.new { |h, k| h[k] = {} }
+      end
+
+      # Fetches messages within boundaries defined by the settings (time, size, topics, etc).
+      #
+      # @return [Karafka::Connection::MessagesBuffer] messages buffer that holds messages per topic
+      #   partition
+      # @note This method should not be executed from many threads at the same time
+      def batch_poll
+        time_poll = TimeTrackers::Poll.new(@subscription_group.max_wait_time)
+
+        @buffer.clear
+        @rebalance_manager.clear
+
+        loop do
+          time_poll.start
+
+          # Don't fetch more messages if we do not have any time left
+          break if time_poll.exceeded?
+          # Don't fetch more messages if we've fetched max as we've wanted
+          break if @buffer.size >= @subscription_group.max_messages
+
+          # Fetch message within our time boundaries
+          message = poll(time_poll.remaining)
+
+          # Put a message to the buffer if there is one
+          @buffer << message if message
+
+          # Upon polling rebalance manager might have been updated.
+          # If partition revocation happens, we need to remove messages from revoked partitions
+          # as well as ensure we do not have duplicated due to the offset reset for partitions
+          # that we got assigned
+          # We also do early break, so the information about rebalance is used as soon as possible
+          if @rebalance_manager.changed?
+            remove_revoked_and_duplicated_messages
+            break
+          end
+
+          # Track time spent on all of the processing and polling
+          time_poll.checkpoint
+
+          # Finally once we've (potentially) removed revoked, etc, if no messages were returned
+          # we can break.
+          # Worth keeping in mind, that the rebalance manager might have been updated despite no
+          # messages being returned during a poll
+          break unless message
         end
-        # @note We catch only the processing errors as any other are considered critical (exceptions)
-        #   and should require a client restart with a backoff
-      rescue Kafka::ProcessingError => e
-        # If there was an error during consumption, we have to log it, pause current partition
-        # and process other things
-        Karafka.monitor.instrument(
-          'connection.client.fetch_loop.error',
-          caller: self,
-          error: e.cause
-        )
-        pause(e.topic, e.partition)
-        retry
+
+        @buffer
       end
 
-      # Gracefully stops topic consumption
+      # Stores offset for a given partition of a given topic based on the provided message.
+      #
+      # @param message [Karafka::Messages::Message]
+      def store_offset(message)
+        @mutex.synchronize do
+          internal_store_offset(message)
+        end
+      end
+
+      # Commits the offset on a current consumer in a non-blocking or blocking way.
+      # Ignoring a case where there would not be an offset (for example when rebalance occurs).
+      #
+      # @param async [Boolean] should the commit happen async or sync (async by default)
+      # @return [Boolean] did committing was successful. It may be not, when we no longer own
+      #   given partition.
+      #
+      # @note This will commit all the offsets for the whole consumer. In order to achieve
+      #   granular control over where the offset should be for particular topic partitions, the
+      #   store_offset should be used to only store new offset when we want to to be flushed
+      def commit_offsets(async: true)
+        @mutex.lock
+
+        internal_commit_offsets(async: async)
+      ensure
+        @mutex.unlock
+      end
+
+      # Commits offset in a synchronous way.
+      #
+      # @see `#commit_offset` for more details
+      def commit_offsets!
+        commit_offsets(async: false)
+      end
+
+      # Seek to a particular message. The next poll on the topic/partition will return the
+      # message at the given offset.
+      #
+      # @param message [Messages::Message, Messages::Seek] message to which we want to seek to
+      def seek(message)
+        @mutex.lock
+
+        @kafka.seek(message)
+      ensure
+        @mutex.unlock
+      end
+
+      # Pauses given partition and moves back to last successful offset processed.
+      #
+      # @param topic [String] topic name
+      # @param partition [Integer] partition
+      # @param offset [Integer] offset of the message on which we want to pause (this message will
+      #   be reprocessed after getting back to processing)
+      # @note This will pause indefinitely and requires manual `#resume`
+      def pause(topic, partition, offset)
+        @mutex.lock
+
+        # Do not pause if the client got closed, would not change anything
+        return if @closed
+
+        pause_msg = Messages::Seek.new(topic, partition, offset)
+
+        internal_commit_offsets(async: false)
+
+        # Here we do not use our cached tpls because we should not try to pause something we do
+        # not own anymore.
+        tpl = topic_partition_list(topic, partition)
+
+        return unless tpl
+
+        @paused_tpls[topic][partition] = tpl
+
+        @kafka.pause(tpl)
+
+        @kafka.seek(pause_msg)
+      ensure
+        @mutex.unlock
+      end
+
+      # Resumes processing of a give topic partition after it was paused.
+      #
+      # @param topic [String] topic name
+      # @param partition [Integer] partition
+      def resume(topic, partition)
+        @mutex.lock
+
+        return if @closed
+
+        # Always commit synchronously offsets if any when we resume
+        # This prevents resuming without offset in case it would not be committed prior
+        # We can skip performance penalty since resuming should not happen too often
+        internal_commit_offsets(async: false)
+
+        # If we were not able, let's try to reuse the one we have (if we have)
+        tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]
+
+        return unless tpl
+        # If we did not have it, it means we never paused this partition, thus no resume should
+        # happen in the first place
+        return unless @paused_tpls[topic].delete(partition)
+
+        @kafka.resume(tpl)
+      ensure
+        @mutex.unlock
+      end
+
+      # Gracefully stops topic consumption.
+      #
       # @note Stopping running consumers without a really important reason is not recommended
       #   as until all the consumers are stopped, the server will keep running serving only
       #   part of the messages
       def stop
-        @kafka_consumer&.stop
-        @kafka_consumer = nil
+        close
       end
 
-      # Pauses fetching and consumption of a given topic partition
-      # @param topic [String] topic that we want to pause
-      # @param partition [Integer] number partition that we want to pause
-      def pause(topic, partition)
-        args, kwargs = ApiAdapter.pause(topic, partition, consumer_group).values_at(:args, :kwargs)
-        kafka_consumer.pause(*args, **kwargs)
+      # Marks given message as consumed.
+      #
+      # @param [Karafka::Messages::Message] message that we want to mark as processed
+      # @return [Boolean] true if successful. False if we no longer own given partition
+      # @note This method won't trigger automatic offsets commits, rather relying on the offset
+      #   check-pointing trigger that happens with each batch processed
+      def mark_as_consumed(message)
+        store_offset(message)
       end
 
-      # Marks given message as consumed
-      # @param [Karafka::Params::Params] params message that we want to mark as processed
-      # @note This method won't trigger automatic offsets commits, rather relying on the ruby-kafka
-      #   offsets time-interval based committing
-      def mark_as_consumed(params)
-        kafka_consumer.mark_message_as_processed(
-          *ApiAdapter.mark_message_as_processed(params)
-        )
+      # Marks a given message as consumed and commits the offsets in a blocking way.
+      #
+      # @param [Karafka::Messages::Message] message that we want to mark as processed
+      # @return [Boolean] true if successful. False if we no longer own given partition
+      def mark_as_consumed!(message)
+        return false unless mark_as_consumed(message)
+
+        commit_offsets!
       end
 
-      # Marks a given message as consumed and commit the offsets in a blocking way
-      # @param [Karafka::Params::Params] params message that we want to mark as processed
-      # @note This method commits the offset for each manual marking to be sure
-      #   that offset commit happen asap in case of a crash
-      def mark_as_consumed!(params)
-        mark_as_consumed(params)
-        # Trigger an immediate, blocking offset commit in order to minimize the risk of crashing
-        # before the automatic triggers have kicked in.
-        kafka_consumer.commit_offsets
+      # Closes and resets the client completely.
+      def reset
+        close
+
+        @mutex.synchronize do
+          @closed = false
+          @offsetting = false
+          @paused_tpls.clear
+          @kafka = build_consumer
+        end
       end
 
       private
 
-      attr_reader :consumer_group
+      # When we cannot store an offset, it means we no longer own the partition
+      #
+      # Non thread-safe offset storing method
+      # @param message [Karafka::Messages::Message]
+      # @return [Boolean] true if we could store the offset (if we still own the partition)
+      def internal_store_offset(message)
+        @offsetting = true
+        @kafka.store_offset(message)
+        true
+      rescue Rdkafka::RdkafkaError => e
+        return false if e.code == :assignment_lost
+        return false if e.code == :state
+
+        raise e
+      end
+
+      # Non thread-safe message committing method
+      # @param async [Boolean] should the commit happen async or sync (async by default)
+      # @return [Boolean] true if offset commit worked, false if we've lost the assignment
+      def internal_commit_offsets(async: true)
+        return true unless @offsetting
+
+        @kafka.commit(nil, async)
+        @offsetting = false
+
+        true
+      rescue Rdkafka::RdkafkaError => e
+        return false if e.code == :assignment_lost
+        return true if e.code == :no_offset
+
+        raise e
+      end
+
+      # Commits the stored offsets in a sync way and closes the consumer.
+      def close
+        # Once client is closed, we should not close it again
+        # This could only happen in case of a race-condition when forceful shutdown happens
+        # and triggers this from a different thread
+        return if @closed
+
+        @mutex.synchronize do
+          internal_commit_offsets(async: false)
+
+          @closed = true
+
+          # Remove callbacks runners that were registered
+          ::Karafka::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
+          ::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
+
+          @kafka.close
+          @buffer.clear
+          # @note We do not clear rebalance manager here as we may still have revocation info here
+          #   that we want to consider valid prior to running another reconnection
+        end
+      end
+
+      # @param topic [String]
+      # @param partition [Integer]
+      # @return [Rdkafka::Consumer::TopicPartitionList]
+      def topic_partition_list(topic, partition)
+        rdkafka_partition = @kafka
+                            .assignment
+                            .to_h[topic]
+                            &.detect { |part| part.partition == partition }
+
+        return unless rdkafka_partition
+
+        Rdkafka::Consumer::TopicPartitionList.new({ topic => [rdkafka_partition] })
+      end
+
+      # Performs a single poll operation.
+      #
+      # @param timeout [Integer] timeout for a single poll
+      # @return [Rdkafka::Consumer::Message, nil] fetched message or nil if nothing polled
+      def poll(timeout)
+        time_poll ||= TimeTrackers::Poll.new(timeout)
+
+        return nil if time_poll.exceeded?
 
-      # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
-      #   that is set up to consume from topics of a given consumer group
-      def kafka_consumer
-        # @note We don't cache the connection internally because we cache kafka_consumer that uses
-        #   kafka client object instance
-        @kafka_consumer ||= Builder.call(consumer_group).consumer(
-          **ApiAdapter.consumer(consumer_group)
-        ).tap do |consumer|
-          consumer_group.topics.each do |topic|
-            settings = ApiAdapter.subscribe(topic)
+        time_poll.start
 
-            consumer.subscribe(settings[0], **settings[1])
+        @kafka.poll(timeout)
+      rescue ::Rdkafka::RdkafkaError => e
+        # We return nil, so we do not restart until running the whole loop
+        # This allows us to run revocation jobs and other things and we will pick up new work
+        # next time after dispatching all the things that are needed
+        #
+        # If we would retry here, the client reset would become transparent and we would not have
+        # a chance to take any actions
+        case e.code
+        when :max_poll_exceeded # -147
+          reset
+          return nil
+        when :transport # -195
+          reset
+          return nil
+        when :rebalance_in_progress # -27
+          reset
+          return nil
+        when :not_coordinator # 16
+          reset
+          return nil
+        when :network_exception # 13
+          reset
+          return nil
+        when :unknown_topic_or_part
+          # This is expected and temporary until rdkafka catches up with metadata
+          return nil
+        end
+
+        raise if time_poll.attempts > MAX_POLL_RETRIES
+        raise unless time_poll.retryable?
+
+        time_poll.checkpoint
+        time_poll.backoff
+
+        # On unknown errors we do our best to retry and handle them before raising
+        retry
+      end
+
+      # Builds a new rdkafka consumer instance based on the subscription group configuration
+      # @return [Rdkafka::Consumer]
+      def build_consumer
+        ::Rdkafka::Config.logger = ::Karafka::App.config.logger
+        config = ::Rdkafka::Config.new(@subscription_group.kafka)
+        config.consumer_rebalance_listener = @rebalance_manager
+        consumer = config.consumer
+        @name = consumer.name
+
+        # Register statistics runner for this particular type of callbacks
+        ::Karafka::Instrumentation.statistics_callbacks.add(
+          @subscription_group.id,
+          Instrumentation::Callbacks::Statistics.new(
+            @subscription_group.id,
+            @subscription_group.consumer_group_id,
+            @name,
+            ::Karafka::App.config.monitor
+          )
+        )
+
+        # Register error tracking callback
+        ::Karafka::Instrumentation.error_callbacks.add(
+          @subscription_group.id,
+          Instrumentation::Callbacks::Error.new(
+            @subscription_group.id,
+            @subscription_group.consumer_group_id,
+            @name,
+            ::Karafka::App.config.monitor
+          )
+        )
+
+        # Subscription needs to happen after we assigned the rebalance callbacks just in case of
+        # a race condition
+        consumer.subscribe(*@subscription_group.topics.map(&:name))
+        consumer
+      end
+
+      # We may have a case where in the middle of data polling, we've lost a partition.
+      # In a case like this we should remove all the pre-buffered messages from list partitions as
+      # we are no longer responsible in a given process for processing those messages and they
+      # should have been picked up by a different process.
+      def remove_revoked_and_duplicated_messages
+        @rebalance_manager.lost_partitions.each do |topic, partitions|
+          partitions.each do |partition|
+            @buffer.delete(topic, partition)
           end
         end
-      rescue Kafka::ConnectionError
-        # If we would not wait it will spam log file with failed
-        # attempts if Kafka is down
-        sleep(consumer_group.reconnect_timeout)
-        # We don't log and just re-raise - this will be logged
-        # down the road
-        raise
+
+        @buffer.uniq!
       end
     end
   end
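
The core of the rewritten client is batch_poll: a single loop that polls until either the subscription group's time budget or message cap is exhausted, breaking early on rebalance or an empty poll. Stripped of the rebalance and buffer plumbing, the time-budget mechanics come down to this sketch (standalone Ruby; the constants and the client object's poll contract are assumptions mirroring rdkafka's poll(timeout_ms)):

# Standalone sketch of a time- and size-bounded poll loop (simplified from batch_poll).
# MAX_WAIT_MS and MAX_MESSAGES stand in for the subscription group settings.
MAX_WAIT_MS = 1_000
MAX_MESSAGES = 100

def now_ms
  Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)
end

# client is assumed to respond to #poll(timeout_ms) returning a message or nil,
# the way an rdkafka consumer does
def batch_poll(client)
  buffer = []
  deadline = now_ms + MAX_WAIT_MS

  loop do
    remaining = deadline - now_ms

    # Stop when the time budget is spent or the batch is full
    break if remaining <= 0 || buffer.size >= MAX_MESSAGES

    message = client.poll(remaining)

    # nil means nothing arrived within the remaining budget; give up for this cycle
    break unless message

    buffer << message
  end

  buffer
end
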