karafka 1.4.13 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -5,27 +5,18 @@ module Karafka
5
5
  class Cli < Thor
6
6
  # Server Karafka Cli action
7
7
  class Server < Base
8
- # Server config settings contract
9
- CONTRACT = Contracts::ServerCliOptions.new.freeze
10
-
11
- private_constant :CONTRACT
8
+ include Helpers::Colorize
12
9
 
13
10
  desc 'Start the Karafka server (short-cut alias: "s")'
14
11
  option aliases: 's'
15
- option :daemon, default: false, type: :boolean, aliases: :d
16
- option :pid, default: 'tmp/pids/karafka', type: :string, aliases: :p
17
12
  option :consumer_groups, type: :array, default: nil, aliases: :g
18
13
 
19
14
  # Start the Karafka server
20
15
  def call
21
- cli.info
22
-
23
- validate!
16
+ # Print our banner and info in the dev mode
17
+ print_marketing_info if Karafka::App.env.development?
24
18
 
25
- if cli.options[:daemon]
26
- FileUtils.mkdir_p File.dirname(cli.options[:pid])
27
- daemonize
28
- end
19
+ Contracts::ServerCliOptions.new.validate!(cli.options)
29
20
 
30
21
  # We assign active topics on a server level, as only server is expected to listen on
31
22
  # part of the topics
@@ -36,35 +27,19 @@ module Karafka
36
27
 
37
28
  private
38
29
 
39
- # Checks the server cli configuration
40
- # options validations in terms of app setup (topics, pid existence, etc)
41
- def validate!
42
- result = CONTRACT.call(cli.options)
43
- return if result.success?
44
-
45
- raise Errors::InvalidConfigurationError, result.errors.to_h
46
- end
47
-
48
- # Detaches current process into background and writes its pidfile
49
- def daemonize
50
- ::Process.daemon(true)
51
- File.open(
52
- cli.options[:pid],
53
- 'w'
54
- ) { |file| file.write(::Process.pid) }
55
-
56
- # Remove pidfile on stop, just before the server instance is going to be GCed
57
- # We want to delay the moment in which the pidfile is removed as much as we can,
58
- # so instead of removing it after the server stops running, we rely on the gc moment
59
- # when this object gets removed (it is a bit later), so it is closer to the actual
60
- # system process end. We do that, so monitoring and deployment tools that rely on a pid
61
- # won't alarm or start new system process up until the current one is finished
62
- ObjectSpace.define_finalizer(self, proc { send(:clean) })
63
- end
64
-
65
- # Removes a pidfile (if exist)
66
- def clean
67
- FileUtils.rm_f(cli.options[:pid]) if cli.options[:pid]
30
+ # Prints marketing info
31
+ def print_marketing_info
32
+ Karafka.logger.info Info::BANNER
33
+
34
+ if Karafka.pro?
35
+ Karafka.logger.info(
36
+ green('Thank you for investing in the Karafka Pro subscription!')
37
+ )
38
+ else
39
+ Karafka.logger.info(
40
+ red('You like Karafka? Please consider getting a Pro version!')
41
+ )
42
+ end
68
43
  end
69
44
  end
70
45
  end
data/lib/karafka/cli.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Karafka
4
4
  # Karafka framework Cli
5
+ #
5
6
  # If you want to add/modify command that belongs to CLI, please review all commands
6
7
  # available in cli/ directory inside Karafka source code.
7
8
  #
@@ -10,24 +11,16 @@ module Karafka
10
11
  class Cli < Thor
11
12
  package_name 'Karafka'
12
13
 
13
- default_task :missingno
14
-
15
14
  class << self
16
- # Loads all Cli commands into Thor framework
15
+ # Loads all Cli commands into Thor framework.
17
16
  # This method should be executed before we run Karafka::Cli.start, otherwise we won't
18
- # have any Cli commands available
17
+ # have any Cli commands available.
19
18
  def prepare
20
19
  cli_commands.each do |action|
21
20
  action.bind_to(self)
22
21
  end
23
22
  end
24
23
 
25
- # When there is a CLI crash, exit
26
- # @return [true]
27
- def exit_on_failure?
28
- true
29
- end
30
-
31
24
  private
32
25
 
33
26
  # @return [Array<Class>] Array with Cli action classes that can be used as commands
@@ -42,7 +35,7 @@ module Karafka
42
35
  end
43
36
  end
44
37
 
45
- # This is kinda trick - since we don't have a autoload and other magic stuff
38
+ # This is kinda tricky - since we don't have an autoload and other magic stuff
46
39
  # like Rails does, so instead this method allows us to replace currently running
47
40
  # console with a new one via Kernel.exec. It will start console with new code loaded
48
41
  # Yes, we know that it is not turbo fast, however it is turbo convenient and small
@@ -1,119 +1,414 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Karafka
4
+ # Namespace for Kafka connection related logic
4
5
  module Connection
5
- # Class used as a wrapper around Ruby-Kafka client to simplify additional
6
- # features that we provide/might provide in future and to hide the internal implementation
6
+ # An abstraction layer on top of the rdkafka consumer.
7
+ #
8
+ # It is threadsafe and provides some security measures so we won't end up operating on a
9
+ # closed consumer instance as it causes Ruby VM process to crash.
7
10
  class Client
8
- extend Forwardable
9
-
10
- %i[
11
- seek
12
- trigger_heartbeat
13
- trigger_heartbeat!
14
- ].each do |delegated_method|
15
- def_delegator :kafka_consumer, delegated_method
16
- end
17
-
18
- # Creates a queue consumer client that will pull the data from Kafka
19
- # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
20
- # we create a client
21
- # @return [Karafka::Connection::Client] group consumer that can subscribe to
22
- # multiple topics
23
- def initialize(consumer_group)
24
- @consumer_group = consumer_group
25
- Persistence::Client.write(self)
26
- end
27
-
28
- # Opens connection, gets messages and calls a block for each of the incoming messages
29
- # @yieldparam [Array<Kafka::FetchedMessage>, Symbol] kafka response with an info about
30
- # the type of the fetcher that is being used
31
- # @note This will yield with raw messages - no preprocessing or reformatting.
32
- def fetch_loop
33
- settings = ApiAdapter.consumption(consumer_group)
34
-
35
- if consumer_group.batch_fetching
36
- kafka_consumer.each_batch(**settings) { |batch| yield(batch, :batch) }
37
- else
38
- kafka_consumer.each_message(**settings) { |message| yield(message, :message) }
11
+ attr_reader :rebalance_manager
12
+
13
+ # @return [String] underlying consumer name
14
+ # @note Consumer name may change in case we regenerate it
15
+ attr_reader :name
16
+
17
+ # How many times should we retry polling in case of a failure
18
+ MAX_POLL_RETRIES = 10
19
+
20
+ private_constant :MAX_POLL_RETRIES
21
+
22
+ # Creates a new consumer instance.
23
+ #
24
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
25
+ # with all the configuration details needed for us to create a client
26
+ # @return [Karafka::Connection::Client]
27
+ def initialize(subscription_group)
28
+ # Name is set when we build consumer
29
+ @name = ''
30
+ @mutex = Mutex.new
31
+ @closed = false
32
+ @subscription_group = subscription_group
33
+ @buffer = RawMessagesBuffer.new
34
+ @rebalance_manager = RebalanceManager.new
35
+ @kafka = build_consumer
36
+ # Marks if we need to offset. If we did not store offsets, we should not commit the offset
37
+ # position as it will crash rdkafka
38
+ @offsetting = false
39
+ # We need to keep track of what we have paused for resuming
40
+ # In case we lose a partition, we still need to resume it, otherwise it won't be fetched
41
+ # again if we get reassigned to it later on. We need to keep them as after revocation we
42
+ # no longer may be able to fetch them from Kafka. We could build them but it is easier
43
+ # to just keep them here and use if needed when cannot be obtained
44
+ @paused_tpls = Hash.new { |h, k| h[k] = {} }
45
+ end
46
+
47
+ # Fetches messages within boundaries defined by the settings (time, size, topics, etc).
48
+ #
49
+ # @return [Karafka::Connection::MessagesBuffer] messages buffer that holds messages per topic
50
+ # partition
51
+ # @note This method should not be executed from many threads at the same time
52
+ def batch_poll
53
+ time_poll = TimeTrackers::Poll.new(@subscription_group.max_wait_time)
54
+
55
+ @buffer.clear
56
+ @rebalance_manager.clear
57
+
58
+ loop do
59
+ time_poll.start
60
+
61
+ # Don't fetch more messages if we do not have any time left
62
+ break if time_poll.exceeded?
63
+ # Don't fetch more messages if we've fetched max as we've wanted
64
+ break if @buffer.size >= @subscription_group.max_messages
65
+
66
+ # Fetch message within our time boundaries
67
+ message = poll(time_poll.remaining)
68
+
69
+ # Put a message to the buffer if there is one
70
+ @buffer << message if message
71
+
72
+ # Upon polling rebalance manager might have been updated.
73
+ # If partition revocation happens, we need to remove messages from revoked partitions
74
+ # as well as ensure we do not have duplicates due to the offset reset for partitions
75
+ # that we got assigned
76
+ # We also do early break, so the information about rebalance is used as soon as possible
77
+ if @rebalance_manager.changed?
78
+ remove_revoked_and_duplicated_messages
79
+ break
80
+ end
81
+
82
+ # Track time spent on all of the processing and polling
83
+ time_poll.checkpoint
84
+
85
+ # Finally once we've (potentially) removed revoked, etc, if no messages were returned
86
+ # we can break.
87
+ # Worth keeping in mind, that the rebalance manager might have been updated despite no
88
+ # messages being returned during a poll
89
+ break unless message
39
90
  end
40
- # @note We catch only the processing errors as any other are considered critical (exceptions)
41
- # and should require a client restart with a backoff
42
- rescue Kafka::ProcessingError => e
43
- # If there was an error during consumption, we have to log it, pause current partition
44
- # and process other things
45
- Karafka.monitor.instrument(
46
- 'connection.client.fetch_loop.error',
47
- caller: self,
48
- error: e.cause
49
- )
50
- pause(e.topic, e.partition)
51
- retry
91
+
92
+ @buffer
52
93
  end
53
94
 
54
- # Gracefully stops topic consumption
95
+ # Stores offset for a given partition of a given topic based on the provided message.
96
+ #
97
+ # @param message [Karafka::Messages::Message]
98
+ def store_offset(message)
99
+ @mutex.synchronize do
100
+ internal_store_offset(message)
101
+ end
102
+ end
103
+
104
+ # Commits the offset on a current consumer in a non-blocking or blocking way.
105
+ # Ignoring a case where there would not be an offset (for example when rebalance occurs).
106
+ #
107
+ # @param async [Boolean] should the commit happen async or sync (async by default)
108
+ # @return [Boolean] whether committing was successful. It may not be, when we no longer own
109
+ # given partition.
110
+ #
111
+ # @note This will commit all the offsets for the whole consumer. In order to achieve
112
+ # granular control over where the offset should be for particular topic partitions, the
113
+ # store_offset should be used to only store new offset when we want to to be flushed
114
+ def commit_offsets(async: true)
115
+ @mutex.lock
116
+
117
+ internal_commit_offsets(async: async)
118
+ ensure
119
+ @mutex.unlock
120
+ end
121
+
122
+ # Commits offset in a synchronous way.
123
+ #
124
+ # @see `#commit_offsets` for more details
125
+ def commit_offsets!
126
+ commit_offsets(async: false)
127
+ end
128
+
129
+ # Seek to a particular message. The next poll on the topic/partition will return the
130
+ # message at the given offset.
131
+ #
132
+ # @param message [Messages::Message, Messages::Seek] message to which we want to seek to
133
+ def seek(message)
134
+ @mutex.lock
135
+
136
+ @kafka.seek(message)
137
+ ensure
138
+ @mutex.unlock
139
+ end
140
+
141
+ # Pauses given partition and moves back to last successful offset processed.
142
+ #
143
+ # @param topic [String] topic name
144
+ # @param partition [Integer] partition
145
+ # @param offset [Integer] offset of the message on which we want to pause (this message will
146
+ # be reprocessed after getting back to processing)
147
+ # @note This will pause indefinitely and requires manual `#resume`
148
+ def pause(topic, partition, offset)
149
+ @mutex.lock
150
+
151
+ # Do not pause if the client got closed, would not change anything
152
+ return if @closed
153
+
154
+ pause_msg = Messages::Seek.new(topic, partition, offset)
155
+
156
+ internal_commit_offsets(async: false)
157
+
158
+ # Here we do not use our cached tpls because we should not try to pause something we do
159
+ # not own anymore.
160
+ tpl = topic_partition_list(topic, partition)
161
+
162
+ return unless tpl
163
+
164
+ @paused_tpls[topic][partition] = tpl
165
+
166
+ @kafka.pause(tpl)
167
+
168
+ @kafka.seek(pause_msg)
169
+ ensure
170
+ @mutex.unlock
171
+ end
172
+
173
+ # Resumes processing of a given topic partition after it was paused.
174
+ #
175
+ # @param topic [String] topic name
176
+ # @param partition [Integer] partition
177
+ def resume(topic, partition)
178
+ @mutex.lock
179
+
180
+ return if @closed
181
+
182
+ # Always commit synchronously offsets if any when we resume
183
+ # This prevents resuming without offset in case it would not be committed prior
184
+ # We can skip performance penalty since resuming should not happen too often
185
+ internal_commit_offsets(async: false)
186
+
187
+ # If we were not able, let's try to reuse the one we have (if we have)
188
+ tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]
189
+
190
+ return unless tpl
191
+ # If we did not have it, it means we never paused this partition, thus no resume should
192
+ # happen in the first place
193
+ return unless @paused_tpls[topic].delete(partition)
194
+
195
+ @kafka.resume(tpl)
196
+ ensure
197
+ @mutex.unlock
198
+ end
199
+
200
+ # Gracefully stops topic consumption.
201
+ #
55
202
  # @note Stopping running consumers without a really important reason is not recommended
56
203
  # as until all the consumers are stopped, the server will keep running serving only
57
204
  # part of the messages
58
205
  def stop
59
- @kafka_consumer&.stop
60
- @kafka_consumer = nil
206
+ close
61
207
  end
62
208
 
63
- # Pauses fetching and consumption of a given topic partition
64
- # @param topic [String] topic that we want to pause
65
- # @param partition [Integer] number partition that we want to pause
66
- def pause(topic, partition)
67
- args, kwargs = ApiAdapter.pause(topic, partition, consumer_group).values_at(:args, :kwargs)
68
- kafka_consumer.pause(*args, **kwargs)
209
+ # Marks given message as consumed.
210
+ #
211
+ # @param [Karafka::Messages::Message] message that we want to mark as processed
212
+ # @return [Boolean] true if successful. False if we no longer own given partition
213
+ # @note This method won't trigger automatic offsets commits, rather relying on the offset
214
+ # check-pointing trigger that happens with each batch processed
215
+ def mark_as_consumed(message)
216
+ store_offset(message)
69
217
  end
70
218
 
71
- # Marks given message as consumed
72
- # @param [Karafka::Params::Params] params message that we want to mark as processed
73
- # @note This method won't trigger automatic offsets commits, rather relying on the ruby-kafka
74
- # offsets time-interval based committing
75
- def mark_as_consumed(params)
76
- kafka_consumer.mark_message_as_processed(
77
- *ApiAdapter.mark_message_as_processed(params)
78
- )
219
+ # Marks a given message as consumed and commits the offsets in a blocking way.
220
+ #
221
+ # @param [Karafka::Messages::Message] message that we want to mark as processed
222
+ # @return [Boolean] true if successful. False if we no longer own given partition
223
+ def mark_as_consumed!(message)
224
+ return false unless mark_as_consumed(message)
225
+
226
+ commit_offsets!
79
227
  end
80
228
 
81
- # Marks a given message as consumed and commit the offsets in a blocking way
82
- # @param [Karafka::Params::Params] params message that we want to mark as processed
83
- # @note This method commits the offset for each manual marking to be sure
84
- # that offset commit happen asap in case of a crash
85
- def mark_as_consumed!(params)
86
- mark_as_consumed(params)
87
- # Trigger an immediate, blocking offset commit in order to minimize the risk of crashing
88
- # before the automatic triggers have kicked in.
89
- kafka_consumer.commit_offsets
229
+ # Closes and resets the client completely.
230
+ def reset
231
+ close
232
+
233
+ @mutex.synchronize do
234
+ @closed = false
235
+ @offsetting = false
236
+ @paused_tpls.clear
237
+ @kafka = build_consumer
238
+ end
90
239
  end
91
240
 
92
241
  private
93
242
 
94
- attr_reader :consumer_group
243
+ # When we cannot store an offset, it means we no longer own the partition
244
+ #
245
+ # Non thread-safe offset storing method
246
+ # @param message [Karafka::Messages::Message]
247
+ # @return [Boolean] true if we could store the offset (if we still own the partition)
248
+ def internal_store_offset(message)
249
+ @offsetting = true
250
+ @kafka.store_offset(message)
251
+ true
252
+ rescue Rdkafka::RdkafkaError => e
253
+ return false if e.code == :assignment_lost
254
+ return false if e.code == :state
255
+
256
+ raise e
257
+ end
258
+
259
+ # Non thread-safe message committing method
260
+ # @param async [Boolean] should the commit happen async or sync (async by default)
261
+ # @return [Boolean] true if offset commit worked, false if we've lost the assignment
262
+ def internal_commit_offsets(async: true)
263
+ return true unless @offsetting
264
+
265
+ @kafka.commit(nil, async)
266
+ @offsetting = false
267
+
268
+ true
269
+ rescue Rdkafka::RdkafkaError => e
270
+ return false if e.code == :assignment_lost
271
+ return true if e.code == :no_offset
272
+
273
+ raise e
274
+ end
275
+
276
+ # Commits the stored offsets in a sync way and closes the consumer.
277
+ def close
278
+ # Once client is closed, we should not close it again
279
+ # This could only happen in case of a race-condition when forceful shutdown happens
280
+ # and triggers this from a different thread
281
+ return if @closed
282
+
283
+ @mutex.synchronize do
284
+ internal_commit_offsets(async: false)
285
+
286
+ @closed = true
287
+
288
+ # Remove callbacks runners that were registered
289
+ ::Karafka::Instrumentation.statistics_callbacks.delete(@subscription_group.id)
290
+ ::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
291
+
292
+ @kafka.close
293
+ @buffer.clear
294
+ # @note We do not clear rebalance manager here as we may still have revocation info here
295
+ # that we want to consider valid prior to running another reconnection
296
+ end
297
+ end
298
+
299
+ # @param topic [String]
300
+ # @param partition [Integer]
301
+ # @return [Rdkafka::Consumer::TopicPartitionList]
302
+ def topic_partition_list(topic, partition)
303
+ rdkafka_partition = @kafka
304
+ .assignment
305
+ .to_h[topic]
306
+ &.detect { |part| part.partition == partition }
307
+
308
+ return unless rdkafka_partition
309
+
310
+ Rdkafka::Consumer::TopicPartitionList.new({ topic => [rdkafka_partition] })
311
+ end
312
+
313
+ # Performs a single poll operation.
314
+ #
315
+ # @param timeout [Integer] timeout for a single poll
316
+ # @return [Rdkafka::Consumer::Message, nil] fetched message or nil if nothing polled
317
+ def poll(timeout)
318
+ time_poll ||= TimeTrackers::Poll.new(timeout)
319
+
320
+ return nil if time_poll.exceeded?
95
321
 
96
- # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
97
- # that is set up to consume from topics of a given consumer group
98
- def kafka_consumer
99
- # @note We don't cache the connection internally because we cache kafka_consumer that uses
100
- # kafka client object instance
101
- @kafka_consumer ||= Builder.call(consumer_group).consumer(
102
- **ApiAdapter.consumer(consumer_group)
103
- ).tap do |consumer|
104
- consumer_group.topics.each do |topic|
105
- settings = ApiAdapter.subscribe(topic)
322
+ time_poll.start
106
323
 
107
- consumer.subscribe(settings[0], **settings[1])
324
+ @kafka.poll(timeout)
325
+ rescue ::Rdkafka::RdkafkaError => e
326
+ # We return nil, so we do not restart until running the whole loop
327
+ # This allows us to run revocation jobs and other things and we will pick up new work
328
+ # next time after dispatching all the things that are needed
329
+ #
330
+ # If we would retry here, the client reset would become transparent and we would not have
331
+ # a chance to take any actions
332
+ case e.code
333
+ when :max_poll_exceeded # -147
334
+ reset
335
+ return nil
336
+ when :transport # -195
337
+ reset
338
+ return nil
339
+ when :rebalance_in_progress # -27
340
+ reset
341
+ return nil
342
+ when :not_coordinator # 16
343
+ reset
344
+ return nil
345
+ when :network_exception # 13
346
+ reset
347
+ return nil
348
+ when :unknown_topic_or_part
349
+ # This is expected and temporary until rdkafka catches up with metadata
350
+ return nil
351
+ end
352
+
353
+ raise if time_poll.attempts > MAX_POLL_RETRIES
354
+ raise unless time_poll.retryable?
355
+
356
+ time_poll.checkpoint
357
+ time_poll.backoff
358
+
359
+ # On unknown errors we do our best to retry and handle them before raising
360
+ retry
361
+ end
362
+
363
+ # Builds a new rdkafka consumer instance based on the subscription group configuration
364
+ # @return [Rdkafka::Consumer]
365
+ def build_consumer
366
+ ::Rdkafka::Config.logger = ::Karafka::App.config.logger
367
+ config = ::Rdkafka::Config.new(@subscription_group.kafka)
368
+ config.consumer_rebalance_listener = @rebalance_manager
369
+ consumer = config.consumer
370
+ @name = consumer.name
371
+
372
+ # Register statistics runner for this particular type of callbacks
373
+ ::Karafka::Instrumentation.statistics_callbacks.add(
374
+ @subscription_group.id,
375
+ Instrumentation::Callbacks::Statistics.new(
376
+ @subscription_group.id,
377
+ @subscription_group.consumer_group_id,
378
+ @name,
379
+ ::Karafka::App.config.monitor
380
+ )
381
+ )
382
+
383
+ # Register error tracking callback
384
+ ::Karafka::Instrumentation.error_callbacks.add(
385
+ @subscription_group.id,
386
+ Instrumentation::Callbacks::Error.new(
387
+ @subscription_group.id,
388
+ @subscription_group.consumer_group_id,
389
+ @name,
390
+ ::Karafka::App.config.monitor
391
+ )
392
+ )
393
+
394
+ # Subscription needs to happen after we assigned the rebalance callbacks just in case of
395
+ # a race condition
396
+ consumer.subscribe(*@subscription_group.topics.map(&:name))
397
+ consumer
398
+ end
399
+
400
+ # We may have a case where in the middle of data polling, we've lost a partition.
401
+ # In a case like this we should remove all the pre-buffered messages from lost partitions as
402
+ # we are no longer responsible in a given process for processing those messages and they
403
+ # should have been picked up by a different process.
404
+ def remove_revoked_and_duplicated_messages
405
+ @rebalance_manager.lost_partitions.each do |topic, partitions|
406
+ partitions.each do |partition|
407
+ @buffer.delete(topic, partition)
108
408
  end
109
409
  end
110
- rescue Kafka::ConnectionError
111
- # If we would not wait it will spam log file with failed
112
- # attempts if Kafka is down
113
- sleep(consumer_group.reconnect_timeout)
114
- # We don't log and just re-raise - this will be logged
115
- # down the road
116
- raise
410
+
411
+ @buffer.uniq!
117
412
  end
118
413
  end
119
414
  end