karafka 2.0.39 → 2.0.41

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 12fe8a47dc0ab16b0f7783424cd1aa043c2d2b228b4f4164f1cecefe604269d9
-   data.tar.gz: 9fa3bae282770dd67503c41ef4b73a27a38bfcff3bf472ddd63753d14d03614f
+   metadata.gz: 9560b22fc8cfd59dcaeb6551bcc0b2d2ebfef2f162a12905fa3aefe0c9c5865e
+   data.tar.gz: a5b7aba125288ec96cf3b862e72447bf467fe23f923c232fe1d3ff9c48b1fdb4
  SHA512:
-   metadata.gz: 9e6536c90a411a0b42337f73c00d9f454028366f42eabb1b7f40902181bcbcfd43258741d6fc51c6e29046b9ee1f8598755440d28a00ca96104a61a8095c20c2
-   data.tar.gz: be75dd1bfa744187f770f2e1f0deeedfba4f3fb1b824d6bab91f056f96e60a33498429e35ea22841404def0935e584db0df4289d2818631ace2e597d28785960
+   metadata.gz: d6581af85f8900d2d5ce91b6f9ec8ed0e1f6be5f3e80c36315c44c8dc07c30949566e281f40feb1b54cc9bbca771ac2188637e916d19edcb2fe26c04aeb362e1
+   data.tar.gz: e467612b3185b5ec764d387e72507b617bf49d702436da2021d467fb0c23630aa98151c9679444a34114317a8c52e3c37f0268c6a6bdb4564ffa1bab51993109
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
  # Karafka framework changelog

+ ## 2.0.41 (2023-04-19)
+ - **[Feature]** Provide `Karafka::Pro::Iterator` for anonymous topic/partitions iterations and messages lookups (#1389 and #1427).
+ - [Improvement] Optimize topic lookup for `read_topic` admin method usage.
+ - [Improvement] Report via `LoggerListener` information about the partition on which a given job has started and finished.
+ - [Improvement] Slightly normalize the `LoggerListener` format. Always report partition-related operations as follows: `TOPIC_NAME/PARTITION`.
+ - [Improvement] Do not retry recovery from `unknown_topic_or_part` when Karafka is shutting down, as there is no point and no risk of any data losses.
+ - [Improvement] Report `client.software.name` and `client.software.version` according to the `librdkafka` recommendation.
+ - [Improvement] Report the ten longest integration specs after the suite execution.
+ - [Improvement] Prevent user-originating errors related to statistics processing after a listener loop crash from potentially crashing the listener loop and hanging the Karafka process.
+
+ ## 2.0.40 (2023-04-13)
+ - [Improvement] Introduce the `Karafka::Messages::Messages#empty?` method to handle Idle-related cases where shutdown or revocation would be called on an empty messages set. This method allows for checking if there are any messages in the messages batch.
+ - [Refactor] Require the messages builder to accept the partition and do not fetch it from messages.
+ - [Refactor] Use an empty messages set for internal APIs (Idle), so there always is a `Karafka::Messages::Messages`.
+ - [Refactor] Allow for empty messages set initialization with -1001 and -1 on metadata (similar to `librdkafka`).
+
  ## 2.0.39 (2023-04-11)
  - **[Feature]** Provide ability to throttle/limit number of messages processed in a time unit (#1203)
  - **[Feature]** Provide Delayed Topics (#1000)
data/Gemfile CHANGED
@@ -6,10 +6,11 @@ plugin 'diffend'

  gemspec

- # Karafka gem does not require this but we add it here so we can test the integration with
- # ActiveJob much easier
+ # Karafka gem does not require activejob nor karafka-web to work
+ # They are added here because they are part of the integration suite
  group :integrations do
    gem 'activejob'
+   gem 'karafka-web'
  end

  group :test do
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     karafka (2.0.39)
+     karafka (2.0.41)
        karafka-core (>= 2.0.12, < 3.0.0)
        thor (>= 0.20)
        waterdrop (>= 2.4.10, < 3.0.0)
@@ -22,6 +22,7 @@ GEM
      concurrent-ruby (1.2.2)
      diff-lcs (1.5.0)
      docile (1.4.0)
+     erubi (1.12.0)
      factory_bot (6.2.1)
        activesupport (>= 5.0.0)
      ffi (1.15.5)
@@ -36,9 +37,18 @@ GEM
        ffi (~> 1.15)
        mini_portile2 (~> 2.6)
        rake (> 12)
+     karafka-web (0.5.1)
+       erubi (~> 1.4)
+       karafka (>= 2.0.40, < 3.0.0)
+       karafka-core (>= 2.0.12, < 3.0.0)
+       roda (~> 3.63)
+       tilt (~> 2.0)
      mini_portile2 (2.8.1)
      minitest (5.18.0)
+     rack (3.0.7)
      rake (13.0.6)
+     roda (3.67.0)
+       rack
      rspec (3.12.0)
        rspec-core (~> 3.12.0)
        rspec-expectations (~> 3.12.0)
@@ -59,6 +69,7 @@ GEM
      simplecov-html (0.12.3)
      simplecov_json_formatter (0.1.4)
      thor (1.2.1)
+     tilt (2.1.0)
      tzinfo (2.0.6)
        concurrent-ruby (~> 1.0)
      waterdrop (2.5.1)
@@ -67,7 +78,6 @@ GEM
      zeitwerk (2.6.7)

  PLATFORMS
-   arm64-darwin-21
    x86_64-linux

  DEPENDENCIES
@@ -75,6 +85,7 @@ DEPENDENCIES
    byebug
    factory_bot
    karafka!
+   karafka-web
    rspec
    simplecov
data/bin/integrations CHANGED
@@ -152,8 +152,14 @@ class Scenario
      end
    end

+   # @return [Float] number of seconds that a given spec took to run
+   def time_taken
+     @finished_at - @started_at
+   end
+
    # Close all the files that are open, so they do not pile up
    def close
+     @finished_at = current_time
      @stdin.close
      @stdout.close
      @stderr.close
@@ -262,13 +268,22 @@ while finished_scenarios.size < scenarios.size
    sleep(0.1)
  end

+ # Report longest scenarios
+ puts
+ puts "\nLongest scenarios:\n\n"
+
+ finished_scenarios.sort_by(&:time_taken).reverse.first(10).each do |long_scenario|
+   puts "[#{'%6.2f' % long_scenario.time_taken}] #{long_scenario.name}"
+ end
+
  failed_scenarios = finished_scenarios.reject(&:success?)

- # Report once more on the failed jobs
- # This will only list scenarios that failed without printing their stdout here.
  if failed_scenarios.empty?
    puts
  else
+   # Report once more on the failed jobs
+   # This will only list scenarios that failed without printing their stdout here.
+   puts
    puts "\nFailed scenarios:\n\n"

    failed_scenarios.each do |scenario|
data/lib/karafka/admin.rb CHANGED
@@ -96,13 +96,15 @@ module Karafka
        end
      end

+     # Use topic from routes if we can match it or create a dummy one
+     # Dummy one is used in case we cannot match the topic with routes. This can happen
+     # when admin API is used to read topics that are not part of the routing
+     topic = ::Karafka::Routing::Router.find_or_initialize_by_name(name)
+
      messages.map! do |message|
        Messages::Builders::Message.call(
          message,
-         # Use topic from routes if we can match it or create a dummy one
-         # Dummy one is used in case we cannot match the topic with routes. This can happen
-         # when admin API is used to read topics that are not part of the routing
-         Routing::Router.find_by(name: name) || Topic.new(name, App.config.deserializer),
+         topic,
          Time.now
        )
      end
@@ -173,6 +175,17 @@ module Karafka
        end
      end

+     # Creates a consumer instance and yields it. After usage it closes the consumer instance
+     # This API can be used in other pieces of code and allows for low-level consumer usage
+     #
+     # @param settings [Hash] extra settings to customize consumer
+     def with_consumer(settings = {})
+       consumer = config(:consumer, settings).consumer
+       yield(consumer)
+     ensure
+       consumer&.close
+     end
+
      private

      # @return [Array<String>] topic names
@@ -195,15 +208,6 @@ module Karafka
        admin&.close
      end

-     # Creates consumer instance and yields it. After usage it closes the consumer instance
-     # @param settings [Hash] extra settings to customize consumer
-     def with_consumer(settings = {})
-       consumer = config(:consumer, settings).consumer
-       yield(consumer)
-     ensure
-       consumer&.close
-     end
-
      # There are some cases where rdkafka admin operations finish successfully but without the
      # callback being triggered to materialize the post-promise object. Until this is fixed we
      # can figure out, that operation we wanted to do finished successfully by checking that the
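Since `with_consumer` moved above the `private` marker, it is now part of the public admin API and backs the new `Pro::Iterator`. A minimal sketch of direct usage, assuming a booted Karafka app; the `events` topic is hypothetical, and the underlying object is a regular rdkafka consumer:

```ruby
# Hedged sketch: settings are merged into the admin consumer config, so
# defaults such as auto.offset.reset can be overridden per call
Karafka::Admin.with_consumer('auto.offset.reset': 'beginning') do |consumer|
  consumer.subscribe('events')

  # Poll one message; the consumer is closed automatically by the ensure block
  message = consumer.poll(1_000)
  puts message&.payload
end
```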
@@ -369,6 +369,8 @@ module Karafka
      rescue ::Rdkafka::RdkafkaError => e
        early_report = false

+       retryable = time_poll.attempts <= MAX_POLL_RETRIES && time_poll.retryable?
+
        # There are retryable issues on which we want to report fast as they are source of
        # problems and can mean some bigger system instabilities
        # Those are mainly network issues and exceeding the max poll interval
@@ -389,9 +391,10 @@ module Karafka
          return nil if @subscription_group.kafka[:'allow.auto.create.topics']

          early_report = true
-       end

-       retryable = time_poll.attempts <= MAX_POLL_RETRIES && time_poll.retryable?
+         # No sense in retrying when no topic/partition and we're no longer running
+         retryable = false unless Karafka::App.running?
+       end

        if early_report || !retryable
          Karafka.monitor.instrument(
@@ -32,6 +32,18 @@ module Karafka
          consumer_group_id: @consumer_group_id,
          statistics: @statistics_decorator.call(statistics)
        )
+     # We need to catch and handle any potential errors coming from the instrumentation pipeline
+     # as otherwise, in case of statistics which run in the main librdkafka thread, any crash
+     # will hang the whole process.
+     rescue StandardError => e
+       ::Karafka.monitor.instrument(
+         'error.occurred',
+         caller: self,
+         subscription_group_id: @subscription_group_id,
+         consumer_group_id: @consumer_group_id,
+         type: 'statistics.emitted.error',
+         error: e
+       )
      end
    end
  end
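For context, this rescue protects against user code subscribed to the statistics instrumentation. A minimal sketch of the kind of listener this change contains, with a deliberately failing lookup as the illustrative bug:

```ruby
# Hedged sketch: a user-side listener on the statistics instrumentation.
# Before this change, an exception raised here could take down the whole
# process, as statistics callbacks run in the main librdkafka thread.
Karafka.monitor.subscribe('statistics.emitted') do |event|
  stats = event[:statistics]

  # A user bug like a bad key lookup is now rescued and re-emitted as an
  # 'error.occurred' event with type 'statistics.emitted.error'
  stats.fetch('missing-key')
end
```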
@@ -48,7 +48,8 @@ module Karafka
      job_type = job.class.to_s.split('::').last
      consumer = job.executor.topic.consumer
      topic = job.executor.topic.name
-     info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} started"
+     partition = job.executor.partition
+     info "[#{job.id}] #{job_type} job for #{consumer} on #{topic}/#{partition} started"
    end

    # Prints info about the fact that a given job has finished
@@ -60,7 +61,11 @@ module Karafka
      job_type = job.class.to_s.split('::').last
      consumer = job.executor.topic.consumer
      topic = job.executor.topic.name
-     info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} finished in #{time}ms"
+     partition = job.executor.partition
+     info <<~MSG.tr("\n", ' ').strip!
+       [#{job.id}] #{job_type} job for #{consumer}
+       on #{topic}/#{partition} finished in #{time}ms
+     MSG
    end

    # Prints info about a consumer pause occurrence. Irrelevant if user or system initiated.
@@ -73,7 +78,7 @@ module Karafka
      client = event[:caller]

      info <<~MSG.tr("\n", ' ').strip!
-       [#{client.id}] Pausing partition #{partition} of topic #{topic} on offset #{offset}
+       [#{client.id}] Pausing on topic #{topic}/#{partition} on offset #{offset}
      MSG
    end

@@ -86,7 +91,7 @@ module Karafka
      client = event[:caller]

      info <<~MSG.tr("\n", ' ').strip!
-       [#{client.id}] Resuming partition #{partition} of topic #{topic}
+       [#{client.id}] Resuming on topic #{topic}/#{partition}
      MSG
    end

@@ -102,7 +107,7 @@ module Karafka

      info <<~MSG.tr("\n", ' ').strip!
        [#{consumer.id}] Retrying of #{consumer.class} after #{timeout} ms
-       on partition #{partition} of topic #{topic} from offset #{offset}
+       on topic #{topic}/#{partition} from offset #{offset}
      MSG
    end

@@ -261,6 +266,12 @@ module Karafka
      when 'librdkafka.error'
        error "librdkafka internal error occurred: #{error}"
        error details
+     # Those can occur when emitted statistics are consumed by the end user and the processing
+     # of statistics fails. The statistics are emitted from the librdkafka main loop thread and
+     # any errors there crash the whole thread
+     when 'statistics.emitted.error'
+       error "statistics.emitted processing failed due to an error: #{error}"
+       error details
      # Those will only occur when retries in the client fail and when they did not stop after
      # back-offs
      when 'connection.client.poll.error'
@@ -10,22 +10,23 @@ module Karafka
        #
        # @param messages [Array<Karafka::Messages::Message>] messages array
        # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
+       # @param partition [Integer] partition of this metadata
        # @param scheduled_at [Time] moment when the batch was scheduled for processing
        # @return [Karafka::Messages::BatchMetadata] batch metadata object
        #
        # @note We do not set `processed_at` as this needs to be assigned when the batch is
        #   picked up for processing.
-       def call(messages, topic, scheduled_at)
+       def call(messages, topic, partition, scheduled_at)
          Karafka::Messages::BatchMetadata.new(
            size: messages.count,
-           first_offset: messages.first.offset,
-           last_offset: messages.last.offset,
+           first_offset: messages.first&.offset || -1001,
+           last_offset: messages.last&.offset || -1001,
            deserializer: topic.deserializer,
-           partition: messages.first.partition,
+           partition: partition,
            topic: topic.name,
            # We go with the assumption that the creation of the whole batch is the last message
            # creation time
-           created_at: messages.last.timestamp,
+           created_at: messages.last&.timestamp || nil,
            # When this batch was built and scheduled for execution
            scheduled_at: scheduled_at,
            # This needs to be set to a correct value prior to processing starting
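A short sketch of what the nil-safe sentinels mean for an empty (Idle) batch; `-1001` mirrors librdkafka's invalid-offset sentinel, and the `events` topic name is illustrative:

```ruby
# Hedged sketch: an empty batch no longer raises on missing first/last messages
topic = Karafka::Routing::Router.find_or_initialize_by_name('events')

metadata = Karafka::Messages::Builders::BatchMetadata.call([], topic, 0, Time.now)

metadata.first_offset # => -1001 (librdkafka-style invalid offset sentinel)
metadata.last_offset  # => -1001
metadata.created_at   # => nil (no last message to take the timestamp from)
```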
@@ -11,14 +11,16 @@ module Karafka
      #
      # @param messages [Array<Karafka::Messages::Message>] karafka messages array
      # @param topic [Karafka::Routing::Topic] topic for which we've received messages
+     # @param partition [Integer] partition of those messages
      # @param received_at [Time] moment in time when the messages were received
      # @return [Karafka::Messages::Messages] messages batch object
-     def call(messages, topic, received_at)
+     def call(messages, topic, partition, received_at)
        # We cannot freeze the batch metadata because it is altered with the processed_at time
        # prior to the consumption. It is being frozen there
        metadata = BatchMetadata.call(
          messages,
          topic,
+         partition,
          received_at
        )

@@ -40,6 +40,11 @@ module Karafka
      map(&:raw_payload)
    end

+   # @return [Boolean] is the messages batch empty
+   def empty?
+     @messages_array.empty?
+   end
+
    # @return [Karafka::Messages::Message] first message
    def first
      @messages_array.first
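A minimal sketch of how the new `#empty?` guard might be used in a consumer, e.g. when `#shutdown` runs against the empty messages set that Idle jobs now build; the consumer class and its logic are hypothetical:

```ruby
# Hedged sketch: EventsConsumer is an illustrative consumer class
class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| puts message.raw_payload }
  end

  def shutdown
    # With Idle jobs, shutdown may now see an empty batch; skip offset work then
    return if messages.empty?

    mark_as_consumed(messages.last)
  end
end
```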
@@ -0,0 +1,253 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     # Topic iterator allows you to iterate over topic/partition data and perform lookups for
+     # information that you need.
+     #
+     # It supports early stops on finding the requested data and allows for seeking till
+     # the end. It also allows for signaling, when a given message should be the last out of a
+     # certain partition, while we still want to continue iterating over other messages.
+     #
+     # It does **not** create a consumer group and does not have any offset management.
+     class Iterator
+       # Local partition reference for librdkafka
+       Partition = Struct.new(:partition, :offset)
+
+       private_constant :Partition
+
+       # A simple API allowing to iterate over topic/partition data, without having to subscribe
+       # and deal with rebalances. This API allows for multi-partition streaming and is optimized
+       # for data lookups. It allows for explicitly stopping the iteration over any partition
+       # during the iteration process, allowing for optimized lookups.
+       #
+       # @param topics [Array<String>, Hash] list of strings if we want to subscribe to multiple
+       #   topics and all of their partitions or a hash where keys are the topics and values are
+       #   hashes with partitions and their initial offsets.
+       # @param settings [Hash] extra settings for the consumer. Please keep in mind, that if
+       #   overwritten, you may want to include `auto.offset.reset` to match your case.
+       # @param yield_nil [Boolean] should we yield also `nil` values when poll returns nothing.
+       #   Useful in particular for long-living iterators.
+       #
+       # @note It is worth keeping in mind, that this API also needs to operate within
+       #   `max.poll.interval.ms` limitations on each iteration
+       #
+       # @note In case of a never-ending iterator, you need to set `enable.partition.eof` to
+       #   `false` so we don't stop polling data even when reaching the end (the end at a given
+       #   moment)
+       def initialize(
+         topics,
+         settings: { 'auto.offset.reset': 'beginning' },
+         yield_nil: false
+       )
+         @topics_with_partitions = expand_topics_with_partitions(topics)
+
+         @routing_topics = @topics_with_partitions.map do |name, _|
+           [name, ::Karafka::Routing::Router.find_or_initialize_by_name(name)]
+         end.to_h
+
+         @total_partitions = @topics_with_partitions.map(&:last).sum(&:count)
+
+         @stopped_partitions = 0
+
+         @settings = settings
+         @yield_nil = yield_nil
+       end
+
+       # Iterates over requested topic partitions and yields the results with the iterator itself
+       # Iterator instance is yielded because one can run `stop_partition` to stop iterating over
+       # part of data. It is useful for scenarios where we are looking for some information in
+       # all the partitions but once we have found it, given partition data is no longer needed
+       # and would only eat up resources.
+       def each
+         Admin.with_consumer(@settings) do |consumer|
+           tpl = tpl_with_expanded_offsets(consumer)
+           consumer.assign(tpl)
+
+           # We need this for self-referenced APIs like pausing
+           @current_consumer = consumer
+
+           # Stream data until we reach the end of all the partitions or until the end user
+           # indicates that they are done
+           until done?
+             message = poll(200)
+
+             # Skip nils if not explicitly required
+             next if message.nil? && !@yield_nil
+
+             if message
+               @current_message = build_message(message)
+
+               yield(@current_message, self)
+             else
+               yield(nil, self)
+             end
+           end
+
+           @current_message = nil
+           @current_consumer = nil
+         end
+
+         # Reset so we can use the same iterator again if needed
+         @stopped_partitions = 0
+       end
+
+       # Stops the partition we're currently yielded into
+       def stop_current_partition
+         stop_partition(
+           @current_message.topic,
+           @current_message.partition
+         )
+       end
+
+       # Stops processing of a given partition
+       # We expect the partition to be provided because of a scenario, where there is a
+       # multi-partition iteration and we want to stop a different partition than the one that
+       # is currently yielded.
+       #
+       # We pause it forever and no longer work with it.
+       #
+       # @param name [String] topic name of which partition we want to stop
+       # @param partition [Integer] partition we want to stop processing
+       def stop_partition(name, partition)
+         @stopped_partitions += 1
+
+         @current_consumer.pause(
+           Rdkafka::Consumer::TopicPartitionList.new(
+             name => [Partition.new(partition, 0)]
+           )
+         )
+       end
+
+       private
+
+       # Expands topics to which we want to subscribe with partitions information in case this
+       # info is not provided. For our convenience we want to support 5 formats of defining
+       # the subscribed topics:
+       #
+       # - 'topic1' - just a string with one topic name
+       # - ['topic1', 'topic2'] - just the names
+       # - { 'topic1' => -100 } - names with negative lookup offset
+       # - { 'topic1' => { 0 => 5 } } - names with exact partitions offsets
+       # - { 'topic1' => { 0 => -5 }, 'topic2' => { 1 => 5 } } - with per partition negative offsets
+       #
+       # @param topics [Array, Hash] topics definitions
+       # @return [Hash] hash with topics containing partitions definitions
+       def expand_topics_with_partitions(topics)
+         # Simplification for the single topic case
+         topics = [topics] if topics.is_a?(String)
+         # If we've got just an array with topics, we need to convert that into a representation
+         # that we can expand with offsets
+         topics = topics.map { |name| [name, false] }.to_h if topics.is_a?(Array)
+
+         expanded = Hash.new { |h, k| h[k] = {} }
+
+         topics.map do |topic, details|
+           if details.is_a?(Hash)
+             details.each do |partition, offset|
+               expanded[topic][partition] = offset
+             end
+           else
+             partition_count(topic.to_s).times do |partition|
+               # If no offsets are provided, we just start from zero
+               expanded[topic][partition] = details || 0
+             end
+           end
+         end
+
+         expanded
+       end
+
+       # @param timeout [Integer] timeout in ms
+       # @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
+       def poll(timeout)
+         @current_consumer.poll(timeout)
+       rescue Rdkafka::RdkafkaError => e
+         # End of partition
+         if e.code == :partition_eof
+           @stopped_partitions += 1
+
+           retry
+         end
+
+         raise e
+       end
+
+       # Converts a raw rdkafka message into a Karafka message
+       #
+       # @param message [Rdkafka::Consumer::Message] raw rdkafka message
+       # @return [::Karafka::Messages::Message]
+       def build_message(message)
+         Messages::Builders::Message.call(
+           message,
+           @routing_topics.fetch(message.topic),
+           Time.now
+         )
+       end
+
+       # Do we have all the data we wanted or has every topic partition reached eof?
+       # @return [Boolean]
+       def done?
+         @stopped_partitions >= @total_partitions
+       end
+
+       # Builds the tpl representing all the subscriptions we want to run
+       #
+       # Additionally, for negative offsets, does the watermark calculation of where to start
+       #
+       # @param consumer [Rdkafka::Consumer] consumer we need in case of negative offsets as
+       #   negative ones are going to be used to do "give me the last X". We use the already
+       #   initialized consumer instance, not to start another one again.
+       # @return [Rdkafka::Consumer::TopicPartitionList]
+       def tpl_with_expanded_offsets(consumer)
+         tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+         @topics_with_partitions.each do |name, partitions|
+           partitions_with_offsets = {}
+
+           # When no offsets defined, we just start from zero
+           if partitions.is_a?(Array) || partitions.is_a?(Range)
+             partitions_with_offsets = partitions.map { |partition| [partition, 0] }.to_h
+           else
+             # When offsets defined, we can either use them if positive or expand and move back
+             # in case of negative (-1000 means the last 1000 messages, etc)
+             partitions.each do |partition, offset|
+               if offset.negative?
+                 _, high_watermark_offset = consumer.query_watermark_offsets(name, partition)
+                 # We add because this offset is negative
+                 partitions_with_offsets[partition] = high_watermark_offset + offset
+               else
+                 partitions_with_offsets[partition] = offset
+               end
+             end
+           end
+
+           tpl.add_topic_and_partitions_with_offsets(name, partitions_with_offsets)
+         end
+
+         tpl
+       end
+
+       # @param name [String] topic name
+       # @return [Integer] number of partitions of the topic we want to iterate over
+       def partition_count(name)
+         Admin
+           .cluster_info
+           .topics
+           .find { |topic| topic.fetch(:topic_name) == name }
+           .fetch(:partitions)
+           .count
+       end
+     end
+   end
+ end
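A minimal usage sketch of the new iterator, assuming a Pro setup; `visits` is a hypothetical topic, and the offset formats follow the `expand_topics_with_partitions` documentation above:

```ruby
# Hedged sketch: look up data across partitions without a consumer group.
# -100 means "start from the last 100 messages" of partition 0.
iterator = Karafka::Pro::Iterator.new(
  { 'visits' => { 0 => -100, 1 => 0 } }
)

iterator.each do |message, iter|
  puts "#{message.topic}/#{message.partition} @ #{message.offset}"

  # Once we have what we came for, stop streaming the current partition while
  # other partitions continue until they reach eof
  iter.stop_current_partition if message.offset >= 1_000
end
```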
@@ -21,7 +21,8 @@ module Karafka
        #
        # In general because we collapse processing in virtual partitions to one on errors, there
        # is no special action that needs to be taken because we guarantee that even with VPs
-       # on errors a retry collapses into a single state.
+       # on errors a retry collapses into a single state and from this single state we can
+       # mark as consumed the message that we are moving to the DLQ.
        module Vp
          # Features for this strategy
          FEATURES = %i[
@@ -44,10 +44,6 @@ module Karafka
      #
      # @param messages [Array<Karafka::Messages::Message>]
      def before_enqueue(messages)
-       # the moment we've received the batch or actually the moment we've enqueued it,
-       # but good enough
-       @enqueued_at = Time.now
-
        # Recreate consumer with each batch if persistence is not enabled
        # We reload the consumers with each batch instead of relying on some external signals
        # when needed for consistency. That way devs may have it on or off and not in this
@@ -57,8 +53,11 @@ module Karafka
        # First we build messages batch...
        consumer.messages = Messages::Builders::Messages.call(
          messages,
-         coordinator.topic,
-         @enqueued_at
+         topic,
+         partition,
+         # the moment we've received the batch or actually the moment we've enqueued it,
+         # but good enough
+         Time.now
        )

        consumer.on_before_enqueue
@@ -84,6 +83,16 @@ module Karafka
      # This may include house-keeping or other state management changes that can occur but that
      # do not mean there are any new messages available for the end user to process
      def idle
+       # Initializes the messages set in case an idle operation would happen before any
+       # processing. This prevents us from having no messages object at all, as the messages
+       # object and its metadata may be used for statistics
+       consumer.messages ||= Messages::Builders::Messages.call(
+         [],
+         topic,
+         partition,
+         Time.now
+       )
+
        consumer.on_idle
      end

@@ -32,8 +32,23 @@ module Karafka
      nil
    end

+   # Finds the topic by name (in any consumer group) and if not present, will build a new
+   # representation of the topic with the defaults and the default deserializer.
+   #
+   # This is used in places where we may operate on topics that are not part of the routing
+   # but we want to do something on them (display data, iterate over, etc)
+   # @param name [String] name of the topic we are looking for
+   # @return [Karafka::Routing::Topic]
+   #
+   # @note Please note, that in case of a new topic, it will have a newly built consumer group
+   #   as well, that is not part of the routing.
+   def find_or_initialize_by_name(name)
+     find_by(name: name) || Topic.new(name, ConsumerGroup.new(name))
+   end
+
    module_function :find
    module_function :find_by
+   module_function :find_or_initialize_by_name
  end
  end
  end
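A short sketch of the fallback behavior; the topic name is hypothetical and not part of any routing:

```ruby
# Hedged sketch: a routed topic resolves to its routing entry; an unknown one
# gets an ad hoc Topic built with defaults and the default deserializer
topic = Karafka::Routing::Router.find_or_initialize_by_name('not-in-routes')

topic.name # => 'not-in-routes'
# Per the @note above, its consumer group is also freshly built and is not
# part of the routing either
```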
@@ -18,7 +18,13 @@ module Karafka
      KAFKA_DEFAULTS = {
        # We emit the statistics by default, so all the instrumentation and web-ui work out of
        # the box, without requiring users to take any extra actions aside from enabling.
-       'statistics.interval.ms': 5_000
+       'statistics.interval.ms': 5_000,
+       'client.software.name': 'karafka',
+       'client.software.version': [
+         "v#{Karafka::VERSION}",
+         "rdkafka-ruby-v#{Rdkafka::VERSION}",
+         "librdkafka-v#{Rdkafka::LIBRDKAFKA_VERSION}"
+       ].join('-')
      }.freeze

      # Contains settings that should not be used in production but make life easier in dev
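For illustration, the joined `client.software.version` string reported to the brokers resolves to something like the following; the exact version numbers depend on the installed gems and are only examples:

```ruby
# Hedged sketch with example version constants; actual values vary per install
[
  "v#{Karafka::VERSION}",              # e.g. "v2.0.41"
  "rdkafka-ruby-v#{Rdkafka::VERSION}", # e.g. "rdkafka-ruby-v0.12.1"
  "librdkafka-v#{Rdkafka::LIBRDKAFKA_VERSION}"
].join('-')
# => e.g. "v2.0.41-rdkafka-ruby-v0.12.1-librdkafka-v1.9.2"
```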
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
    # Current Karafka version
-   VERSION = '2.0.39'
+   VERSION = '2.0.41'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
-   version: 2.0.39
+   version: 2.0.41
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
    Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
    MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
    -----END CERTIFICATE-----
- date: 2023-04-11 00:00:00.000000000 Z
+ date: 2023-04-19 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: karafka-core
@@ -224,6 +224,7 @@ files:
  - lib/karafka/pro/encryption/messages/middleware.rb
  - lib/karafka/pro/encryption/messages/parser.rb
  - lib/karafka/pro/encryption/setup/config.rb
+ - lib/karafka/pro/iterator.rb
  - lib/karafka/pro/loader.rb
  - lib/karafka/pro/performance_tracker.rb
  - lib/karafka/pro/processing/collapser.rb
metadata.gz.sig CHANGED
Binary file