karafka 2.4.11 → 2.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +1 -3
  4. data/CHANGELOG.md +12 -0
  5. data/Gemfile.lock +7 -7
  6. data/config/locales/errors.yml +1 -0
  7. data/config/locales/pro_errors.yml +8 -0
  8. data/docker-compose.yml +1 -1
  9. data/lib/active_job/queue_adapters/karafka_adapter.rb +12 -7
  10. data/lib/karafka/active_job/dispatcher.rb +13 -0
  11. data/lib/karafka/app.rb +17 -0
  12. data/lib/karafka/base_consumer.rb +18 -6
  13. data/lib/karafka/connection/client.rb +4 -0
  14. data/lib/karafka/connection/listener.rb +2 -1
  15. data/lib/karafka/connection/messages_buffer.rb +5 -2
  16. data/lib/karafka/connection/raw_messages_buffer.rb +11 -0
  17. data/lib/karafka/contracts/topic.rb +14 -0
  18. data/lib/karafka/helpers/config_importer.rb +13 -0
  19. data/lib/karafka/instrumentation/logger_listener.rb +15 -0
  20. data/lib/karafka/instrumentation/notifications.rb +1 -0
  21. data/lib/karafka/messages/message.rb +3 -9
  22. data/lib/karafka/pro/active_job/dispatcher.rb +28 -0
  23. data/lib/karafka/pro/active_job/job_options_contract.rb +9 -0
  24. data/lib/karafka/pro/loader.rb +1 -1
  25. data/lib/karafka/pro/processing/adaptive_iterator/consumer.rb +62 -0
  26. data/lib/karafka/pro/processing/adaptive_iterator/tracker.rb +75 -0
  27. data/lib/karafka/pro/processing/expansions_selector.rb +1 -0
  28. data/lib/karafka/pro/routing/features/adaptive_iterator/config.rb +34 -0
  29. data/lib/karafka/pro/routing/features/adaptive_iterator/contracts/topic.rb +74 -0
  30. data/lib/karafka/pro/routing/features/adaptive_iterator/topic.rb +62 -0
  31. data/lib/karafka/pro/routing/features/adaptive_iterator.rb +31 -0
  32. data/lib/karafka/pro/scheduled_messages/contracts/message.rb +23 -4
  33. data/lib/karafka/pro/scheduled_messages/day.rb +1 -1
  34. data/lib/karafka/pro/scheduled_messages/proxy.rb +16 -7
  35. data/lib/karafka/processing/coordinator.rb +4 -0
  36. data/lib/karafka/processing/executor.rb +4 -1
  37. data/lib/karafka/version.rb +1 -1
  38. data.tar.gz.sig +0 -0
  39. metadata +8 -2
  40. metadata.gz.sig +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 346743e75bc80a6a3e04361aa6b7b5caa540b31697bc25bb138baa490edd0a93
4
- data.tar.gz: 6a87b0f7af16210b93732f55e98072b0c2eaf4541b700bb1929864b515a91747
3
+ metadata.gz: f96e616f91d60d5054e276f52cc2ecde9d4a6f6c8e25b3d61c384936b9239e4f
4
+ data.tar.gz: 8dfad6cc5d0cb4cdcc885b58e129929f67b0fdc4ee7922c21f9728cbdd863f5b
5
5
  SHA512:
6
- metadata.gz: c20b7bb58d31b7771e593334783285edf13bfaff977350226aeb5a979fe1fd92482bb27acf72385a05a762d69581260196f953bdcea4aafbd1d50d0842fec9a2
7
- data.tar.gz: 07b991c9048f20352c1670a5782c028757e177f7f062ce3a1ce113fd01a97184717b8c7d3612d515993cb048e21b3dcf1804b22852c1b6d9bccebb16463ca2aa
6
+ metadata.gz: bfea8217fb7ba89158b926417a0dd0cab42460a607b3bf25a62a400ab83e510806b966f81bd2ca76144e5aff62c99fb58b35be94ca1ad5c61ca113e760098243
7
+ data.tar.gz: ee66d2c6a11dc6baac3cc836ea5455d6c88838f1db7c0f9af9a92c3f5d3c7b8dd9d1f8c42dac4c9908042da3b192d8579466deecc687126640fc7bc5e0aeafb2
checksums.yaml.gz.sig CHANGED
Binary file
data/.github/workflows/ci.yml CHANGED
@@ -117,7 +117,6 @@ jobs:
117
117
  - '3.3'
118
118
  - '3.2'
119
119
  - '3.1'
120
- - '3.0'
121
120
  steps:
122
121
  - uses: actions/checkout@v4
123
122
  - name: Install package dependencies
@@ -165,7 +164,7 @@ jobs:
165
164
  run: bin/integrations --exclude '/pro'
166
165
 
167
166
  integrations_pro:
168
- timeout-minutes: 40
167
+ timeout-minutes: 45
169
168
  runs-on: ubuntu-latest
170
169
  needs: diffend
171
170
  strategy:
@@ -176,7 +175,6 @@ jobs:
176
175
  - '3.3'
177
176
  - '3.2'
178
177
  - '3.1'
179
- - '3.0'
180
178
  steps:
181
179
  - uses: actions/checkout@v4
182
180
  - name: Install package dependencies
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Karafka Framework Changelog
2
2
 
3
+ ## 2.4.12 (2024-09-17)
4
+ - **[Feature]** Provide Adaptive Iterator feature as a fast alternative to Long-Running Jobs (Pro).
5
+ - [Enhancement] Provide `Consumer#each` as a delegation to messages batch.
6
+ - [Enhancement] Verify cancellation request envelope topic similar to the schedule one.
7
+ - [Enhancement] Validate presence of `bootstrap.servers` to avoid incomplete partial reconfiguration.
8
+ - [Enhancement] Support `ActiveJob#enqueue_at` via Scheduled Messages feature (Pro).
9
+ - [Enhancement] Introduce `Karafka::App#debug!` that will switch Karafka and the default producer into extensive debug mode. Useful for CLI debugging.
10
+ - [Enhancement] Support full overwrite of the `BaseConsumer#producer`.
11
+ - [Enhancement] Transfer the time of last poll back to the coordinator for more accurate metrics tracking.
12
+ - [Enhancement] Instrument `Consumer#seek` via `consumer.consuming.seek`.
13
+ - [Fix] Fix incorrect time reference reload in scheduled messages.
14
+
3
15
  ## 2.4.11 (2024-09-04)
4
16
  - [Enhancement] Validate envelope target topic type for Scheduled Messages.
5
17
  - [Enhancement] Support for enqueue_after_transaction_commit in rails active job.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.4.11)
4
+ karafka (2.4.12)
5
5
  base64 (~> 0.2)
6
6
  karafka-core (>= 2.4.3, < 2.5.0)
7
7
  karafka-rdkafka (>= 0.17.2)
@@ -36,7 +36,7 @@ GEM
36
36
  erubi (1.13.0)
37
37
  et-orbi (1.2.11)
38
38
  tzinfo
39
- factory_bot (6.4.6)
39
+ factory_bot (6.5.0)
40
40
  activesupport (>= 5.0.0)
41
41
  ffi (1.17.0)
42
42
  fugit (1.11.1)
@@ -55,20 +55,20 @@ GEM
55
55
  karafka-testing (2.4.6)
56
56
  karafka (>= 2.4.0, < 2.5.0)
57
57
  waterdrop (>= 2.7.0)
58
- karafka-web (0.10.2)
58
+ karafka-web (0.10.3)
59
59
  erubi (~> 1.4)
60
60
  karafka (>= 2.4.10, < 2.5.0)
61
61
  karafka-core (>= 2.4.0, < 2.5.0)
62
62
  roda (~> 3.68, >= 3.69)
63
63
  tilt (~> 2.0)
64
- logger (1.6.0)
64
+ logger (1.6.1)
65
65
  mini_portile2 (2.8.7)
66
66
  minitest (5.25.1)
67
67
  ostruct (0.6.0)
68
68
  raabro (1.4.0)
69
69
  rack (3.1.7)
70
70
  rake (13.2.1)
71
- roda (3.83.0)
71
+ roda (3.84.0)
72
72
  rack
73
73
  rspec (3.13.0)
74
74
  rspec-core (~> 3.13.0)
@@ -93,9 +93,9 @@ GEM
93
93
  tilt (2.4.0)
94
94
  tzinfo (2.0.6)
95
95
  concurrent-ruby (~> 1.0)
96
- waterdrop (2.7.4)
96
+ waterdrop (2.8.0)
97
97
  karafka-core (>= 2.4.3, < 3.0.0)
98
- karafka-rdkafka (>= 0.15.1)
98
+ karafka-rdkafka (>= 0.17.5)
99
99
  zeitwerk (~> 2.3)
100
100
  zeitwerk (2.6.18)
101
101
 
data/config/locales/errors.yml CHANGED
@@ -154,6 +154,7 @@ en:
154
154
  partitioner_format: 'needs to respond to #call'
155
155
  partition_key_type_format: 'needs to be either :key or :partition_key'
156
156
  producer_format: 'needs to respond to #call'
157
+ scheduled_messages_topic_format: 'needs to be a string with a Kafka accepted format'
157
158
 
158
159
  test:
159
160
  missing: needs to be present
data/config/locales/pro_errors.yml CHANGED
@@ -76,6 +76,14 @@ en:
76
76
  direct_assignments_swarm_overbooked: 'cannot allocate partitions in swarm that were not assigned'
77
77
  direct_assignments_patterns_active: 'patterns cannot be used with direct assignments'
78
78
 
79
+ adaptive_iterator.active_missing: needs to be present
80
+ adaptive_iterator.active_format: 'needs to be boolean'
81
+ adaptive_iterator.marking_method_format: 'needs to be either #mark_as_consumed or #mark_as_consumed!'
82
+ adaptive_iterator.clean_after_yielding_format: 'needs to be boolean'
83
+ adaptive_iterator.safety_margin_format: 'needs to be between 1 and 99'
84
+ adaptive_iterator_with_virtual_partitions: 'cannot be used with virtual partitions'
85
+ adaptive_iterator_with_long_running_job: 'cannot be used with long running jobs'
86
+
79
87
  consumer_group:
80
88
  patterns_format: must be an array with hashes
81
89
  patterns_missing: needs to be present
data/docker-compose.yml CHANGED
@@ -3,7 +3,7 @@ version: '2'
3
3
  services:
4
4
  kafka:
5
5
  container_name: kafka
6
- image: confluentinc/cp-kafka:7.7.0
6
+ image: confluentinc/cp-kafka:7.7.1
7
7
 
8
8
  ports:
9
9
  - 9092:9092
data/lib/active_job/queue_adapters/karafka_adapter.rb CHANGED
@@ -7,27 +7,32 @@ module ActiveJob
7
7
  # Karafka adapter for enqueuing jobs
8
8
  # This is here for ease of integration with ActiveJob.
9
9
  class KarafkaAdapter
10
+ include Karafka::Helpers::ConfigImporter.new(
11
+ dispatcher: %i[internal active_job dispatcher]
12
+ )
13
+
10
14
  # Enqueues the job using the configured dispatcher
11
15
  #
12
16
  # @param job [Object] job that should be enqueued
13
17
  def enqueue(job)
14
- ::Karafka::App.config.internal.active_job.dispatcher.dispatch(job)
18
+ dispatcher.dispatch(job)
15
19
  end
16
20
 
17
21
  # Enqueues multiple jobs in one go
18
22
  # @param jobs [Array<Object>] jobs that we want to enqueue
19
23
  # @return [Integer] number of jobs enqueued (required by Rails)
20
24
  def enqueue_all(jobs)
21
- ::Karafka::App.config.internal.active_job.dispatcher.dispatch_many(jobs)
25
+ dispatcher.dispatch_many(jobs)
22
26
  jobs.size
23
27
  end
24
28
 
25
- # Raises info, that Karafka backend does not support scheduling jobs
29
+ # Delegates time sensitive dispatch to the dispatcher. OSS will raise error, Pro will handle
30
+ # this as it supports scheduled messages.
26
31
  #
27
- # @param _job [Object] job we cannot enqueue
28
- # @param _timestamp [Time] time when job should run
29
- def enqueue_at(_job, _timestamp)
30
- raise NotImplementedError, 'This queueing backend does not support scheduling jobs.'
32
+ # @param job [Object] job we want to enqueue
33
+ # @param timestamp [Time] time when job should run
34
+ def enqueue_at(job, timestamp)
35
+ dispatcher.dispatch_at(job, timestamp)
31
36
  end
32
37
 
33
38
  # @return [true] should we by default enqueue after the transaction and not during.
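With `#enqueue_at` now delegating to the dispatcher, ActiveJob's standard wait options route through Karafka instead of raising immediately. A minimal sketch of the application side (the `WelcomeJob` class is hypothetical; on OSS the dispatcher still raises `NotImplementedError`, while Pro handles it via Scheduled Messages):

```ruby
class WelcomeJob < ActiveJob::Base
  # The queue name doubles as the target Kafka topic
  queue_as :welcome_emails

  def perform(user_id)
    puts "Welcoming user ##{user_id}"
  end
end

# Goes through KarafkaAdapter#enqueue_at and the configured dispatcher's #dispatch_at
WelcomeJob.set(wait_until: Time.now + 3600).perform_later(42)
```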
data/lib/karafka/active_job/dispatcher.rb CHANGED
@@ -46,6 +46,19 @@ module Karafka
46
46
  end
47
47
  end
48
48
 
49
+ # Raises info, that Karafka backend does not support scheduling jobs
50
+ #
51
+ # @param _job [Object] job we cannot enqueue
52
+ # @param _timestamp [Time] time when job should run
53
+ #
54
+ # @note Karafka Pro supports this feature
55
+ def dispatch_at(_job, _timestamp)
56
+ raise NotImplementedError, <<~ERROR_MESSAGE
57
+ This queueing backend does not support scheduling jobs.
58
+ Consider using Karafka Pro, which supports this via the Scheduled Messages feature.
59
+ ERROR_MESSAGE
60
+ end
61
+
49
62
  private
50
63
 
51
64
  # @param job [ActiveJob::Base] job
data/lib/karafka/app.rb CHANGED
@@ -95,6 +95,23 @@ module Karafka
95
95
  end
96
96
  RUBY
97
97
  end
98
+
99
+ # Forces the debug setup onto Karafka and default WaterDrop producer.
100
+ # This needs to run prior to any operations that would cache state, like consuming or
101
+ # producing messages.
102
+ #
103
+ # @param contexts [String] librdkafka low level debug contexts for granular debugging
104
+ def debug!(contexts = 'all')
105
+ logger.level = ::Logger::DEBUG
106
+ producer.config.logger.level = ::Logger::DEBUG
107
+
108
+ config.kafka[:debug] = contexts
109
+ producer.config.kafka[:debug] = contexts
110
+
111
+ consumer_groups.map(&:topics).flat_map(&:to_a).each do |topic|
112
+ topic.kafka[:debug] = contexts
113
+ end
114
+ end
98
115
  end
99
116
  end
100
117
  end
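A quick illustration of the new helper (a sketch; assumes a booted Karafka app, for example inside `karafka console`, and must run before any consuming or producing since those cache client state). The `'broker,protocol'` string is only an example of librdkafka debug contexts:

```ruby
Karafka::App.debug!                    # all librdkafka debug contexts
Karafka::App.debug!('broker,protocol') # or only selected contexts
```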
data/lib/karafka/base_consumer.rb CHANGED
@@ -14,6 +14,8 @@ module Karafka
14
14
  def_delegators :producer, :produce_async, :produce_sync, :produce_many_async,
15
15
  :produce_many_sync
16
16
 
17
+ def_delegators :messages, :each
18
+
17
19
  # @return [String] id of the current consumer
18
20
  attr_reader :id
19
21
  # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
@@ -291,13 +293,23 @@ module Karafka
291
293
  coordinator.manual_seek if manual_seek
292
294
  coordinator.seek_offset = nil if reset_offset
293
295
 
294
- client.seek(
295
- Karafka::Messages::Seek.new(
296
- topic.name,
297
- partition,
298
- offset
299
- )
296
+ message = Karafka::Messages::Seek.new(
297
+ topic.name,
298
+ partition,
299
+ offset
300
300
  )
301
+
302
+ Karafka.monitor.instrument(
303
+ 'consumer.consuming.seek',
304
+ caller: self,
305
+ topic: topic.name,
306
+ partition: partition,
307
+ message: message,
308
+ manual_seek: manual_seek,
309
+ reset_offset: reset_offset
310
+ ) do
311
+ client.seek(message)
312
+ end
301
313
  end
302
314
 
303
315
  # @return [Boolean] true if partition was revoked from the current consumer
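Thanks to the new delegation, consumers can iterate without the explicit `messages` receiver. A minimal sketch (the `EventsConsumer` class and payload handling are hypothetical):

```ruby
class EventsConsumer < Karafka::BaseConsumer
  def consume
    # `each` is now delegated to `messages.each`
    each do |message|
      puts message.payload
      mark_as_consumed(message)
    end
  end
end
```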
data/lib/karafka/connection/client.rb CHANGED
@@ -112,6 +112,10 @@ module Karafka
112
112
  # Fetch message within our time boundaries
113
113
  response = poll(time_poll.remaining)
114
114
 
115
+ # We track when last polling happened so we can provide means to detect upcoming
116
+ # `max.poll.interval.ms` limit
117
+ @buffer.polled
118
+
115
119
  case response
116
120
  when :tick_time
117
121
  nil
data/lib/karafka/connection/listener.rb CHANGED
@@ -336,7 +336,7 @@ module Karafka
336
336
  idle_jobs = []
337
337
  eofed_jobs = []
338
338
 
339
- @messages_buffer.each do |topic, partition, messages, eof|
339
+ @messages_buffer.each do |topic, partition, messages, eof, last_polled_at|
340
340
  # In case we did not receive any new messages without eof we skip.
341
341
  # We may yield empty array here in case we have reached eof without new messages but in
342
342
  # such cases, we can run an eof job
@@ -344,6 +344,7 @@ module Karafka
344
344
 
345
345
  coordinator = @coordinators.find_or_create(topic, partition)
346
346
  coordinator.eofed = eof
347
+ coordinator.last_polled_at = last_polled_at
347
348
 
348
349
  # If we did not receive any messages and we did receive eof signal, we run the eofed
349
350
  # jobs so user can take actions on reaching eof
data/lib/karafka/connection/messages_buffer.rb CHANGED
@@ -42,6 +42,7 @@ module Karafka
42
42
  # Since it happens "right after" we've received the messages, it is close enough in time
43
43
  # to be used as the moment we received messages.
44
44
  received_at = Time.now
45
+ last_polled_at = raw_messages_buffer.last_polled_at
45
46
 
46
47
  raw_messages_buffer.each do |topic, partition, messages, eof|
47
48
  @size += messages.count
@@ -58,7 +59,8 @@ module Karafka
58
59
 
59
60
  @groups[topic][partition] = {
60
61
  eof: eof,
61
- messages: built_messages
62
+ messages: built_messages,
63
+ last_polled_at: last_polled_at
62
64
  }
63
65
  end
64
66
  end
@@ -69,10 +71,11 @@ module Karafka
69
71
  # @yieldparam [Integer] partition number
70
72
  # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
71
73
  # @yieldparam [Boolean] true if eof, false otherwise
74
+ # @yieldparam [Float] last polled at monotonic clock time
72
75
  def each
73
76
  @groups.each do |topic, partitions|
74
77
  partitions.each do |partition, details|
75
- yield(topic, partition, details[:messages], details[:eof])
78
+ yield(topic, partition, details[:messages], details[:eof], details[:last_polled_at])
76
79
  end
77
80
  end
78
81
  end
data/lib/karafka/connection/raw_messages_buffer.rb CHANGED
@@ -12,11 +12,17 @@ module Karafka
12
12
  # @note We store data here in groups per topic partition to handle the revocation case, where
13
13
  # we may need to remove messages from a single topic partition.
14
14
  class RawMessagesBuffer
15
+ include Karafka::Core::Helpers::Time
16
+
15
17
  attr_reader :size
16
18
 
19
+ # @return [Float] last polling time in milliseconds (monotonic)
20
+ attr_reader :last_polled_at
21
+
17
22
  # @return [Karafka::Connection::MessagesBuffer] buffer instance
18
23
  def initialize
19
24
  @size = 0
25
+ @last_polled_at = monotonic_now
20
26
 
21
27
  @groups = Hash.new do |topic_groups, topic|
22
28
  topic_groups[topic] = Hash.new do |partition_groups, partition|
@@ -46,6 +52,11 @@ module Karafka
46
52
  @groups[topic][partition][:eof] = true
47
53
  end
48
54
 
55
+ # Marks the last polling time that can be accessed via `#last_polled_at`
56
+ def polled
57
+ @last_polled_at = monotonic_now
58
+ end
59
+
49
60
  # Allows to iterate over all the topics and partitions messages
50
61
  #
51
62
  # @yieldparam [String] topic name
data/lib/karafka/contracts/topic.rb CHANGED
@@ -51,6 +51,20 @@ module Karafka
51
51
  end
52
52
  end
53
53
 
54
+ # When users redefine kafka scope settings per topic, they often forget to define the
55
+ # basic stuff as they assume it is auto-inherited. It is not (unless inherit flag used),
56
+ # leaving them with things like bootstrap.servers undefined. This checks that bootstrap
57
+ # servers are defined so we can catch those issues before they cause more problems.
58
+ virtual do |data, errors|
59
+ next unless errors.empty?
60
+
61
+ kafka = data.fetch(:kafka)
62
+
63
+ next if kafka.key?(:'bootstrap.servers')
64
+
65
+ [[%w[kafka bootstrap.servers], :missing]]
66
+ end
67
+
54
68
  virtual do |data, errors|
55
69
  next unless errors.empty?
56
70
  next unless ::Karafka::App.config.strict_topics_namespacing
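In practice this catches routing blocks that override `kafka` per topic but omit the basics. A sketch of a setup that passes the new validation (`KarafkaApp` and `PaymentsConsumer` are placeholders):

```ruby
class KarafkaApp < Karafka::App
  routes.draw do
    topic :payments do
      consumer PaymentsConsumer

      # Per-topic kafka overrides do not automatically inherit the defaults,
      # so bootstrap.servers must be repeated here - omitting it now fails
      # the topic contract instead of surfacing later as a librdkafka error
      kafka(
        'bootstrap.servers': 'kafka-1:9092,kafka-2:9092',
        'max.poll.interval.ms': 600_000
      )
    end
  end
end
```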
data/lib/karafka/helpers/config_importer.rb CHANGED
@@ -25,6 +25,19 @@ module Karafka
25
25
  RUBY
26
26
  end
27
27
  end
28
+
29
+ # @param model [Object] object to which we want to add the config fetcher on a class level
30
+ def extended(model)
31
+ super
32
+
33
+ @attributes.each do |name, path|
34
+ model.class_eval <<~RUBY, __FILE__, __LINE__ + 1
35
+ def self.#{name}
36
+ @#{name} ||= ::Karafka::App.config.#{path.join('.')}
37
+ end
38
+ RUBY
39
+ end
40
+ end
28
41
  end
29
42
  end
30
43
  end
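The new `extended` hook mirrors what `include` already did, but defines the readers on the class itself. A small sketch (the `ClusterInspector` class is hypothetical; `client_id` and `kafka` are real root config keys):

```ruby
class ClusterInspector
  extend ::Karafka::Helpers::ConfigImporter.new(
    client_id: %i[client_id],
    kafka: %i[kafka]
  )

  # Both readers are defined as class methods and memoize the config lookup
  def self.describe
    "#{client_id} => #{kafka[:'bootstrap.servers']}"
  end
end
```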
data/lib/karafka/instrumentation/logger_listener.rb CHANGED
@@ -127,6 +127,21 @@ module Karafka
127
127
  MSG
128
128
  end
129
129
 
130
+ # Prints info about seeking to a particular location
131
+ #
132
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
133
+ def on_consumer_consuming_seek(event)
134
+ topic = event[:topic]
135
+ partition = event[:partition]
136
+ seek_offset = event[:message].offset
137
+ consumer = event[:caller]
138
+
139
+ info <<~MSG.tr("\n", ' ').strip!
140
+ [#{consumer.id}] Seeking from #{consumer.class}
141
+ on topic #{topic}/#{partition} to offset #{seek_offset}
142
+ MSG
143
+ end
144
+
130
145
  # Logs info about system signals that Karafka received and prints backtrace for threads in
131
146
  # case of ttin
132
147
  #
data/lib/karafka/instrumentation/notifications.rb CHANGED
@@ -56,6 +56,7 @@ module Karafka
56
56
  consumer.consumed
57
57
  consumer.consuming.pause
58
58
  consumer.consuming.retry
59
+ consumer.consuming.seek
59
60
 
60
61
  consumer.before_schedule_idle
61
62
  consumer.idle
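Besides the built-in logger listener, the new event can be consumed directly. A minimal subscription sketch using the payload keys published by `#seek`:

```ruby
Karafka.monitor.subscribe('consumer.consuming.seek') do |event|
  Karafka.logger.info(
    "Seek on #{event[:topic]}/#{event[:partition]} " \
    "to offset #{event[:message].offset}"
  )
end
```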
data/lib/karafka/messages/message.rb CHANGED
@@ -8,15 +8,9 @@ module Karafka
8
8
  # heavy-deserialization data without slowing down the whole application.
9
9
  class Message
10
10
  extend Forwardable
11
-
12
- class << self
13
- # @return [Object] general parser
14
- # @note We cache it here for performance reasons. It is 2.5x times faster than getting it
15
- # via the config chain.
16
- def parser
17
- @parser ||= App.config.internal.messages.parser
18
- end
19
- end
11
+ extend Helpers::ConfigImporter.new(
12
+ parser: %i[internal messages parser]
13
+ )
20
14
 
21
15
  attr_reader :metadata
22
16
  # raw payload needs to be mutable as we want to have option to change it in the parser
data/lib/karafka/pro/active_job/dispatcher.rb CHANGED
@@ -29,6 +29,9 @@ module Karafka
29
29
  partitioner: nil,
30
30
  # Allows for usage of `:key` or `:partition_key`
31
31
  partition_key_type: :key,
32
+ # Topic to where this message should go when using scheduled messages. When defined,
33
+ # it will be used with `enqueue_at`. If not defined it will raise an error.
34
+ scheduled_messages_topic: nil,
32
35
  # Allows for setting a callable producer since at the moment of defining the class,
33
36
  # variants may not be available
34
37
  #
@@ -83,6 +86,31 @@ module Karafka
83
86
  end
84
87
  end
85
88
 
89
+ # Will enqueue a job to run in the future
90
+ #
91
+ # @param job [Object] job we want to enqueue
92
+ # @param timestamp [Time] time when job should run
93
+ def dispatch_at(job, timestamp)
94
+ target_message = dispatch_details(job).merge!(
95
+ topic: job.queue_name,
96
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
97
+ )
98
+
99
+ proxy_message = Pro::ScheduledMessages.schedule(
100
+ message: target_message,
101
+ epoch: timestamp.to_i,
102
+ envelope: {
103
+ # Select the scheduled messages proxy topic
104
+ topic: fetch_option(job, :scheduled_messages_topic, DEFAULTS)
105
+ }
106
+ )
107
+
108
+ producer(job).public_send(
109
+ fetch_option(job, :dispatch_method, DEFAULTS),
110
+ proxy_message
111
+ )
112
+ end
113
+
86
114
  private
87
115
 
88
116
  # Selects the producer based on options. If callable `:producer` is defined, it will use
data/lib/karafka/pro/active_job/job_options_contract.rb CHANGED
@@ -28,12 +28,21 @@ module Karafka
28
28
  optional(:producer) { |val| val.nil? || val.respond_to?(:call) }
29
29
  optional(:partitioner) { |val| val.respond_to?(:call) }
30
30
  optional(:partition_key_type) { |val| %i[key partition_key partition].include?(val) }
31
+
32
+ # Whether this is a legit scheduled messages topic will be validated during the first
33
+ # dispatch, so we do not repeat validations here
34
+ optional(:scheduled_messages_topic) do |val|
35
+ (val.is_a?(String) || val.is_a?(Symbol)) &&
36
+ ::Karafka::Contracts::TOPIC_REGEXP.match?(val.to_s)
37
+ end
38
+
31
39
  optional(:dispatch_method) do |val|
32
40
  %i[
33
41
  produce_async
34
42
  produce_sync
35
43
  ].include?(val)
36
44
  end
45
+
37
46
  optional(:dispatch_many_method) do |val|
38
47
  %i[
39
48
  produce_many_async
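From the job side, the new option is set through the usual `karafka_options` DSL, as sketched below (the `ReminderJob` class and the `scheduled_messages` topic name are placeholders; Pro with Scheduled Messages is assumed):

```ruby
class ReminderJob < ActiveJob::Base
  queue_as :reminders

  # scheduled_messages_topic points at the Scheduled Messages proxy topic
  # used for enqueue_at dispatches
  karafka_options(
    scheduled_messages_topic: 'scheduled_messages',
    dispatch_method: :produce_async
  )

  def perform(reminder_id); end
end

# Routed through the Pro dispatcher's #dispatch_at
ReminderJob.set(wait_until: Time.now + 900).perform_later(123)
```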
data/lib/karafka/pro/loader.rb CHANGED
@@ -74,8 +74,8 @@ module Karafka
74
74
  # that need to have some special configuration stuff injected into config, etc
75
75
  def features
76
76
  [
77
- Encryption,
78
77
  Cleaner,
78
+ Encryption,
79
79
  RecurringTasks,
80
80
  ScheduledMessages
81
81
  ]
data/lib/karafka/pro/processing/adaptive_iterator/consumer.rb ADDED
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Namespace for adaptive iterator consumer components
18
+ module AdaptiveIterator
19
+ # Consumer enhancements needed to wrap the batch iterator for adaptive iterating
20
+ # It automatically marks as consumed, ensures that we do not reach `max.poll.interval.ms`
21
+ # and does other stuff to simplify user per-message processing
22
+ module Consumer
23
+ # @param args [Array] anything accepted by `Karafka::Messages::Messages#each`
24
+ def each(*args)
25
+ adi_config = topic.adaptive_iterator
26
+
27
+ tracker = Tracker.new(
28
+ adi_config.safety_margin,
29
+ coordinator.last_polled_at,
30
+ topic.subscription_group.kafka.fetch(:'max.poll.interval.ms')
31
+ )
32
+
33
+ messages.each(*args) do |message|
34
+ # Always stop if we've lost the assignment
35
+ return if revoked?
36
+ # No automatic marking risk when mom is enabled so we can fast stop
37
+ return if Karafka::App.done? && topic.manual_offset_management?
38
+
39
+ # Seek request on done will allow us to stop without marking the offset when user had
40
+ # the automatic offset marking. This should not be a big network traffic issue for
41
+ # the end user as we're stopping anyhow but should improve shutdown time
42
+ if tracker.enough? || Karafka::App.done?
43
+ # Enough means we no longer have time to process more data without polling as we
44
+ # risk reaching max poll interval. Instead we seek and we will poll again soon.
45
+ seek(message.offset, reset_offset: true)
46
+
47
+ return
48
+ end
49
+
50
+ tracker.track { yield(message) }
51
+
52
+ # Clean if this is what user configured
53
+ message.clean! if adi_config.clean_after_yielding?
54
+
55
+ public_send(adi_config.marking_method, message)
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
data/lib/karafka/pro/processing/adaptive_iterator/tracker.rb ADDED
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module AdaptiveIterator
18
+ # Tracker is responsible for monitoring the processing of messages within the poll
19
+ # interval limitation.
20
+ # It ensures that the consumer does not exceed the maximum poll interval by tracking the
21
+ # processing cost and determining when to halt further processing (if needed).
22
+ class Tracker
23
+ include Karafka::Core::Helpers::Time
24
+
25
+ # Initializes a new Tracker instance.
26
+ #
27
+ # @param safety_margin [Float] The safety margin percentage (0-100) to leave as a buffer.
28
+ # @param last_polled_at [Float] The timestamp of the last polling in milliseconds.
29
+ # @param max_poll_interval_ms [Integer] The maximum poll interval time in milliseconds.
30
+ def initialize(
31
+ safety_margin,
32
+ last_polled_at,
33
+ max_poll_interval_ms
34
+ )
35
+ @safety_margin = safety_margin / 100.0 # Convert percentage to decimal
36
+ @last_polled_at = last_polled_at
37
+ @max_processing_cost = 0
38
+ @max_poll_interval_ms = max_poll_interval_ms
39
+ end
40
+
41
+ # Tracks the processing time of a block and updates the maximum processing cost.
42
+ #
43
+ # @yield Executes the block, measuring the time taken for processing.
44
+ def track
45
+ before = monotonic_now
46
+
47
+ yield
48
+
49
+ time_taken = monotonic_now - before
50
+
51
+ return unless time_taken > @max_processing_cost
52
+
53
+ @max_processing_cost = time_taken
54
+ end
55
+
56
+ # Determines if there is enough time left to process more messages without exceeding the
57
+ # maximum poll interval, considering both the safety margin and adaptive margin.
58
+ #
59
+ # @return [Boolean] Returns true if it is time to stop processing. False otherwise.
60
+ def enough?
61
+ elapsed_time_ms = monotonic_now - @last_polled_at
62
+ remaining_time_ms = @max_poll_interval_ms - elapsed_time_ms
63
+
64
+ safety_margin_ms = @max_poll_interval_ms * @safety_margin
65
+
66
+ return true if remaining_time_ms <= safety_margin_ms
67
+ return true if remaining_time_ms - @max_processing_cost <= safety_margin_ms
68
+
69
+ false
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end
data/lib/karafka/pro/processing/expansions_selector.rb CHANGED
@@ -25,6 +25,7 @@ module Karafka
25
25
  expansions = super
26
26
  expansions << Pro::Processing::Piping::Consumer
27
27
  expansions << Pro::Processing::OffsetMetadata::Consumer if topic.offset_metadata?
28
+ expansions << Pro::Processing::AdaptiveIterator::Consumer if topic.adaptive_iterator?
28
29
  expansions << Pro::Processing::PeriodicJob::Consumer if topic.periodic_job?
29
30
  expansions
30
31
  end
data/lib/karafka/pro/routing/features/adaptive_iterator/config.rb ADDED
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ class AdaptiveIterator < Base
19
+ # Adaptive Iterator configuration
20
+ Config = Struct.new(
21
+ :active,
22
+ :safety_margin,
23
+ :marking_method,
24
+ :clean_after_yielding,
25
+ keyword_init: true
26
+ ) do
27
+ alias_method :active?, :active
28
+ alias_method :clean_after_yielding?, :clean_after_yielding
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
data/lib/karafka/pro/routing/features/adaptive_iterator/contracts/topic.rb ADDED
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ class AdaptiveIterator < Base
19
+ # Namespace for adaptive iterator contracts
20
+ module Contracts
21
+ # Contract to validate configuration of the adaptive iterator feature
22
+ class Topic < Karafka::Contracts::Base
23
+ configure do |config|
24
+ config.error_messages = YAML.safe_load(
25
+ File.read(
26
+ File.join(Karafka.gem_root, 'config', 'locales', 'pro_errors.yml')
27
+ )
28
+ ).fetch('en').fetch('validations').fetch('topic')
29
+ end
30
+
31
+ nested(:adaptive_iterator) do
32
+ required(:active) { |val| [true, false].include?(val) }
33
+ required(:safety_margin) { |val| val.is_a?(Integer) && val.positive? && val < 100 }
34
+ required(:clean_after_yielding) { |val| [true, false].include?(val) }
35
+
36
+ required(:marking_method) do |val|
37
+ %i[mark_as_consumed mark_as_consumed!].include?(val)
38
+ end
39
+ end
40
+
41
+ # Since adaptive iterator uses `#seek` and can break processing in the middle, we
42
+ # cannot use it with virtual partitions that can process data in a distributed
43
+ # manner
44
+ virtual do |data, errors|
45
+ next unless errors.empty?
46
+
47
+ adaptive_iterator = data[:adaptive_iterator]
48
+ virtual_partitions = data[:virtual_partitions]
49
+
50
+ next unless adaptive_iterator[:active]
51
+ next unless virtual_partitions[:active]
52
+
53
+ [[%i[adaptive_iterator], :with_virtual_partitions]]
54
+ end
55
+
56
+ # There is no point of using the adaptive iterator with LRJ because of how LRJ works
57
+ virtual do |data, errors|
58
+ next unless errors.empty?
59
+
60
+ adaptive_iterator = data[:adaptive_iterator]
61
+ long_running_jobs = data[:long_running_job]
62
+
63
+ next unless adaptive_iterator[:active]
64
+ next unless long_running_jobs[:active]
65
+
66
+ [[%i[adaptive_iterator], :with_long_running_job]]
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
data/lib/karafka/pro/routing/features/adaptive_iterator/topic.rb ADDED
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ class AdaptiveIterator < Base
19
+ # Topic extension allowing us to enable and configure adaptive iterator
20
+ module Topic
21
+ # @param active [Boolean] should we use the automatic adaptive iterator
22
+ # @param safety_margin [Integer]
23
+ # How big of a margin we leave ourselves so we can safely communicate back with
24
+ # Kafka, etc. We stop and seek back when we've burned 85% of the time by default.
25
+ # We leave 15% of time for post-processing operations so we have space before we
26
+ # hit max.poll.interval.ms.
27
+ # @param marking_method [Symbol] If we should, how should we mark
28
+ # @param clean_after_yielding [Boolean] Should we clean post-yielding via the
29
+ # cleaner API
30
+ def adaptive_iterator(
31
+ active: false,
32
+ safety_margin: 10,
33
+ marking_method: :mark_as_consumed,
34
+ clean_after_yielding: true
35
+ )
36
+ @adaptive_iterator ||= Config.new(
37
+ active: active,
38
+ safety_margin: safety_margin,
39
+ marking_method: marking_method,
40
+ clean_after_yielding: clean_after_yielding
41
+ )
42
+ end
43
+
44
+ # @return [Boolean] Is adaptive iterator active. It is always `true`, since we use it
45
+ # via explicit messages batch wrapper
46
+ def adaptive_iterator?
47
+ adaptive_iterator.active?
48
+ end
49
+
50
+ # @return [Hash] topic with all its native configuration options plus poll guarding
51
+ # setup configuration.
52
+ def to_h
53
+ super.merge(
54
+ adaptive_iterator: adaptive_iterator.to_h
55
+ ).freeze
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
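Putting the routing options and the wrapped `#each` together, a typical setup could look like the sketch below (`VisitsConsumer`, the `:visits` topic and the `Visit` model are hypothetical; the tuning values are just examples):

```ruby
class KarafkaApp < Karafka::App
  routes.draw do
    topic :visits do
      consumer VisitsConsumer

      adaptive_iterator(
        active: true,
        safety_margin: 15,                 # stop once ~85% of max.poll.interval.ms is used
        marking_method: :mark_as_consumed,
        clean_after_yielding: true
      )
    end
  end
end

class VisitsConsumer < Karafka::BaseConsumer
  def consume
    # The wrapped #each auto-marks, cleans and seeks back when time runs out,
    # so there is no need for manual max.poll.interval.ms management
    each do |message|
      Visit.create!(message.payload)
    end
  end
end
```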
data/lib/karafka/pro/routing/features/adaptive_iterator.rb ADDED
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ # Feature that pro-actively monitors remaining time until max poll interval ms and
19
+ # cost of processing of each message in a batch. When there is no more time to process
20
+ # more messages from the batch, it will seek back so we do not reach max poll interval.
21
+ # It can be useful when we reach this once in a while. For a constant long-running jobs,
22
+ # please use the Long-Running Jobs feature instead.
23
+ #
24
+ # It also provides some wrapping over typical operations users do, like stopping if
25
+ # revoked, auto-marking, etc
26
+ class AdaptiveIterator < Base
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
data/lib/karafka/pro/scheduled_messages/contracts/message.rb CHANGED
@@ -28,17 +28,33 @@ module Karafka
28
28
  ).fetch('en').fetch('validations').fetch('scheduled_messages_message')
29
29
  end
30
30
 
31
- # Headers we expect in each message of type "message" that goes to our scheduled messages
32
- # topic
33
- EXPECTED_HEADERS = %w[
31
+ # Headers we expect in each message of type "schedule" that goes to our scheduled
32
+ # messages topic
33
+ EXPECTED_SCHEDULE_HEADERS = %w[
34
34
  schedule_schema_version
35
35
  schedule_target_epoch
36
36
  schedule_source_type
37
37
  schedule_target_topic
38
38
  ].freeze
39
39
 
40
+ # Headers we expect in each message of type "cancel"
41
+ EXPECTED_CANCEL_HEADERS = %w[
42
+ schedule_schema_version
43
+ schedule_source_type
44
+ ].freeze
45
+
40
46
  required(:key) { |val| val.is_a?(String) && val.size.positive? }
41
- required(:headers) { |val| val.is_a?(Hash) && (val.keys & EXPECTED_HEADERS).size == 4 }
47
+
48
+ # Ensure that schedule has all correct keys and that others have other related data
49
+ required(:headers) do |val|
50
+ next false unless val.is_a?(Hash)
51
+
52
+ if val['schedule_source_type'] == 'message'
53
+ (val.keys & EXPECTED_SCHEDULE_HEADERS).size >= 4
54
+ else
55
+ (val.keys & EXPECTED_CANCEL_HEADERS).size >= 2
56
+ end
57
+ end
42
58
 
43
59
  # Make sure, that schedule_target_epoch is not older than grace period behind us.
44
60
  # While this is not ideal verification of scheduling stuff in the past, at least it will
@@ -46,6 +62,9 @@ module Karafka
46
62
  virtual do |data, errors|
47
63
  next unless errors.empty?
48
64
 
65
+ # Validate epoch only for schedules
66
+ next unless data[:headers]['schedule_source_type'] == 'schedule'
67
+
49
68
  epoch_time = data[:headers].fetch('schedule_target_epoch').to_i
50
69
 
51
70
  # We allow for small lag as those will be dispatched but we should prevent dispatching
data/lib/karafka/pro/scheduled_messages/day.rb CHANGED
@@ -29,7 +29,7 @@ module Karafka
29
29
  def initialize
30
30
  @created_at = Time.now.to_i
31
31
 
32
- time = Time.at(@created_at)
32
+ time = Time.at(@created_at).utc
33
33
 
34
34
  @starts_at = Time.utc(time.year, time.month, time.day).to_i
35
35
  @ends_at = @starts_at + 86_399
data/lib/karafka/pro/scheduled_messages/proxy.rb CHANGED
@@ -87,12 +87,7 @@ module Karafka
87
87
  }.merge(envelope)
88
88
 
89
89
  enrich(proxy_message, message)
90
-
91
- # Final validation to make sure all user provided extra data and what we have built
92
- # complies with our requirements
93
- POST_CONTRACT.validate!(proxy_message)
94
- # After proxy specific validations we also ensure, that the final form is correct
95
- MSG_CONTRACT.validate!(proxy_message, WaterDrop::Errors::MessageInvalidError)
90
+ validate!(proxy_message)
96
91
 
97
92
  proxy_message
98
93
  end
@@ -106,7 +101,7 @@ module Karafka
106
101
  # @note Technically it is a tombstone but we differentiate just for the sake of ability
107
102
  # to debug stuff if needed
108
103
  def cancel(key:, envelope: {})
109
- {
104
+ proxy_message = {
110
105
  key: key,
111
106
  payload: nil,
112
107
  headers: {
@@ -114,6 +109,11 @@ module Karafka
114
109
  'schedule_source_type' => 'cancel'
115
110
  }
116
111
  }.merge(envelope)
112
+
113
+ # Ensure user provided envelope is with all expected details
114
+ validate!(proxy_message)
115
+
116
+ proxy_message
117
117
  end
118
118
 
119
119
  # Builds tombstone with the dispatched message details. Those details can be used
@@ -169,6 +169,15 @@ module Karafka
169
169
  proxy_message[:partition_key] = message.fetch(attribute).to_s
170
170
  end
171
171
  end
172
+
173
+ # Final validations to make sure all user provided extra data and what we have built
174
+ # complies with our requirements
175
+ # @param proxy_message [Hash] our message envelope
176
+ def validate!(proxy_message)
177
+ POST_CONTRACT.validate!(proxy_message)
178
+ # After proxy specific validations we also ensure, that the final form is correct
179
+ MSG_CONTRACT.validate!(proxy_message, WaterDrop::Errors::MessageInvalidError)
180
+ end
172
181
  end
173
182
  end
174
183
  end
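With the cancellation envelope now validated the same way as schedules, both flows run through `validate!`. A producer-side sketch (topic names are placeholders; it assumes the module-level `.cancel` helper mirrors the `.schedule` delegation used by the Pro dispatcher):

```ruby
proxy = Karafka::Pro::ScheduledMessages.schedule(
  message: { topic: 'orders_events', key: 'order-1', payload: '{}' },
  epoch: Time.now.to_i + 3600,
  envelope: { topic: 'scheduled_messages' }
)
Karafka::App.producer.produce_async(proxy)

# Cancellations are now validated against the same envelope rules
cancellation = Karafka::Pro::ScheduledMessages.cancel(
  key: proxy[:key],
  envelope: { topic: 'scheduled_messages' }
)
Karafka::App.producer.produce_async(cancellation)
```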
data/lib/karafka/processing/coordinator.rb CHANGED
@@ -19,6 +19,9 @@ module Karafka
19
19
  # any messages
20
20
  attr_accessor :eofed
21
21
 
22
+ # Last polled at time set based on the incoming last poll time
23
+ attr_accessor :last_polled_at
24
+
22
25
  def_delegators :@pause_tracker, :attempt, :paused?
23
26
 
24
27
  # @param topic [Karafka::Routing::Topic]
@@ -38,6 +41,7 @@ module Karafka
38
41
  @failure = false
39
42
  @eofed = false
40
43
  @changed_at = monotonic_now
44
+ @last_polled_at = @changed_at
41
45
  end
42
46
 
43
47
  # Starts the coordinator for given consumption jobs
data/lib/karafka/processing/executor.rb CHANGED
@@ -174,8 +174,11 @@ module Karafka
174
174
  expansions.each { |expansion| consumer.singleton_class.include(expansion) }
175
175
 
176
176
  consumer.client = @client
177
- consumer.producer = ::Karafka::App.producer
178
177
  consumer.coordinator = @coordinator
178
+ # We assign producer only when not available already. It may already be available if
179
+ # user redefined the `#producer` method for example. This can be useful for example when
180
+ # having a multi-cluster setup and using a totally custom producer
181
+ consumer.producer ||= ::Karafka::App.producer
179
182
  # Since we have some message-less flows (idle, etc), we initialize consumer with empty
180
183
  # messages set. In production we have persistent consumers, so this is not a performance
181
184
  # overhead as this will happen only once per consumer lifetime
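Because the executor now assigns the default producer only when none is present, redefining the `#producer` reader is enough to route a consumer's dispatches elsewhere. A sketch for a hypothetical second-cluster setup (`AnalyticsConsumer`, the producer constant and broker address are placeholders):

```ruby
ANALYTICS_PRODUCER = WaterDrop::Producer.new do |config|
  config.kafka = { 'bootstrap.servers': 'analytics-kafka:9092' }
end

class AnalyticsConsumer < Karafka::BaseConsumer
  # Overrides the default reader; the executor's `consumer.producer ||= ...`
  # will then leave this producer in place
  def producer
    ANALYTICS_PRODUCER
  end

  def consume
    messages.each do |message|
      producer.produce_async(topic: 'analytics_events', payload: message.raw_payload)
    end
  end
end
```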
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.4.11'
6
+ VERSION = '2.4.12'
7
7
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.11
4
+ version: 2.4.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  i9zWxov0mr44TWegTVeypcWGd/0nxu1+QHVNHJrpqlPBRvwQsUm7fwmRInGpcaB8
36
36
  ap8wNYvryYzrzvzUxIVFBVM5PacgkFqRmolCa8I7tdKQN+R1
37
37
  -----END CERTIFICATE-----
38
- date: 2024-09-04 00:00:00.000000000 Z
38
+ date: 2024-09-17 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: base64
@@ -290,6 +290,8 @@ files:
290
290
  - lib/karafka/pro/iterator/expander.rb
291
291
  - lib/karafka/pro/iterator/tpl_builder.rb
292
292
  - lib/karafka/pro/loader.rb
293
+ - lib/karafka/pro/processing/adaptive_iterator/consumer.rb
294
+ - lib/karafka/pro/processing/adaptive_iterator/tracker.rb
293
295
  - lib/karafka/pro/processing/collapser.rb
294
296
  - lib/karafka/pro/processing/coordinator.rb
295
297
  - lib/karafka/pro/processing/coordinators/errors_tracker.rb
@@ -386,6 +388,10 @@ files:
386
388
  - lib/karafka/pro/recurring_tasks/task.rb
387
389
  - lib/karafka/pro/routing/features/active_job.rb
388
390
  - lib/karafka/pro/routing/features/active_job/builder.rb
391
+ - lib/karafka/pro/routing/features/adaptive_iterator.rb
392
+ - lib/karafka/pro/routing/features/adaptive_iterator/config.rb
393
+ - lib/karafka/pro/routing/features/adaptive_iterator/contracts/topic.rb
394
+ - lib/karafka/pro/routing/features/adaptive_iterator/topic.rb
389
395
  - lib/karafka/pro/routing/features/base.rb
390
396
  - lib/karafka/pro/routing/features/dead_letter_queue.rb
391
397
  - lib/karafka/pro/routing/features/dead_letter_queue/contracts/topic.rb
metadata.gz.sig CHANGED
Binary file