karafka 2.4.11 → 2.4.12

Files changed (40)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +1 -3
  4. data/CHANGELOG.md +12 -0
  5. data/Gemfile.lock +7 -7
  6. data/config/locales/errors.yml +1 -0
  7. data/config/locales/pro_errors.yml +8 -0
  8. data/docker-compose.yml +1 -1
  9. data/lib/active_job/queue_adapters/karafka_adapter.rb +12 -7
  10. data/lib/karafka/active_job/dispatcher.rb +13 -0
  11. data/lib/karafka/app.rb +17 -0
  12. data/lib/karafka/base_consumer.rb +18 -6
  13. data/lib/karafka/connection/client.rb +4 -0
  14. data/lib/karafka/connection/listener.rb +2 -1
  15. data/lib/karafka/connection/messages_buffer.rb +5 -2
  16. data/lib/karafka/connection/raw_messages_buffer.rb +11 -0
  17. data/lib/karafka/contracts/topic.rb +14 -0
  18. data/lib/karafka/helpers/config_importer.rb +13 -0
  19. data/lib/karafka/instrumentation/logger_listener.rb +15 -0
  20. data/lib/karafka/instrumentation/notifications.rb +1 -0
  21. data/lib/karafka/messages/message.rb +3 -9
  22. data/lib/karafka/pro/active_job/dispatcher.rb +28 -0
  23. data/lib/karafka/pro/active_job/job_options_contract.rb +9 -0
  24. data/lib/karafka/pro/loader.rb +1 -1
  25. data/lib/karafka/pro/processing/adaptive_iterator/consumer.rb +62 -0
  26. data/lib/karafka/pro/processing/adaptive_iterator/tracker.rb +75 -0
  27. data/lib/karafka/pro/processing/expansions_selector.rb +1 -0
  28. data/lib/karafka/pro/routing/features/adaptive_iterator/config.rb +34 -0
  29. data/lib/karafka/pro/routing/features/adaptive_iterator/contracts/topic.rb +74 -0
  30. data/lib/karafka/pro/routing/features/adaptive_iterator/topic.rb +62 -0
  31. data/lib/karafka/pro/routing/features/adaptive_iterator.rb +31 -0
  32. data/lib/karafka/pro/scheduled_messages/contracts/message.rb +23 -4
  33. data/lib/karafka/pro/scheduled_messages/day.rb +1 -1
  34. data/lib/karafka/pro/scheduled_messages/proxy.rb +16 -7
  35. data/lib/karafka/processing/coordinator.rb +4 -0
  36. data/lib/karafka/processing/executor.rb +4 -1
  37. data/lib/karafka/version.rb +1 -1
  38. data.tar.gz.sig +0 -0
  39. metadata +8 -2
  40. metadata.gz.sig +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 346743e75bc80a6a3e04361aa6b7b5caa540b31697bc25bb138baa490edd0a93
-   data.tar.gz: 6a87b0f7af16210b93732f55e98072b0c2eaf4541b700bb1929864b515a91747
+   metadata.gz: f96e616f91d60d5054e276f52cc2ecde9d4a6f6c8e25b3d61c384936b9239e4f
+   data.tar.gz: 8dfad6cc5d0cb4cdcc885b58e129929f67b0fdc4ee7922c21f9728cbdd863f5b
  SHA512:
-   metadata.gz: c20b7bb58d31b7771e593334783285edf13bfaff977350226aeb5a979fe1fd92482bb27acf72385a05a762d69581260196f953bdcea4aafbd1d50d0842fec9a2
-   data.tar.gz: 07b991c9048f20352c1670a5782c028757e177f7f062ce3a1ce113fd01a97184717b8c7d3612d515993cb048e21b3dcf1804b22852c1b6d9bccebb16463ca2aa
+   metadata.gz: bfea8217fb7ba89158b926417a0dd0cab42460a607b3bf25a62a400ab83e510806b966f81bd2ca76144e5aff62c99fb58b35be94ca1ad5c61ca113e760098243
+   data.tar.gz: ee66d2c6a11dc6baac3cc836ea5455d6c88838f1db7c0f9af9a92c3f5d3c7b8dd9d1f8c42dac4c9908042da3b192d8579466deecc687126640fc7bc5e0aeafb2
checksums.yaml.gz.sig CHANGED
Binary file
data/.github/workflows/ci.yml CHANGED
@@ -117,7 +117,6 @@ jobs:
          - '3.3'
          - '3.2'
          - '3.1'
-         - '3.0'
    steps:
      - uses: actions/checkout@v4
      - name: Install package dependencies
@@ -165,7 +164,7 @@ jobs:
        run: bin/integrations --exclude '/pro'

  integrations_pro:
-   timeout-minutes: 40
+   timeout-minutes: 45
    runs-on: ubuntu-latest
    needs: diffend
    strategy:
@@ -176,7 +175,6 @@ jobs:
          - '3.3'
          - '3.2'
          - '3.1'
-         - '3.0'
    steps:
      - uses: actions/checkout@v4
      - name: Install package dependencies
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
  # Karafka Framework Changelog

+ ## 2.4.12 (2024-09-17)
+ - **[Feature]** Provide the Adaptive Iterator feature as a fast alternative to Long-Running Jobs (Pro).
+ - [Enhancement] Provide `Consumer#each` as a delegation to the messages batch.
+ - [Enhancement] Verify the cancellation request envelope topic similarly to the schedule one.
+ - [Enhancement] Validate the presence of `bootstrap.servers` to avoid incomplete partial reconfiguration.
+ - [Enhancement] Support `ActiveJob#enqueue_at` via the Scheduled Messages feature (Pro).
+ - [Enhancement] Introduce `Karafka::App#debug!` that switches Karafka and the default producer into extensive debug mode. Useful for CLI debugging.
+ - [Enhancement] Support full overwrite of `BaseConsumer#producer`.
+ - [Enhancement] Transfer the time of the last poll back to the coordinator for more accurate metrics tracking.
+ - [Enhancement] Instrument `Consumer#seek` via `consumer.consuming.seek`.
+ - [Fix] Fix an incorrect time reference reload in scheduled messages.
+
  ## 2.4.11 (2024-09-04)
  - [Enhancement] Validate the envelope target topic type for Scheduled Messages.
  - [Enhancement] Support for `enqueue_after_transaction_commit` in Rails Active Job.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     karafka (2.4.11)
+     karafka (2.4.12)
        base64 (~> 0.2)
        karafka-core (>= 2.4.3, < 2.5.0)
        karafka-rdkafka (>= 0.17.2)
@@ -36,7 +36,7 @@ GEM
      erubi (1.13.0)
      et-orbi (1.2.11)
        tzinfo
-     factory_bot (6.4.6)
+     factory_bot (6.5.0)
        activesupport (>= 5.0.0)
      ffi (1.17.0)
      fugit (1.11.1)
@@ -55,20 +55,20 @@ GEM
      karafka-testing (2.4.6)
        karafka (>= 2.4.0, < 2.5.0)
        waterdrop (>= 2.7.0)
-     karafka-web (0.10.2)
+     karafka-web (0.10.3)
        erubi (~> 1.4)
        karafka (>= 2.4.10, < 2.5.0)
        karafka-core (>= 2.4.0, < 2.5.0)
        roda (~> 3.68, >= 3.69)
        tilt (~> 2.0)
-     logger (1.6.0)
+     logger (1.6.1)
      mini_portile2 (2.8.7)
      minitest (5.25.1)
      ostruct (0.6.0)
      raabro (1.4.0)
      rack (3.1.7)
      rake (13.2.1)
-     roda (3.83.0)
+     roda (3.84.0)
        rack
      rspec (3.13.0)
        rspec-core (~> 3.13.0)
@@ -93,9 +93,9 @@ GEM
      tilt (2.4.0)
      tzinfo (2.0.6)
        concurrent-ruby (~> 1.0)
-     waterdrop (2.7.4)
+     waterdrop (2.8.0)
        karafka-core (>= 2.4.3, < 3.0.0)
-       karafka-rdkafka (>= 0.15.1)
+       karafka-rdkafka (>= 0.17.5)
        zeitwerk (~> 2.3)
      zeitwerk (2.6.18)
data/config/locales/errors.yml CHANGED
@@ -154,6 +154,7 @@ en:
      partitioner_format: 'needs to respond to #call'
      partition_key_type_format: 'needs to be either :key or :partition_key'
      producer_format: 'needs to respond to #call'
+     scheduled_messages_topic_format: 'needs to be a string with a Kafka accepted format'

    test:
      missing: needs to be present
data/config/locales/pro_errors.yml CHANGED
@@ -76,6 +76,14 @@ en:
      direct_assignments_swarm_overbooked: 'cannot allocate partitions in swarm that were not assigned'
      direct_assignments_patterns_active: 'patterns cannot be used with direct assignments'

+     adaptive_iterator.active_missing: needs to be present
+     adaptive_iterator.active_format: 'needs to be boolean'
+     adaptive_iterator.marking_method_format: 'needs to be either #mark_as_consumed or #mark_as_consumed!'
+     adaptive_iterator.clean_after_yielding_format: 'needs to be boolean'
+     adaptive_iterator.safety_margin_format: 'needs to be between 1 and 99'
+     adaptive_iterator_with_virtual_partitions: 'cannot be used with virtual partitions'
+     adaptive_iterator_with_long_running_job: 'cannot be used with long running jobs'
+
    consumer_group:
      patterns_format: must be an array with hashes
      patterns_missing: needs to be present
data/docker-compose.yml CHANGED
@@ -3,7 +3,7 @@ version: '2'
  services:
    kafka:
      container_name: kafka
-     image: confluentinc/cp-kafka:7.7.0
+     image: confluentinc/cp-kafka:7.7.1

      ports:
        - 9092:9092
data/lib/active_job/queue_adapters/karafka_adapter.rb CHANGED
@@ -7,27 +7,32 @@ module ActiveJob
    # Karafka adapter for enqueuing jobs
    # This is here for ease of integration with ActiveJob.
    class KarafkaAdapter
+     include Karafka::Helpers::ConfigImporter.new(
+       dispatcher: %i[internal active_job dispatcher]
+     )
+
      # Enqueues the job using the configured dispatcher
      #
      # @param job [Object] job that should be enqueued
      def enqueue(job)
-       ::Karafka::App.config.internal.active_job.dispatcher.dispatch(job)
+       dispatcher.dispatch(job)
      end

      # Enqueues multiple jobs in one go
      # @param jobs [Array<Object>] jobs that we want to enqueue
      # @return [Integer] number of jobs enqueued (required by Rails)
      def enqueue_all(jobs)
-       ::Karafka::App.config.internal.active_job.dispatcher.dispatch_many(jobs)
+       dispatcher.dispatch_many(jobs)
        jobs.size
      end

-     # Raises info, that Karafka backend does not support scheduling jobs
+     # Delegates time sensitive dispatch to the dispatcher. OSS will raise error, Pro will handle
+     # this as it supports scheduled messages.
      #
-     # @param _job [Object] job we cannot enqueue
-     # @param _timestamp [Time] time when job should run
-     def enqueue_at(_job, _timestamp)
-       raise NotImplementedError, 'This queueing backend does not support scheduling jobs.'
+     # @param job [Object] job we want to enqueue
+     # @param timestamp [Time] time when job should run
+     def enqueue_at(job, timestamp)
+       dispatcher.dispatch_at(job, timestamp)
      end

      # @return [true] should we by default enqueue after the transaction and not during.
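For context, a minimal sketch of how this surfaces on the Active Job side with Karafka Pro (the job class, topic names, and payload are hypothetical; the `scheduled_messages_topic` option comes from the Pro dispatcher changes further down):

```ruby
# Hypothetical job; with Karafka Pro, #enqueue_at above routes through
# Dispatcher#dispatch_at and the Scheduled Messages feature
class WelcomeEmailJob < ActiveJob::Base
  queue_as :emails

  # Pro job option validated by the job options contract in this release
  karafka_options(scheduled_messages_topic: 'scheduled_messages')

  def perform(user_id)
    puts "Sending welcome email to user ##{user_id}"
  end
end

# Immediate dispatch via the configured dispatcher
WelcomeEmailJob.perform_later(1)

# Scheduled dispatch (Pro); in OSS this path raises NotImplementedError
WelcomeEmailJob.set(wait_until: Time.now + 3600).perform_later(2)
```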
data/lib/karafka/active_job/dispatcher.rb CHANGED
@@ -46,6 +46,19 @@ module Karafka
        end
      end

+     # Raises info, that Karafka backend does not support scheduling jobs
+     #
+     # @param _job [Object] job we cannot enqueue
+     # @param _timestamp [Time] time when job should run
+     #
+     # @note Karafka Pro supports this feature
+     def dispatch_at(_job, _timestamp)
+       raise NotImplementedError, <<~ERROR_MESSAGE
+         This queueing backend does not support scheduling jobs.
+         Consider using Karafka Pro, which supports this via the Scheduled Messages feature.
+       ERROR_MESSAGE
+     end
+
      private

      # @param job [ActiveJob::Base] job
data/lib/karafka/app.rb CHANGED
@@ -95,6 +95,23 @@ module Karafka
          end
        RUBY
      end
+
+     # Forces the debug setup onto Karafka and default WaterDrop producer.
+     # This needs to run prior to any operations that would cache state, like consuming or
+     # producing messages.
+     #
+     # @param contexts [String] librdkafka low level debug contexts for granular debugging
+     def debug!(contexts = 'all')
+       logger.level = ::Logger::DEBUG
+       producer.config.logger.level = ::Logger::DEBUG
+
+       config.kafka[:debug] = contexts
+       producer.config.kafka[:debug] = contexts
+
+       consumer_groups.map(&:topics).flat_map(&:to_a).each do |topic|
+         topic.kafka[:debug] = contexts
+       end
+     end
    end
  end
end
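As a sketch, this could be invoked from a console session before any consuming or producing happens (the context list shown is an example of librdkafka debug contexts):

```ruby
# Switch Karafka and the default producer into full debug mode
Karafka::App.debug!

# Or limit librdkafka debugging to selected contexts only
Karafka::App.debug!('broker,protocol')
```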
data/lib/karafka/base_consumer.rb CHANGED
@@ -14,6 +14,8 @@ module Karafka
    def_delegators :producer, :produce_async, :produce_sync, :produce_many_async,
                   :produce_many_sync

+   def_delegators :messages, :each
+
    # @return [String] id of the current consumer
    attr_reader :id
    # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
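A minimal sketch of what this delegation enables inside a consumer (the consumer class and payload handling are hypothetical):

```ruby
class EventsConsumer < Karafka::BaseConsumer
  def consume
    # Equivalent to messages.each thanks to the delegation above
    each do |message|
      puts message.payload
    end
  end
end
```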
@@ -291,13 +293,23 @@ module Karafka
      coordinator.manual_seek if manual_seek
      coordinator.seek_offset = nil if reset_offset

-     client.seek(
-       Karafka::Messages::Seek.new(
-         topic.name,
-         partition,
-         offset
-       )
+     message = Karafka::Messages::Seek.new(
+       topic.name,
+       partition,
+       offset
      )
+
+     Karafka.monitor.instrument(
+       'consumer.consuming.seek',
+       caller: self,
+       topic: topic.name,
+       partition: partition,
+       message: message,
+       manual_seek: manual_seek,
+       reset_offset: reset_offset
+     ) do
+       client.seek(message)
+     end
    end

    # @return [Boolean] true if partition was revoked from the current consumer
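Since this goes through the standard notifications bus, it can be subscribed to like any other event; the payload keys below mirror what the `instrument` call above publishes:

```ruby
Karafka.monitor.subscribe('consumer.consuming.seek') do |event|
  puts "Seek on #{event[:topic]}/#{event[:partition]} " \
       "to offset #{event[:message].offset}"
end
```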
data/lib/karafka/connection/client.rb CHANGED
@@ -112,6 +112,10 @@ module Karafka
        # Fetch message within our time boundaries
        response = poll(time_poll.remaining)

+       # We track when last polling happened so we can provide means to detect upcoming
+       # `max.poll.interval.ms` limit
+       @buffer.polled
+
        case response
        when :tick_time
          nil
data/lib/karafka/connection/listener.rb CHANGED
@@ -336,7 +336,7 @@ module Karafka
        idle_jobs = []
        eofed_jobs = []

-       @messages_buffer.each do |topic, partition, messages, eof|
+       @messages_buffer.each do |topic, partition, messages, eof, last_polled_at|
          # In case we did not receive any new messages without eof we skip.
          # We may yield empty array here in case we have reached eof without new messages but in
          # such cases, we can run an eof job
@@ -344,6 +344,7 @@ module Karafka

          coordinator = @coordinators.find_or_create(topic, partition)
          coordinator.eofed = eof
+         coordinator.last_polled_at = last_polled_at

          # If we did not receive any messages and we did receive eof signal, we run the eofed
          # jobs so user can take actions on reaching eof
data/lib/karafka/connection/messages_buffer.rb CHANGED
@@ -42,6 +42,7 @@ module Karafka
      # Since it happens "right after" we've received the messages, it is close enough in time
      # to be used as the moment we received messages.
      received_at = Time.now
+     last_polled_at = raw_messages_buffer.last_polled_at

      raw_messages_buffer.each do |topic, partition, messages, eof|
        @size += messages.count
@@ -58,7 +59,8 @@

        @groups[topic][partition] = {
          eof: eof,
-         messages: built_messages
+         messages: built_messages,
+         last_polled_at: last_polled_at
        }
      end
    end
@@ -69,10 +71,11 @@
    # @yieldparam [Integer] partition number
    # @yieldparam [Array<Karafka::Messages::Message>] messages from a given topic partition
    # @yieldparam [Boolean] true if eof, false otherwise
+   # @yieldparam [Float] last polled at monotonic clock time
    def each
      @groups.each do |topic, partitions|
        partitions.each do |partition, details|
-         yield(topic, partition, details[:messages], details[:eof])
+         yield(topic, partition, details[:messages], details[:eof], details[:last_polled_at])
        end
      end
    end
data/lib/karafka/connection/raw_messages_buffer.rb CHANGED
@@ -12,11 +12,17 @@ module Karafka
    # @note We store data here in groups per topic partition to handle the revocation case, where
    #   we may need to remove messages from a single topic partition.
    class RawMessagesBuffer
+     include Karafka::Core::Helpers::Time
+
      attr_reader :size

+     # @return [Float] last polling time in milliseconds (monotonic)
+     attr_reader :last_polled_at
+
      # @return [Karafka::Connection::MessagesBuffer] buffer instance
      def initialize
        @size = 0
+       @last_polled_at = monotonic_now

        @groups = Hash.new do |topic_groups, topic|
          topic_groups[topic] = Hash.new do |partition_groups, partition|
@@ -46,6 +52,11 @@
        @groups[topic][partition][:eof] = true
      end

+     # Marks the last polling time that can be accessed via `#last_polled_at`
+     def polled
+       @last_polled_at = monotonic_now
+     end
+
      # Allows to iterate over all the topics and partitions messages
      #
      # @yieldparam [String] topic name
data/lib/karafka/contracts/topic.rb CHANGED
@@ -51,6 +51,20 @@ module Karafka
        end
      end

+     # When users redefine kafka scope settings per topic, they often forget to define the
+     # basic stuff as they assume it is auto-inherited. It is not (unless inherit flag used),
+     # leaving them with things like bootstrap.servers undefined. This checks that bootstrap
+     # servers are defined so we can catch those issues before they cause more problems.
+     virtual do |data, errors|
+       next unless errors.empty?
+
+       kafka = data.fetch(:kafka)
+
+       next if kafka.key?(:'bootstrap.servers')
+
+       [[%w[kafka bootstrap.servers], :missing]]
+     end
+
      virtual do |data, errors|
        next unless errors.empty?
        next unless ::Karafka::App.config.strict_topics_namespacing
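For context, a hedged sketch of the per-topic misconfiguration this guards against (broker address, topic, and consumer names are hypothetical): a topic-level `kafka` block replaces the whole hash, so `bootstrap.servers` has to be repeated there.

```ruby
class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': 'localhost:9092' }
  end

  routes.draw do
    topic :events do
      consumer EventsConsumer
      # Omitting bootstrap.servers here now fails validation at boot
      # instead of breaking at runtime
      kafka(
        'bootstrap.servers': 'localhost:9092',
        'enable.partition.eof': true
      )
    end
  end
end
```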
data/lib/karafka/helpers/config_importer.rb CHANGED
@@ -25,6 +25,19 @@ module Karafka
          RUBY
        end
      end
+
+     # @param model [Object] object to which we want to add the config fetcher on a class level
+     def extended(model)
+       super
+
+       @attributes.each do |name, path|
+         model.class_eval <<~RUBY, __FILE__, __LINE__ + 1
+           def self.#{name}
+             @#{name} ||= ::Karafka::App.config.#{path.join('.')}
+           end
+         RUBY
+       end
+     end
    end
  end
end
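A sketch of both usage modes of this helper as they appear in this release: `include` (as in the Active Job adapter above) defines instance-level readers, while `extend` (as in `Karafka::Messages::Message` below) defines class-level readers through this new `extended` hook. The `Example` class is hypothetical.

```ruby
class Example
  # Instance-level reader: #dispatcher
  include Karafka::Helpers::ConfigImporter.new(
    dispatcher: %i[internal active_job dispatcher]
  )

  # Class-level reader: Example.parser (relies on the extended hook)
  extend Karafka::Helpers::ConfigImporter.new(
    parser: %i[internal messages parser]
  )
end
```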
data/lib/karafka/instrumentation/logger_listener.rb CHANGED
@@ -127,6 +127,21 @@ module Karafka
        MSG
      end

+     # Prints info about seeking to a particular location
+     #
+     # @param event [Karafka::Core::Monitoring::Event] event details including payload
+     def on_consumer_consuming_seek(event)
+       topic = event[:topic]
+       partition = event[:partition]
+       seek_offset = event[:message].offset
+       consumer = event[:caller]
+
+       info <<~MSG.tr("\n", ' ').strip!
+         [#{consumer.id}] Seeking from #{consumer.class}
+         on topic #{topic}/#{partition} to offset #{seek_offset}
+       MSG
+     end
+
      # Logs info about system signals that Karafka received and prints backtrace for threads in
      # case of ttin
      #
data/lib/karafka/instrumentation/notifications.rb CHANGED
@@ -56,6 +56,7 @@ module Karafka
        consumer.consumed
        consumer.consuming.pause
        consumer.consuming.retry
+       consumer.consuming.seek

        consumer.before_schedule_idle
        consumer.idle
data/lib/karafka/messages/message.rb CHANGED
@@ -8,15 +8,9 @@ module Karafka
    # heavy-deserialization data without slowing down the whole application.
    class Message
      extend Forwardable
-
-     class << self
-       # @return [Object] general parser
-       # @note We cache it here for performance reasons. It is 2.5x times faster than getting it
-       #   via the config chain.
-       def parser
-         @parser ||= App.config.internal.messages.parser
-       end
-     end
+     extend Helpers::ConfigImporter.new(
+       parser: %i[internal messages parser]
+     )

      attr_reader :metadata
      # raw payload needs to be mutable as we want to have option to change it in the parser
data/lib/karafka/pro/active_job/dispatcher.rb CHANGED
@@ -29,6 +29,9 @@ module Karafka
          partitioner: nil,
          # Allows for usage of `:key` or `:partition_key`
          partition_key_type: :key,
+         # Topic to where this message should go when using scheduled messages. When defined,
+         # it will be used with `enqueue_at`. If not defined it will raise an error.
+         scheduled_messages_topic: nil,
          # Allows for setting a callable producer since at the moment of defining the class,
          # variants may not be available
          #
@@ -83,6 +86,31 @@ module Karafka
          end
        end

+       # Will enqueue a job to run in the future
+       #
+       # @param job [Object] job we want to enqueue
+       # @param timestamp [Time] time when job should run
+       def dispatch_at(job, timestamp)
+         target_message = dispatch_details(job).merge!(
+           topic: job.queue_name,
+           payload: ::ActiveSupport::JSON.encode(serialize_job(job))
+         )
+
+         proxy_message = Pro::ScheduledMessages.schedule(
+           message: target_message,
+           epoch: timestamp.to_i,
+           envelope: {
+             # Select the scheduled messages proxy topic
+             topic: fetch_option(job, :scheduled_messages_topic, DEFAULTS)
+           }
+         )
+
+         producer(job).public_send(
+           fetch_option(job, :dispatch_method, DEFAULTS),
+           proxy_message
+         )
+       end
+
        private

        # Selects the producer based on options. If callable `:producer` is defined, it will use
data/lib/karafka/pro/active_job/job_options_contract.rb CHANGED
@@ -28,12 +28,21 @@ module Karafka
        optional(:producer) { |val| val.nil? || val.respond_to?(:call) }
        optional(:partitioner) { |val| val.respond_to?(:call) }
        optional(:partition_key_type) { |val| %i[key partition_key partition].include?(val) }
+
+       # Whether this is a legit scheduled messages topic will be validated during the first
+       # dispatch, so we do not repeat validations here
+       optional(:scheduled_messages_topic) do |val|
+         (val.is_a?(String) || val.is_a?(Symbol)) &&
+           ::Karafka::Contracts::TOPIC_REGEXP.match?(val.to_s)
+       end
+
        optional(:dispatch_method) do |val|
          %i[
            produce_async
            produce_sync
          ].include?(val)
        end
+
        optional(:dispatch_many_method) do |val|
          %i[
            produce_many_async
data/lib/karafka/pro/loader.rb CHANGED
@@ -74,8 +74,8 @@ module Karafka
      #   that need to have some special configuration stuff injected into config, etc
      def features
        [
-         Encryption,
          Cleaner,
+         Encryption,
          RecurringTasks,
          ScheduledMessages
        ]
data/lib/karafka/pro/processing/adaptive_iterator/consumer.rb ADDED
@@ -0,0 +1,62 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Processing
+       # Namespace for adaptive iterator consumer components
+       module AdaptiveIterator
+         # Consumer enhancements needed to wrap the batch iterator for adaptive iterating
+         # It automatically marks as consumed, ensures that we do not reach `max.poll.interval.ms`
+         # and does other stuff to simplify user per-message processing
+         module Consumer
+           # @param args [Array] anything accepted by `Karafka::Messages::Messages#each`
+           def each(*args)
+             adi_config = topic.adaptive_iterator
+
+             tracker = Tracker.new(
+               adi_config.safety_margin,
+               coordinator.last_polled_at,
+               topic.subscription_group.kafka.fetch(:'max.poll.interval.ms')
+             )
+
+             messages.each(*args) do |message|
+               # Always stop if we've lost the assignment
+               return if revoked?
+               # No automatic marking risk when mom is enabled so we can fast stop
+               return if Karafka::App.done? && topic.manual_offset_management?
+
+               # Seek request on done will allow us to stop without marking the offset when user had
+               # the automatic offset marking. This should not be a big network traffic issue for
+               # the end user as we're stopping anyhow but should improve shutdown time
+               if tracker.enough? || Karafka::App.done?
+                 # Enough means we no longer have time to process more data without polling as we
+                 # risk reaching max poll interval. Instead we seek and we will poll again soon.
+                 seek(message.offset, reset_offset: true)
+
+                 return
+               end
+
+               tracker.track { yield(message) }
+
+               # Clean if this is what user configured
+               message.clean! if adi_config.clean_after_yielding?
+
+               public_send(adi_config.marking_method, message)
+             end
+           end
+         end
+       end
+     end
+   end
+ end
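A hedged sketch of what this wrapper gives the end user once the feature is enabled in routing (the consumer class and processing logic are hypothetical):

```ruby
class OrdersConsumer < Karafka::BaseConsumer
  def consume
    # With adaptive_iterator enabled, #each stops on revocation or shutdown,
    # seeks back before the max.poll.interval.ms budget runs out, optionally
    # cleans each message after the block and marks it as consumed
    each do |message|
      process_order(message.payload)
    end
  end
end
```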
data/lib/karafka/pro/processing/adaptive_iterator/tracker.rb ADDED
@@ -0,0 +1,75 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Processing
+       module AdaptiveIterator
+         # Tracker is responsible for monitoring the processing of messages within the poll
+         # interval limitation.
+         # It ensures that the consumer does not exceed the maximum poll interval by tracking the
+         # processing cost and determining when to halt further processing (if needed).
+         class Tracker
+           include Karafka::Core::Helpers::Time
+
+           # Initializes a new Tracker instance.
+           #
+           # @param safety_margin [Float] The safety margin percentage (0-100) to leave as a buffer.
+           # @param last_polled_at [Float] The timestamp of the last polling in milliseconds.
+           # @param max_poll_interval_ms [Integer] The maximum poll interval time in milliseconds.
+           def initialize(
+             safety_margin,
+             last_polled_at,
+             max_poll_interval_ms
+           )
+             @safety_margin = safety_margin / 100.0 # Convert percentage to decimal
+             @last_polled_at = last_polled_at
+             @max_processing_cost = 0
+             @max_poll_interval_ms = max_poll_interval_ms
+           end
+
+           # Tracks the processing time of a block and updates the maximum processing cost.
+           #
+           # @yield Executes the block, measuring the time taken for processing.
+           def track
+             before = monotonic_now
+
+             yield
+
+             time_taken = monotonic_now - before
+
+             return unless time_taken > @max_processing_cost
+
+             @max_processing_cost = time_taken
+           end
+
+           # Determines if there is enough time left to process more messages without exceeding the
+           # maximum poll interval, considering both the safety margin and adaptive margin.
+           #
+           # @return [Boolean] Returns true if it is time to stop processing. False otherwise.
+           def enough?
+             elapsed_time_ms = monotonic_now - @last_polled_at
+             remaining_time_ms = @max_poll_interval_ms - elapsed_time_ms
+
+             safety_margin_ms = @max_poll_interval_ms * @safety_margin
+
+             return true if remaining_time_ms <= safety_margin_ms
+             return true if remaining_time_ms - @max_processing_cost <= safety_margin_ms
+
+             false
+           end
+         end
+       end
+     end
+   end
+ end
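To make the `#enough?` budget math concrete, a worked example with illustrative numbers:

```ruby
# Illustrative values: max.poll.interval.ms = 300_000, safety_margin = 10
# safety_margin_ms  = 300_000 * 0.10          # => 30_000
# Suppose 250_000 ms elapsed since the last poll and the slowest message
# so far took 25_000 ms:
# remaining_time_ms = 300_000 - 250_000       # => 50_000
# 50_000 <= 30_000                            # => false (first check passes)
# 50_000 - 25_000 <= 30_000                   # => true, so #enough? returns true
# The consumer seeks back instead of risking a max poll interval violation.
```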
data/lib/karafka/pro/processing/expansions_selector.rb CHANGED
@@ -25,6 +25,7 @@ module Karafka
        expansions = super
        expansions << Pro::Processing::Piping::Consumer
        expansions << Pro::Processing::OffsetMetadata::Consumer if topic.offset_metadata?
+       expansions << Pro::Processing::AdaptiveIterator::Consumer if topic.adaptive_iterator?
        expansions << Pro::Processing::PeriodicJob::Consumer if topic.periodic_job?
        expansions
      end
data/lib/karafka/pro/routing/features/adaptive_iterator/config.rb ADDED
@@ -0,0 +1,34 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Routing
+       module Features
+         class AdaptiveIterator < Base
+           # Adaptive Iterator configuration
+           Config = Struct.new(
+             :active,
+             :safety_margin,
+             :marking_method,
+             :clean_after_yielding,
+             keyword_init: true
+           ) do
+             alias_method :active?, :active
+             alias_method :clean_after_yielding?, :clean_after_yielding
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/routing/features/adaptive_iterator/contracts/topic.rb ADDED
@@ -0,0 +1,74 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Routing
+       module Features
+         class AdaptiveIterator < Base
+           # Namespace for adaptive iterator contracts
+           module Contracts
+             # Contract to validate configuration of the adaptive iterator feature
+             class Topic < Karafka::Contracts::Base
+               configure do |config|
+                 config.error_messages = YAML.safe_load(
+                   File.read(
+                     File.join(Karafka.gem_root, 'config', 'locales', 'pro_errors.yml')
+                   )
+                 ).fetch('en').fetch('validations').fetch('topic')
+               end
+
+               nested(:adaptive_iterator) do
+                 required(:active) { |val| [true, false].include?(val) }
+                 required(:safety_margin) { |val| val.is_a?(Integer) && val.positive? && val < 100 }
+                 required(:clean_after_yielding) { |val| [true, false].include?(val) }
+
+                 required(:marking_method) do |val|
+                   %i[mark_as_consumed mark_as_consumed!].include?(val)
+                 end
+               end
+
+               # Since adaptive iterator uses `#seek` and can break processing in the middle, we
+               # cannot use it with virtual partitions that can process data in a distributed
+               # manner
+               virtual do |data, errors|
+                 next unless errors.empty?
+
+                 adaptive_iterator = data[:adaptive_iterator]
+                 virtual_partitions = data[:virtual_partitions]
+
+                 next unless adaptive_iterator[:active]
+                 next unless virtual_partitions[:active]
+
+                 [[%i[adaptive_iterator], :with_virtual_partitions]]
+               end
+
+               # There is no point in using the adaptive iterator with LRJ because of how LRJ works
+               virtual do |data, errors|
+                 next unless errors.empty?
+
+                 adaptive_iterator = data[:adaptive_iterator]
+                 long_running_jobs = data[:long_running_job]
+
+                 next unless adaptive_iterator[:active]
+                 next unless long_running_jobs[:active]
+
+                 [[%i[adaptive_iterator], :with_long_running_job]]
+               end
+             end
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/routing/features/adaptive_iterator/topic.rb ADDED
@@ -0,0 +1,62 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Routing
+       module Features
+         class AdaptiveIterator < Base
+           # Topic extension allowing us to enable and configure adaptive iterator
+           module Topic
+             # @param active [Boolean] should we use the automatic adaptive iterator
+             # @param safety_margin [Integer]
+             #   How big of a margin we leave ourselves so we can safely communicate back with
+             #   Kafka, etc. We stop and seek back when we've burned 85% of the time by default.
+             #   We leave 15% of time for post-processing operations so we have space before we
+             #   hit max.poll.interval.ms.
+             # @param marking_method [Symbol] If we should, how should we mark
+             # @param clean_after_yielding [Boolean] Should we clean post-yielding via the
+             #   cleaner API
+             def adaptive_iterator(
+               active: false,
+               safety_margin: 10,
+               marking_method: :mark_as_consumed,
+               clean_after_yielding: true
+             )
+               @adaptive_iterator ||= Config.new(
+                 active: active,
+                 safety_margin: safety_margin,
+                 marking_method: marking_method,
+                 clean_after_yielding: clean_after_yielding
+               )
+             end
+
+             # @return [Boolean] Is adaptive iterator active. It is always `true`, since we use it
+             #   via explicit messages batch wrapper
+             def adaptive_iterator?
+               adaptive_iterator.active?
+             end
+
+             # @return [Hash] topic with all its native configuration options plus poll guarding
+             #   setup configuration.
+             def to_h
+               super.merge(
+                 adaptive_iterator: adaptive_iterator.to_h
+               ).freeze
+             end
+           end
+         end
+       end
+     end
+   end
+ end
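A routing sketch using this extension (topic and consumer names are hypothetical; apart from the margin, the values shown mirror the defaults above):

```ruby
class KarafkaApp < Karafka::App
  routes.draw do
    topic :orders do
      consumer OrdersConsumer
      adaptive_iterator(
        active: true,
        safety_margin: 15,
        marking_method: :mark_as_consumed,
        clean_after_yielding: true
      )
    end
  end
end
```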
data/lib/karafka/pro/routing/features/adaptive_iterator.rb ADDED
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Routing
+       module Features
+         # Feature that pro-actively monitors remaining time until max poll interval ms and
+         # cost of processing of each message in a batch. When there is no more time to process
+         # more messages from the batch, it will seek back so we do not reach max poll interval.
+         # It can be useful when we reach this once in a while. For constant long-running jobs,
+         # please use the Long-Running Jobs feature instead.
+         #
+         # It also provides some wrapping over typical operations users do, like stopping if
+         # revoked, auto-marking, etc
+         class AdaptiveIterator < Base
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/scheduled_messages/contracts/message.rb CHANGED
@@ -28,17 +28,33 @@ module Karafka
            ).fetch('en').fetch('validations').fetch('scheduled_messages_message')
          end

-         # Headers we expect in each message of type "message" that goes to our scheduled messages
-         # topic
-         EXPECTED_HEADERS = %w[
+         # Headers we expect in each message of type "schedule" that goes to our scheduled
+         # messages topic
+         EXPECTED_SCHEDULE_HEADERS = %w[
            schedule_schema_version
            schedule_target_epoch
            schedule_source_type
            schedule_target_topic
          ].freeze

+         # Headers we expect in each message of type "cancel"
+         EXPECTED_CANCEL_HEADERS = %w[
+           schedule_schema_version
+           schedule_source_type
+         ].freeze
+
          required(:key) { |val| val.is_a?(String) && val.size.positive? }
-         required(:headers) { |val| val.is_a?(Hash) && (val.keys & EXPECTED_HEADERS).size == 4 }
+
+         # Ensure that schedule has all correct keys and that others have other related data
+         required(:headers) do |val|
+           next false unless val.is_a?(Hash)
+
+           if val['schedule_source_type'] == 'message'
+             (val.keys & EXPECTED_SCHEDULE_HEADERS).size >= 4
+           else
+             (val.keys & EXPECTED_CANCEL_HEADERS).size >= 2
+           end
+         end

          # Make sure, that schedule_target_epoch is not older than grace period behind us.
          # While this is not ideal verification of scheduling stuff in past, at least it will
@@ -46,6 +62,9 @@ module Karafka
          virtual do |data, errors|
            next unless errors.empty?

+           # Validate epoch only for schedules
+           next unless data[:headers]['schedule_source_type'] == 'schedule'
+
            epoch_time = data[:headers].fetch('schedule_target_epoch').to_i

            # We allow for small lag as those will be dispatched but we should prevent dispatching
data/lib/karafka/pro/scheduled_messages/day.rb CHANGED
@@ -29,7 +29,7 @@ module Karafka
      def initialize
        @created_at = Time.now.to_i

-       time = Time.at(@created_at)
+       time = Time.at(@created_at).utc

        @starts_at = Time.utc(time.year, time.month, time.day).to_i
        @ends_at = @starts_at + 86_399
data/lib/karafka/pro/scheduled_messages/proxy.rb CHANGED
@@ -87,12 +87,7 @@ module Karafka
          }.merge(envelope)

          enrich(proxy_message, message)
-
-         # Final validation to make sure all user provided extra data and what we have built
-         # complies with our requirements
-         POST_CONTRACT.validate!(proxy_message)
-         # After proxy specific validations we also ensure, that the final form is correct
-         MSG_CONTRACT.validate!(proxy_message, WaterDrop::Errors::MessageInvalidError)
+         validate!(proxy_message)

          proxy_message
        end
@@ -106,7 +101,7 @@ module Karafka
        # @note Technically it is a tombstone but we differentiate just for the sake of ability
        #   to debug stuff if needed
        def cancel(key:, envelope: {})
-         {
+         proxy_message = {
            key: key,
            payload: nil,
            headers: {
@@ -114,6 +109,11 @@ module Karafka
              'schedule_source_type' => 'cancel'
            }
          }.merge(envelope)
+
+         # Ensure user provided envelope is with all expected details
+         validate!(proxy_message)
+
+         proxy_message
        end

        # Builds tombstone with the dispatched message details. Those details can be used
@@ -169,6 +169,15 @@ module Karafka
            proxy_message[:partition_key] = message.fetch(attribute).to_s
          end
        end
+
+       # Final validations to make sure all user provided extra data and what we have built
+       # complies with our requirements
+       # @param proxy_message [Hash] our message envelope
+       def validate!(proxy_message)
+         POST_CONTRACT.validate!(proxy_message)
+         # After proxy specific validations we also ensure, that the final form is correct
+         MSG_CONTRACT.validate!(proxy_message, WaterDrop::Errors::MessageInvalidError)
+       end
      end
    end
  end
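A hedged sketch of how the schedule and cancel builders fit together (topic names are hypothetical, and it assumes the `ScheduledMessages` module exposes these proxy builders, as the Pro dispatcher's call to `.schedule` earlier suggests):

```ruby
# Build a schedule envelope and dispatch it to the proxy topic
proxy = Karafka::Pro::ScheduledMessages.schedule(
  message: { topic: 'orders', payload: '{}' },
  epoch: Time.now.to_i + 3600,
  envelope: { topic: 'scheduled_messages' }
)
Karafka.producer.produce_async(proxy)

# Cancel it later via its key; the cancel envelope topic is now
# validated the same way as the schedule one
cancel = Karafka::Pro::ScheduledMessages.cancel(
  key: proxy[:key],
  envelope: { topic: 'scheduled_messages' }
)
Karafka.producer.produce_async(cancel)
```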
data/lib/karafka/processing/coordinator.rb CHANGED
@@ -19,6 +19,9 @@ module Karafka
      #   any messages
      attr_accessor :eofed

+     # Last polled at time set based on the incoming last poll time
+     attr_accessor :last_polled_at
+
      def_delegators :@pause_tracker, :attempt, :paused?

      # @param topic [Karafka::Routing::Topic]
@@ -38,6 +41,7 @@ module Karafka
        @failure = false
        @eofed = false
        @changed_at = monotonic_now
+       @last_polled_at = @changed_at
      end

      # Starts the coordinator for given consumption jobs
data/lib/karafka/processing/executor.rb CHANGED
@@ -174,8 +174,11 @@ module Karafka
        expansions.each { |expansion| consumer.singleton_class.include(expansion) }

        consumer.client = @client
-       consumer.producer = ::Karafka::App.producer
        consumer.coordinator = @coordinator
+       # We assign producer only when not available already. It may already be available if
+       # user redefined the `#producer` method for example. This can be useful for example when
+       # having a multi-cluster setup and using a totally custom producer
+       consumer.producer ||= ::Karafka::App.producer
        # Since we have some message-less flows (idle, etc), we initialize consumer with empty
        # messages set. In production we have persistent consumers, so this is not a performance
        # overhead as this will happen only once per consumer lifetime
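A hedged sketch of the producer overwrite flow this enables (the second-cluster setup and class names are hypothetical):

```ruby
# A producer pointed at a different cluster
SECOND_CLUSTER_PRODUCER = WaterDrop::Producer.new do |config|
  config.kafka = { 'bootstrap.servers': 'second-cluster:9092' }
end

class CrossClusterConsumer < Karafka::BaseConsumer
  # Because the executor now uses ||=, this custom producer is kept
  # instead of being overwritten with Karafka::App.producer
  def producer
    SECOND_CLUSTER_PRODUCER
  end
end
```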
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
    # Current Karafka version
-   VERSION = '2.4.11'
+   VERSION = '2.4.12'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
-   version: 2.4.11
+   version: 2.4.12
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
    i9zWxov0mr44TWegTVeypcWGd/0nxu1+QHVNHJrpqlPBRvwQsUm7fwmRInGpcaB8
    ap8wNYvryYzrzvzUxIVFBVM5PacgkFqRmolCa8I7tdKQN+R1
    -----END CERTIFICATE-----
- date: 2024-09-04 00:00:00.000000000 Z
+ date: 2024-09-17 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: base64
@@ -290,6 +290,8 @@ files:
  - lib/karafka/pro/iterator/expander.rb
  - lib/karafka/pro/iterator/tpl_builder.rb
  - lib/karafka/pro/loader.rb
+ - lib/karafka/pro/processing/adaptive_iterator/consumer.rb
+ - lib/karafka/pro/processing/adaptive_iterator/tracker.rb
  - lib/karafka/pro/processing/collapser.rb
  - lib/karafka/pro/processing/coordinator.rb
  - lib/karafka/pro/processing/coordinators/errors_tracker.rb
@@ -386,6 +388,10 @@ files:
  - lib/karafka/pro/recurring_tasks/task.rb
  - lib/karafka/pro/routing/features/active_job.rb
  - lib/karafka/pro/routing/features/active_job/builder.rb
+ - lib/karafka/pro/routing/features/adaptive_iterator.rb
+ - lib/karafka/pro/routing/features/adaptive_iterator/config.rb
+ - lib/karafka/pro/routing/features/adaptive_iterator/contracts/topic.rb
+ - lib/karafka/pro/routing/features/adaptive_iterator/topic.rb
  - lib/karafka/pro/routing/features/base.rb
  - lib/karafka/pro/routing/features/dead_letter_queue.rb
  - lib/karafka/pro/routing/features/dead_letter_queue/contracts/topic.rb
metadata.gz.sig CHANGED
Binary file