karafka 2.0.41 → 2.1.0

Files changed (54)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +2 -2
  4. data/CHANGELOG.md +20 -1
  5. data/Gemfile.lock +2 -1
  6. data/config/locales/errors.yml +10 -0
  7. data/config/locales/pro_errors.yml +0 -2
  8. data/lib/karafka/active_job/consumer.rb +16 -11
  9. data/lib/karafka/active_job/current_attributes/loading.rb +36 -0
  10. data/lib/karafka/active_job/current_attributes/persistence.rb +28 -0
  11. data/lib/karafka/active_job/current_attributes.rb +42 -0
  12. data/lib/karafka/active_job/dispatcher.rb +8 -2
  13. data/lib/karafka/connection/client.rb +1 -1
  14. data/lib/karafka/errors.rb +3 -0
  15. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +166 -0
  16. data/lib/karafka/pro/active_job/consumer.rb +1 -10
  17. data/lib/karafka/pro/active_job/dispatcher.rb +2 -2
  18. data/lib/karafka/pro/processing/coordinator.rb +20 -1
  19. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +52 -0
  20. data/lib/karafka/pro/processing/filters_applier.rb +4 -0
  21. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
  22. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
  23. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +2 -2
  24. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
  25. data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +1 -1
  26. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +1 -1
  27. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +3 -6
  28. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom_vp.rb +43 -0
  29. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_vp.rb +1 -0
  30. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +3 -7
  31. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom_vp.rb +41 -0
  32. data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +1 -0
  33. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -6
  34. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom_vp.rb +36 -0
  35. data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +1 -0
  36. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +8 -7
  37. data/lib/karafka/pro/processing/strategies/dlq/mom_vp.rb +37 -0
  38. data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
  39. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom_vp.rb +40 -0
  40. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
  41. data/lib/karafka/pro/processing/strategies/lrj/mom_vp.rb +38 -0
  42. data/lib/karafka/pro/processing/strategies/mom/ftr_vp.rb +37 -0
  43. data/lib/karafka/pro/{base_consumer.rb → processing/strategies/mom/vp.rb} +17 -7
  44. data/lib/karafka/pro/processing/strategies/vp/default.rb +51 -0
  45. data/lib/karafka/pro/processing/virtual_offset_manager.rb +147 -0
  46. data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +0 -17
  47. data/lib/karafka/processing/strategies/default.rb +2 -0
  48. data/lib/karafka/processing/strategies/dlq_mom.rb +9 -7
  49. data/lib/karafka/version.rb +1 -1
  50. data/lib/karafka.rb +5 -0
  51. data.tar.gz.sig +0 -0
  52. metadata +16 -4
  53. metadata.gz.sig +0 -0
  54. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +0 -16
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 9560b22fc8cfd59dcaeb6551bcc0b2d2ebfef2f162a12905fa3aefe0c9c5865e
- data.tar.gz: a5b7aba125288ec96cf3b862e72447bf467fe23f923c232fe1d3ff9c48b1fdb4
+ metadata.gz: b3d0a2f78b4bf7fa8f49527d48d2e877b95597566e07beabf0166a02259a936b
+ data.tar.gz: fc6054ad5f99bfe8a678c337167f93fc612dddfe88494f8891158dbd8610fb7f
  SHA512:
- metadata.gz: d6581af85f8900d2d5ce91b6f9ec8ed0e1f6be5f3e80c36315c44c8dc07c30949566e281f40feb1b54cc9bbca771ac2188637e916d19edcb2fe26c04aeb362e1
- data.tar.gz: e467612b3185b5ec764d387e72507b617bf49d702436da2021d467fb0c23630aa98151c9679444a34114317a8c52e3c37f0268c6a6bdb4564ffa1bab51993109
+ metadata.gz: 0fb1fa88ef76ce81e145797a1364ac36bea2b94c47e733856cfd5ec9b37d0d9e2e984a3e4ef7fc36d2ac34c448e490cfdea4e10fae886cd80fb289798e55d308
+ data.tar.gz: 68df2bc1edb9acccd45d32428b43fd5dee12b9333c6059c801aa4ac03b3b89c01e93e8ea4ebbb8021618447c1122c8c5df64afc86747d6f0deec0cf992237e82
checksums.yaml.gz.sig CHANGED
Binary file
data/.github/workflows/ci.yml CHANGED
@@ -62,7 +62,7 @@ jobs:
  run: \curl -sSL https://api.coditsu.io/run/ci | bash

  specs:
- timeout-minutes: 45
+ timeout-minutes: 30
  runs-on: ubuntu-latest
  needs: diffend
  strategy:
@@ -102,7 +102,7 @@ jobs:
  run: bin/rspecs

  integrations:
- timeout-minutes: 30
+ timeout-minutes: 45
  runs-on: ubuntu-latest
  needs: diffend
  strategy:
data/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
  # Karafka framework changelog

+ ## 2.1.0 (2023-05-22)
+ - **[Feature]** Provide the ability to use CurrentAttributes with ActiveJob's Karafka adapter.
+ - **[Feature]** Introduce collective Virtual Partitions offset management.
+ - **[Feature]** Use virtual offsets to filter out messages that would be re-processed upon retries.
+ - [Improvement] No longer break processing on failing parallel virtual partitions in ActiveJob because it is compensated by virtual marking.
+ - [Improvement] Always use Virtual offset management for Pro ActiveJobs.
+ - [Improvement] Do not attempt to mark offsets on already revoked partitions.
+ - [Improvement] Make sure that VP components are not injected into non-VP strategies.
+ - [Improvement] Improve complex strategies inheritance flow.
+ - [Improvement] Optimize offset management for DLQ + MoM feature combinations.
+ - [Change] Removed `Karafka::Pro::BaseConsumer` in favor of `Karafka::BaseConsumer`. (#1345)
+ - [Fix] Fix for `max_messages` and `max_wait_time` not having a reference in errors.yml (#1443)
+
+ ### Upgrade notes
+
+ 1. Upgrade to Karafka `2.0.41` prior to upgrading to `2.1.0`.
+ 2. Replace `Karafka::Pro::BaseConsumer` references with `Karafka::BaseConsumer`.
+ 3. Replace `Karafka::Instrumentation::Vendors::Datadog::Listener` with `Karafka::Instrumentation::Vendors::Datadog::MetricsListener`.
+
  ## 2.0.41 (2023-04-19)
  - **[Feature]** Provide `Karafka::Pro::Iterator` for anonymous topic/partitions iterations and messages lookups (#1389 and #1427).
  - [Improvement] Optimize topic lookup for `read_topic` admin method usage.
@@ -60,7 +79,7 @@

  ## 2.0.35 (2023-03-13)
  - **[Feature]** Allow for defining topics config via the DSL and its automatic creation via CLI command.
- - **[Feature]** Allow for full topics reset and topics repartitioning via the CLI.
+ - **[Feature]** Allow for full topics reset and topics repartitioning via the CLI.

  ## 2.0.34 (2023-03-04)
  - [Improvement] Attach an `embedded` tag to Karafka processes started using the embedded API.
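The second upgrade note is a one-line class swap. A minimal, hypothetical sketch of what it looks like in an application consumer (the `OrdersConsumer` name is illustrative, not part of this gem):

```ruby
# Before: Karafka 2.0.x with the dedicated Pro base class
class OrdersConsumer < Karafka::Pro::BaseConsumer
  def consume
    messages.each { |message| puts message.payload }
  end
end

# After: Karafka 2.1.0, where Pro enhancements are applied on top of the single base class
class OrdersConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| puts message.payload }
  end
end
```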
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.0.41)
+ karafka (2.1.0)
  karafka-core (>= 2.0.12, < 3.0.0)
  thor (>= 0.20)
  waterdrop (>= 2.4.10, < 3.0.0)
@@ -78,6 +78,7 @@ GEM
  zeitwerk (2.6.7)

  PLATFORMS
+ arm64-darwin-21
  x86_64-linux

  DEPENDENCIES
data/config/locales/errors.yml CHANGED
@@ -15,6 +15,13 @@ en:
  shutdown_timeout_format: needs to be an integer bigger than 0
  max_wait_time_format: needs to be an integer bigger than 0
  kafka_format: needs to be a filled hash
+ internal.processing.jobs_builder_format: cannot be nil
+ internal.processing.scheduler: cannot be nil
+ internal.processing.coordinator_class: cannot be nil
+ internal.processing.partitioner_class: cannot be nil
+ internal.active_job.dispatcher: cannot be nil
+ internal.active_job.job_options_contract: cannot be nil
+ internal.active_job.consumer_class: cannot be nil
  internal.status_format: needs to be present
  internal.process_format: needs to be present
  internal.routing.builder_format: needs to be present
@@ -31,7 +38,10 @@ en:
  topics_missing: No topics to subscribe to

  topic:
+ kafka: needs to be a hash with kafka scope settings details
  missing: needs to be present
+ max_messages_format: 'needs to be an integer bigger than 0'
+ max_wait_time_format: 'needs to be an integer bigger than 0'
  name_format: 'needs to be a string with a Kafka accepted format'
  deserializer_format: needs to be present
  consumer_format: needs to be present
data/config/locales/pro_errors.yml CHANGED
@@ -4,8 +4,6 @@ en:
  virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
  virtual_partitions.max_partitions_format: needs to be equal or more than 1

- manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
-
  long_running_job.active_format: needs to be either true or false

  dead_letter_queue_with_virtual_partitions: when using Dead Letter Queue with Virtual Partitions, at least one retry is required.
data/lib/karafka/active_job/consumer.rb CHANGED
@@ -24,21 +24,26 @@ module Karafka
  #
  # @param job_message [Karafka::Messages::Message] message with active job
  def consume_job(job_message)
- # We technically speaking could set this as deserializer and reference it from the
- # message instead of using the `#raw_payload`. This is not done on purpose to simplify
- # the ActiveJob setup here
- job = ::ActiveSupport::JSON.decode(job_message.raw_payload)
+ with_deserialized_job(job_message) do |job|
+ tags.add(:job_class, job['job_class'])

- tags.add(:job_class, job['job_class'])
+ payload = { caller: self, job: job, message: job_message }

- payload = { caller: self, job: job, message: job_message }
-
- # We publish both to make it consistent with `consumer.x` events
- Karafka.monitor.instrument('active_job.consume', payload)
- Karafka.monitor.instrument('active_job.consumed', payload) do
- ::ActiveJob::Base.execute(job)
+ # We publish both to make it consistent with `consumer.x` events
+ Karafka.monitor.instrument('active_job.consume', payload)
+ Karafka.monitor.instrument('active_job.consumed', payload) do
+ ::ActiveJob::Base.execute(job)
+ end
  end
  end
+
+ # @param job_message [Karafka::Messages::Message] message with active job
+ def with_deserialized_job(job_message)
+ # We technically speaking could set this as deserializer and reference it from the
+ # message instead of using the `#raw_payload`. This is not done on purpose to simplify
+ # the ActiveJob setup here
+ yield ::ActiveSupport::JSON.decode(job_message.raw_payload)
+ end
  end
  end
  end
data/lib/karafka/active_job/current_attributes/loading.rb ADDED
@@ -0,0 +1,36 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module ActiveJob
+ module CurrentAttributes
+ # Module expanding the job deserialization to extract current attributes and load them
+ # for the time of the job execution
+ module Loading
+ # @param job_message [Karafka::Messages::Message] message with active job
+ def with_deserialized_job(job_message)
+ super(job_message) do |job|
+ resetable = []
+
+ _cattr_klasses.each do |key, cattr_klass_str|
+ next unless job.key?(key)
+
+ attributes = job.delete(key)
+
+ cattr_klass = cattr_klass_str.constantize
+
+ attributes.each do |name, value|
+ cattr_klass.public_send("#{name}=", value)
+ end
+
+ resetable << cattr_klass
+ end
+
+ yield(job)
+
+ resetable.each(&:reset)
+ end
+ end
+ end
+ end
+ end
+ end
data/lib/karafka/active_job/current_attributes/persistence.rb ADDED
@@ -0,0 +1,28 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module ActiveJob
+ module CurrentAttributes
+ # Module adding the current attributes persistence into the ActiveJob jobs
+ module Persistence
+ # Alters the job serialization to inject the current attributes into the json before we
+ # send it to Kafka
+ #
+ # @param job [ActiveJob::Base] job
+ def serialize_job(job)
+ json = super(job)
+
+ _cattr_klasses.each do |key, cattr_klass_str|
+ next if json.key?(key)
+
+ attrs = cattr_klass_str.constantize.attributes
+
+ json[key] = attrs unless attrs.empty?
+ end
+
+ json
+ end
+ end
+ end
+ end
+ end
data/lib/karafka/active_job/current_attributes.rb ADDED
@@ -0,0 +1,42 @@
+ # frozen_string_literal: true
+
+ require 'active_support/current_attributes'
+ require_relative 'current_attributes/loading'
+ require_relative 'current_attributes/persistence'
+
+ # This code is based on Sidekiq's approach to persisting current attributes
+ # @see https://github.com/sidekiq/sidekiq/blob/main/lib/sidekiq/middleware/current_attributes.rb
+ module Karafka
+ module ActiveJob
+ # Module that allows to persist current attributes on Karafka jobs
+ module CurrentAttributes
+ # Allows for persistence of given current attributes via AJ + Karafka
+ #
+ # @param klasses [Array<String, Class>] classes or names of the current attributes classes
+ def persist(*klasses)
+ # Support for providing multiple classes
+ klasses = Array(klasses).flatten
+
+ [Dispatcher, Consumer]
+ .reject { |expandable| expandable.respond_to?(:_cattr_klasses) }
+ .each { |expandable| expandable.class_attribute :_cattr_klasses, default: {} }
+
+ # Do not double inject in case of running persist multiple times
+ Dispatcher.prepend(Persistence) unless Dispatcher.ancestors.include?(Persistence)
+ Consumer.prepend(Loading) unless Consumer.ancestors.include?(Loading)
+
+ klasses.map(&:to_s).each do |stringified_klass|
+ # Prevent registering same klass multiple times
+ next if Dispatcher._cattr_klasses.value?(stringified_klass)
+
+ key = "cattr_#{Dispatcher._cattr_klasses.count}"
+
+ Dispatcher._cattr_klasses[key] = stringified_klass
+ Consumer._cattr_klasses[key] = stringified_klass
+ end
+ end
+
+ module_function :persist
+ end
+ end
+ end
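Wiring the `persist` API defined above into an application is a single call. A hedged sketch follows; the `Current` class, its attribute, and the initializer path are assumptions for illustration, not part of this gem:

```ruby
# config/initializers/karafka_current_attributes.rb (assumed location)
require 'karafka/active_job/current_attributes'

# An example ActiveSupport::CurrentAttributes subclass owned by the application
class Current < ActiveSupport::CurrentAttributes
  attribute :request_id
end

# Registers the class so its attributes are embedded into the serialized job payload on
# dispatch (Persistence) and restored around job execution on the consumer side (Loading)
Karafka::ActiveJob::CurrentAttributes.persist(Current)
```

Both a class and its string name are accepted, since `persist` stringifies whatever it receives before registering it.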
data/lib/karafka/active_job/dispatcher.rb CHANGED
@@ -18,7 +18,7 @@ module Karafka
  ::Karafka.producer.public_send(
  fetch_option(job, :dispatch_method, DEFAULTS),
  topic: job.queue_name,
- payload: ::ActiveSupport::JSON.encode(job.serialize)
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
  )
  end

@@ -34,7 +34,7 @@ module Karafka

  dispatches[d_method] << {
  topic: job.queue_name,
- payload: ::ActiveSupport::JSON.encode(job.serialize)
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
  }
  end

@@ -58,6 +58,12 @@ module Karafka
  .karafka_options
  .fetch(key, defaults.fetch(key))
  end
+
+ # @param job [ActiveJob::Base] job
+ # @return [Hash] json representation of the job
+ def serialize_job(job)
+ job.serialize
+ end
  end
  end
  end
data/lib/karafka/connection/client.rb CHANGED
@@ -30,7 +30,7 @@ module Karafka
  #
  # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
  # with all the configuration details needed for us to create a client
- # @return [Karafka::Connection::Rdk::Consumer]
+ # @return [Karafka::Connection::Client]
  def initialize(subscription_group)
  @id = SecureRandom.hex(6)
  # Name is set when we build consumer
data/lib/karafka/errors.rb CHANGED
@@ -46,5 +46,8 @@

  # This should never happen. Please open an issue if it does.
  StrategyNotFoundError = Class.new(BaseError)
+
+ # This should never happen. Please open an issue if it does.
+ InvalidRealOffsetUsage = Class.new(BaseError)
  end
  end
data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb ADDED
@@ -0,0 +1,166 @@
+ # frozen_string_literal: true
+
+ require 'socket'
+
+ module Karafka
+ module Instrumentation
+ module Vendors
+ # Namespace for instrumentation related with Kubernetes
+ module Kubernetes
+ # Kubernetes HTTP listener that does not only reply when process is not fully hanging, but
+ # also allows to define max time of processing and looping.
+ #
+ # Processes like Karafka server can hang while still being reachable. For example, in case
+ # something would hang inside of the user code, Karafka could stop polling and no new
+ # data would be processed, but process itself would still be active. This listener allows
+ # for defining of a ttl that gets bumped on each poll loop and before and after processing
+ # of a given messages batch.
+ class LivenessListener
+ include ::Karafka::Core::Helpers::Time
+
+ # @param hostname [String, nil] hostname or nil to bind on all
+ # @param port [Integer] TCP port on which we want to run our HTTP status server
+ # @param consuming_ttl [Integer] time in ms after which we consider consumption hanging.
+ # It allows us to define max consumption time after which k8s should consider given
+ # process as hanging
+ # @param polling_ttl [Integer] max time in ms for polling. If polling (any) does not
+ # happen that often, process should be considered dead.
+ # @note The default TTL matches the default `max.poll.interval.ms`
+ def initialize(
+ hostname: nil,
+ port: 3000,
+ consuming_ttl: 5 * 60 * 1_000,
+ polling_ttl: 5 * 60 * 1_000
+ )
+ @server = TCPServer.new(*[hostname, port].compact)
+ @polling_ttl = polling_ttl
+ @consuming_ttl = consuming_ttl
+ @mutex = Mutex.new
+ @pollings = {}
+ @consumptions = {}
+
+ Thread.new do
+ loop do
+ break unless respond
+ end
+ end
+ end
+
+ # Tick on each fetch
+ # @param _event [Karafka::Core::Monitoring::Event]
+ def on_connection_listener_fetch_loop(_event)
+ mark_polling_tick
+ end
+
+ # Tick on starting work
+ # @param _event [Karafka::Core::Monitoring::Event]
+ def on_consumer_consume(_event)
+ mark_consumption_tick
+ end
+
+ # Tick on finished work
+ # @param _event [Karafka::Core::Monitoring::Event]
+ def on_consumer_consumed(_event)
+ clear_consumption_tick
+ end
+
+ # @param _event [Karafka::Core::Monitoring::Event]
+ def on_consumer_revoke(_event)
+ mark_consumption_tick
+ end
+
+ # @param _event [Karafka::Core::Monitoring::Event]
+ def on_consumer_revoked(_event)
+ clear_consumption_tick
+ end
+
+ # @param _event [Karafka::Core::Monitoring::Event]
+ def on_consumer_shutting_down(_event)
+ mark_consumption_tick
+ end
+
+ # @param _event [Karafka::Core::Monitoring::Event]
+ def on_consumer_shutdown(_event)
+ clear_consumption_tick
+ end
+
+ # @param _event [Karafka::Core::Monitoring::Event]
+ def on_error_occurred(_event)
+ clear_consumption_tick
+ clear_polling_tick
+ end
+
+ # Stop the http server when we stop the process
+ # @param _event [Karafka::Core::Monitoring::Event]
+ def on_app_stopped(_event)
+ @server.close
+ end
+
+ private
+
+ # Wraps the logic with a mutex
+ # @param block [Proc] code we want to run in mutex
+ def synchronize(&block)
+ @mutex.synchronize(&block)
+ end
+
+ # @return [Integer] object id of the current thread
+ def thread_id
+ Thread.current.object_id
+ end
+
+ # Update the polling tick time for current thread
+ def mark_polling_tick
+ synchronize do
+ @pollings[thread_id] = monotonic_now
+ end
+ end
+
+ # Clear current thread polling time tracker
+ def clear_polling_tick
+ synchronize do
+ @pollings.delete(thread_id)
+ end
+ end
+
+ # Update the processing tick time
+ def mark_consumption_tick
+ synchronize do
+ @consumptions[thread_id] = monotonic_now
+ end
+ end
+
+ # Clear current thread consumption time tracker
+ def clear_consumption_tick
+ synchronize do
+ @consumptions.delete(thread_id)
+ end
+ end
+
+ # Responds to a HTTP request with the process liveness status
+ def respond
+ client = @server.accept
+ client.gets
+ client.print "HTTP/1.1 #{status}\r\n"
+ client.close
+
+ true
+ rescue Errno::ECONNRESET, Errno::EPIPE, IOError
+ !@server.closed?
+ end
+
+ # Did we exceed any of the ttls
+ # @return [String] 204 string if ok, 500 otherwise
+ def status
+ time = monotonic_now
+
+ return '500' if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
+ return '500' if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
+
+ '204'
+ end
+ end
+ end
+ end
+ end
+ end
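The listener above is not enabled automatically. A minimal sketch of subscribing it in `karafka.rb`, reusing the defaults visible in the constructor (the bind address here is an assumption):

```ruby
# karafka.rb - sketch only; the listener ships with no automatic wiring
require 'karafka/instrumentation/vendors/kubernetes/liveness_listener'

listener = ::Karafka::Instrumentation::Vendors::Kubernetes::LivenessListener.new(
  hostname: '0.0.0.0', # assumed bind address for the embedded HTTP status endpoint
  port: 3000,
  # report 500 when no polling or no consumption progress happened within 5 minutes
  consuming_ttl: 5 * 60 * 1_000,
  polling_ttl: 5 * 60 * 1_000
)

# The listener reacts to the instrumentation events it defines `on_*` methods for
Karafka.monitor.subscribe(listener)
```

A Kubernetes liveness probe can then poll the chosen port: `204` means the process is polling and consuming within its TTLs, `500` means it should be restarted.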
data/lib/karafka/pro/active_job/consumer.rb CHANGED
@@ -35,18 +35,9 @@ module Karafka
  # double-processing
  break if Karafka::App.stopping? && !topic.virtual_partitions?

- # Break if we already know, that one of virtual partitions has failed and we will
- # be restarting processing all together after all VPs are done. This will minimize
- # number of jobs that will be re-processed
- break if topic.virtual_partitions? && failing?
-
  consume_job(message)

- # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
- # this could create random markings.
- # The exception here is the collapsed state where we can move one after another
- next if topic.virtual_partitions? && !collapsed?
-
+ # We can always mark because of the virtual offset management that we have in VPs
  mark_as_consumed(message)
  end
  end
data/lib/karafka/pro/active_job/dispatcher.rb CHANGED
@@ -39,7 +39,7 @@ module Karafka
  fetch_option(job, :dispatch_method, DEFAULTS),
  dispatch_details(job).merge!(
  topic: job.queue_name,
- payload: ::ActiveSupport::JSON.encode(job.serialize)
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
  )
  )
  end
@@ -54,7 +54,7 @@ module Karafka

  dispatches[d_method] << dispatch_details(job).merge!(
  topic: job.queue_name,
- payload: ::ActiveSupport::JSON.encode(job.serialize)
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
  )
  end

data/lib/karafka/pro/processing/coordinator.rb CHANGED
@@ -17,7 +17,7 @@ module Karafka
  # Pro coordinator that provides extra orchestration methods useful for parallel processing
  # within the same partition
  class Coordinator < ::Karafka::Processing::Coordinator
- attr_reader :filter
+ attr_reader :filter, :virtual_offset_manager

  # @param args [Object] anything the base coordinator accepts
  def initialize(*args)
@@ -27,6 +27,20 @@ module Karafka
  @flow_lock = Mutex.new
  @collapser = Collapser.new
  @filter = FiltersApplier.new(self)
+
+ return unless topic.virtual_partitions?
+
+ @virtual_offset_manager = VirtualOffsetManager.new(
+ topic.name,
+ partition
+ )
+
+ # We register our own "internal" filter to support filtering of messages that were marked
+ # as consumed virtually
+ @filter.filters << Filters::VirtualLimiter.new(
+ @virtual_offset_manager,
+ @collapser
+ )
  end

  # Starts the coordination process
@@ -40,6 +54,11 @@
  @filter.apply!(messages)

  @executed.clear
+
+ # We keep the old processed offsets until the collapsing is done and regular processing
+ # with virtualization is restored
+ @virtual_offset_manager.clear if topic.virtual_partitions? && !@collapser.collapsed?
+
  @last_message = messages.last
  end

data/lib/karafka/pro/processing/filters/virtual_limiter.rb ADDED
@@ -0,0 +1,52 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+ module Pro
+ module Processing
+ module Filters
+ # Removes messages that are already marked as consumed in the virtual offset manager
+ # This should operate only when using virtual partitions.
+ #
+ # This cleaner prevents us from duplicated processing of messages that were virtually
+ # marked as consumed even if we could not mark them as consumed in Kafka. This allows us
+ # to limit reprocessing when errors occur drastically when operating with virtual
+ # partitions
+ #
+ # @note It should be registered only when VPs are used
+ class VirtualLimiter < Base
+ # @param manager [Processing::VirtualOffsetManager]
+ # @param collapser [Processing::Collapser]
+ def initialize(manager, collapser)
+ @manager = manager
+ @collapser = collapser
+
+ super()
+ end
+
+ # Remove messages that we already marked as virtually consumed. Does nothing if not in
+ # the collapsed mode.
+ #
+ # @param messages [Array<Karafka::Messages::Message>]
+ def apply!(messages)
+ return unless @collapser.collapsed?
+
+ marked = @manager.marked
+
+ messages.delete_if { |message| marked.include?(message.offset) }
+ end
+ end
+ end
+ end
+ end
+ end
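The limiter above only ever runs for topics that have Virtual Partitions enabled. For reference, a hedged routing sketch of such a topic; the consumer class and the partitioner lambda are illustrative, not taken from this gem:

```ruby
class KarafkaApp < Karafka::App
  routes.draw do
    topic :orders_states do
      consumer OrdersStatesConsumer

      # Pro feature: parallelize work within a single Kafka partition.
      # Any callable returning a grouping key can serve as the partitioner.
      virtual_partitions(
        partitioner: ->(message) { message.headers['order_id'] },
        max_partitions: 5
      )
    end
  end
end
```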
data/lib/karafka/pro/processing/filters_applier.rb CHANGED
@@ -21,6 +21,10 @@ module Karafka
  # This means that this is the API we expose as a single filter, allowing us to control
  # the filtering via many filters easily.
  class FiltersApplier
+ # @return [Array] registered filters array. Useful if we want to inject internal context
+ # aware filters.
+ attr_reader :filters
+
  # @param coordinator [Pro::Coordinator] pro coordinator
  def initialize(coordinator)
  # Builds filters out of their factories
data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb CHANGED
@@ -22,7 +22,7 @@
  # - Mom
  # - VP
  module DlqFtrMomVp
- include Strategies::Vp::Default
+ include Strategies::Aj::DlqMomVp
  include Strategies::Aj::DlqFtrMom

  # Features for this strategy
data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb CHANGED
@@ -24,7 +24,9 @@ module Karafka
  # This case is a bit of special. Please see the `AjDlqMom` for explanation on how the
  # offset management works in this case.
  module DlqLrjMom
- include Strategies::Aj::DlqLrjMomVp
+ include Strategies::Default
+ include Strategies::Dlq::Default
+ include Strategies::Aj::LrjMom

  # Features for this strategy
  FEATURES = %i[
data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb CHANGED
@@ -20,9 +20,9 @@ module Karafka
  # Manual offset management enabled
  # Virtual Partitions enabled
  module DlqMomVp
- include Strategies::Dlq::Default
- include Strategies::Vp::Default
  include Strategies::Default
+ include Strategies::Dlq::Vp
+ include Strategies::Vp::Default

  # Features for this strategy
  FEATURES = %i[