karafka 2.0.41 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +2 -2
  4. data/CHANGELOG.md +20 -1
  5. data/Gemfile.lock +2 -1
  6. data/config/locales/errors.yml +10 -0
  7. data/config/locales/pro_errors.yml +0 -2
  8. data/lib/karafka/active_job/consumer.rb +16 -11
  9. data/lib/karafka/active_job/current_attributes/loading.rb +36 -0
  10. data/lib/karafka/active_job/current_attributes/persistence.rb +28 -0
  11. data/lib/karafka/active_job/current_attributes.rb +42 -0
  12. data/lib/karafka/active_job/dispatcher.rb +8 -2
  13. data/lib/karafka/connection/client.rb +1 -1
  14. data/lib/karafka/errors.rb +3 -0
  15. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +166 -0
  16. data/lib/karafka/pro/active_job/consumer.rb +1 -10
  17. data/lib/karafka/pro/active_job/dispatcher.rb +2 -2
  18. data/lib/karafka/pro/processing/coordinator.rb +20 -1
  19. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +52 -0
  20. data/lib/karafka/pro/processing/filters_applier.rb +4 -0
  21. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
  22. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
  23. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +2 -2
  24. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
  25. data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +1 -1
  26. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +1 -1
  27. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +3 -6
  28. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom_vp.rb +43 -0
  29. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_vp.rb +1 -0
  30. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +3 -7
  31. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom_vp.rb +41 -0
  32. data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +1 -0
  33. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -6
  34. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom_vp.rb +36 -0
  35. data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +1 -0
  36. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +8 -7
  37. data/lib/karafka/pro/processing/strategies/dlq/mom_vp.rb +37 -0
  38. data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
  39. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom_vp.rb +40 -0
  40. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
  41. data/lib/karafka/pro/processing/strategies/lrj/mom_vp.rb +38 -0
  42. data/lib/karafka/pro/processing/strategies/mom/ftr_vp.rb +37 -0
  43. data/lib/karafka/pro/{base_consumer.rb → processing/strategies/mom/vp.rb} +17 -7
  44. data/lib/karafka/pro/processing/strategies/vp/default.rb +51 -0
  45. data/lib/karafka/pro/processing/virtual_offset_manager.rb +147 -0
  46. data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +0 -17
  47. data/lib/karafka/processing/strategies/default.rb +2 -0
  48. data/lib/karafka/processing/strategies/dlq_mom.rb +9 -7
  49. data/lib/karafka/version.rb +1 -1
  50. data/lib/karafka.rb +5 -0
  51. data.tar.gz.sig +0 -0
  52. metadata +16 -4
  53. metadata.gz.sig +0 -0
  54. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +0 -16
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9560b22fc8cfd59dcaeb6551bcc0b2d2ebfef2f162a12905fa3aefe0c9c5865e
4
- data.tar.gz: a5b7aba125288ec96cf3b862e72447bf467fe23f923c232fe1d3ff9c48b1fdb4
3
+ metadata.gz: b3d0a2f78b4bf7fa8f49527d48d2e877b95597566e07beabf0166a02259a936b
4
+ data.tar.gz: fc6054ad5f99bfe8a678c337167f93fc612dddfe88494f8891158dbd8610fb7f
5
5
  SHA512:
6
- metadata.gz: d6581af85f8900d2d5ce91b6f9ec8ed0e1f6be5f3e80c36315c44c8dc07c30949566e281f40feb1b54cc9bbca771ac2188637e916d19edcb2fe26c04aeb362e1
7
- data.tar.gz: e467612b3185b5ec764d387e72507b617bf49d702436da2021d467fb0c23630aa98151c9679444a34114317a8c52e3c37f0268c6a6bdb4564ffa1bab51993109
6
+ metadata.gz: 0fb1fa88ef76ce81e145797a1364ac36bea2b94c47e733856cfd5ec9b37d0d9e2e984a3e4ef7fc36d2ac34c448e490cfdea4e10fae886cd80fb289798e55d308
7
+ data.tar.gz: 68df2bc1edb9acccd45d32428b43fd5dee12b9333c6059c801aa4ac03b3b89c01e93e8ea4ebbb8021618447c1122c8c5df64afc86747d6f0deec0cf992237e82
checksums.yaml.gz.sig CHANGED
Binary file
@@ -62,7 +62,7 @@ jobs:
62
62
  run: \curl -sSL https://api.coditsu.io/run/ci | bash
63
63
 
64
64
  specs:
65
- timeout-minutes: 45
65
+ timeout-minutes: 30
66
66
  runs-on: ubuntu-latest
67
67
  needs: diffend
68
68
  strategy:
@@ -102,7 +102,7 @@ jobs:
102
102
  run: bin/rspecs
103
103
 
104
104
  integrations:
105
- timeout-minutes: 30
105
+ timeout-minutes: 45
106
106
  runs-on: ubuntu-latest
107
107
  needs: diffend
108
108
  strategy:
data/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.1.0 (2023-05-22)
4
+ - **[Feature]** Provide ability to use CurrentAttributes with ActiveJob's Karafka adapter.
5
+ - **[Feature]** Introduce collective Virtual Partitions offset management.
6
+ - **[Feature]** Use virtual offsets to filter out messages that would be re-processed upon retries.
7
+ - [Improvement] No longer break processing on failing parallel virtual partitions in ActiveJob because it is compensated by virtual marking.
8
+ - [Improvement] Always use Virtual offset management for Pro ActiveJobs.
9
+ - [Improvement] Do not attempt to mark offsets on already revoked partitions.
10
+ - [Improvement] Make sure, that VP components are not injected into non VP strategies.
11
+ - [Improvement] Improve complex strategies inheritance flow.
12
+ - [Improvement] Optimize offset management for DLQ + MoM feature combinations.
13
+ - [Change] Removed `Karafka::Pro::BaseConsumer` in favor of `Karafka::BaseConsumer`. (#1345)
14
+ - [Fix] Fix for `max_messages` and `max_wait_time` not having reference in errors.yml (#1443)
15
+
16
+ ### Upgrade notes
17
+
18
+ 1. Upgrade to Karafka `2.0.41` prior to upgrading to `2.1.0`.
19
+ 2. Replace `Karafka::Pro::BaseConsumer` references to `Karafka::BaseConsumer`.
20
 + 3. Replace `Karafka::Instrumentation::Vendors::Datadog::Listener` with `Karafka::Instrumentation::Vendors::Datadog::MetricsListener`.
21
+
3
22
  ## 2.0.41 (2023-04-19)
4
23
  - **[Feature]** Provide `Karafka::Pro::Iterator` for anonymous topic/partitions iterations and messages lookups (#1389 and #1427).
5
24
  - [Improvement] Optimize topic lookup for `read_topic` admin method usage.
@@ -60,7 +79,7 @@
60
79
 
61
80
  ## 2.0.35 (2023-03-13)
62
81
  - **[Feature]** Allow for defining topics config via the DSL and its automatic creation via CLI command.
63
- - **[Feature]** Allow for full topics reset and topics repartitioning via the CLI.
82
+ - **[Feature]** Allow for full topics reset and topics repartitioning via the CLI.
64
83
 
65
84
  ## 2.0.34 (2023-03-04)
66
85
  - [Improvement] Attach an `embedded` tag to Karafka processes started using the embedded API.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.0.41)
4
+ karafka (2.1.0)
5
5
  karafka-core (>= 2.0.12, < 3.0.0)
6
6
  thor (>= 0.20)
7
7
  waterdrop (>= 2.4.10, < 3.0.0)
@@ -78,6 +78,7 @@ GEM
78
78
  zeitwerk (2.6.7)
79
79
 
80
80
  PLATFORMS
81
+ arm64-darwin-21
81
82
  x86_64-linux
82
83
 
83
84
  DEPENDENCIES
@@ -15,6 +15,13 @@ en:
15
15
  shutdown_timeout_format: needs to be an integer bigger than 0
16
16
  max_wait_time_format: needs to be an integer bigger than 0
17
17
  kafka_format: needs to be a filled hash
18
+ internal.processing.jobs_builder_format: cannot be nil
19
+ internal.processing.scheduler: cannot be nil
20
+ internal.processing.coordinator_class: cannot be nil
21
+ internal.processing.partitioner_class: cannot be nil
22
+ internal.active_job.dispatcher: cannot be nil
23
+ internal.active_job.job_options_contract: cannot be nil
24
+ internal.active_job.consumer_class: cannot be nil
18
25
  internal.status_format: needs to be present
19
26
  internal.process_format: needs to be present
20
27
  internal.routing.builder_format: needs to be present
@@ -31,7 +38,10 @@ en:
31
38
  topics_missing: No topics to subscribe to
32
39
 
33
40
  topic:
41
+ kafka: needs to be a hash with kafka scope settings details
34
42
  missing: needs to be present
43
+ max_messages_format: 'needs to be an integer bigger than 0'
44
+ max_wait_time_format: 'needs to be an integer bigger than 0'
35
45
  name_format: 'needs to be a string with a Kafka accepted format'
36
46
  deserializer_format: needs to be present
37
47
  consumer_format: needs to be present
@@ -4,8 +4,6 @@ en:
4
4
  virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
5
5
  virtual_partitions.max_partitions_format: needs to be equal or more than 1
6
6
 
7
- manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
8
-
9
7
  long_running_job.active_format: needs to be either true or false
10
8
 
11
9
  dead_letter_queue_with_virtual_partitions: when using Dead Letter Queue with Virtual Partitions, at least one retry is required.
@@ -24,21 +24,26 @@ module Karafka
24
24
  #
25
25
  # @param job_message [Karafka::Messages::Message] message with active job
26
26
  def consume_job(job_message)
27
- # We technically speaking could set this as deserializer and reference it from the
28
- # message instead of using the `#raw_payload`. This is not done on purpose to simplify
29
- # the ActiveJob setup here
30
- job = ::ActiveSupport::JSON.decode(job_message.raw_payload)
27
+ with_deserialized_job(job_message) do |job|
28
+ tags.add(:job_class, job['job_class'])
31
29
 
32
- tags.add(:job_class, job['job_class'])
30
+ payload = { caller: self, job: job, message: job_message }
33
31
 
34
- payload = { caller: self, job: job, message: job_message }
35
-
36
- # We publish both to make it consistent with `consumer.x` events
37
- Karafka.monitor.instrument('active_job.consume', payload)
38
- Karafka.monitor.instrument('active_job.consumed', payload) do
39
- ::ActiveJob::Base.execute(job)
32
+ # We publish both to make it consistent with `consumer.x` events
33
+ Karafka.monitor.instrument('active_job.consume', payload)
34
+ Karafka.monitor.instrument('active_job.consumed', payload) do
35
+ ::ActiveJob::Base.execute(job)
36
+ end
40
37
  end
41
38
  end
39
+
40
+ # @param job_message [Karafka::Messages::Message] message with active job
41
+ def with_deserialized_job(job_message)
42
+ # We technically speaking could set this as deserializer and reference it from the
43
+ # message instead of using the `#raw_payload`. This is not done on purpose to simplify
44
+ # the ActiveJob setup here
45
+ yield ::ActiveSupport::JSON.decode(job_message.raw_payload)
46
+ end
42
47
  end
43
48
  end
44
49
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module ActiveJob
5
+ module CurrentAttributes
6
+ # Module expanding the job deserialization to extract current attributes and load them
7
+ # for the time of the job execution
8
+ module Loading
9
+ # @param job_message [Karafka::Messages::Message] message with active job
10
+ def with_deserialized_job(job_message)
11
+ super(job_message) do |job|
12
+ resetable = []
13
+
14
+ _cattr_klasses.each do |key, cattr_klass_str|
15
+ next unless job.key?(key)
16
+
17
+ attributes = job.delete(key)
18
+
19
+ cattr_klass = cattr_klass_str.constantize
20
+
21
+ attributes.each do |name, value|
22
+ cattr_klass.public_send("#{name}=", value)
23
+ end
24
+
25
+ resetable << cattr_klass
26
+ end
27
+
28
+ yield(job)
29
+
30
+ resetable.each(&:reset)
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module ActiveJob
5
+ module CurrentAttributes
6
+ # Module adding the current attributes persistence into the ActiveJob jobs
7
+ module Persistence
8
+ # Alters the job serialization to inject the current attributes into the json before we
9
+ # send it to Kafka
10
+ #
11
+ # @param job [ActiveJob::Base] job
12
+ def serialize_job(job)
13
+ json = super(job)
14
+
15
+ _cattr_klasses.each do |key, cattr_klass_str|
16
+ next if json.key?(key)
17
+
18
+ attrs = cattr_klass_str.constantize.attributes
19
+
20
+ json[key] = attrs unless attrs.empty?
21
+ end
22
+
23
+ json
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/current_attributes'
4
+ require_relative 'current_attributes/loading'
5
+ require_relative 'current_attributes/persistence'
6
+
7
+ # This code is based on Sidekiqs approach to persisting current attributes
8
+ # @see https://github.com/sidekiq/sidekiq/blob/main/lib/sidekiq/middleware/current_attributes.rb
9
+ module Karafka
10
+ module ActiveJob
11
+ # Module that allows to persist current attributes on Karafka jobs
12
+ module CurrentAttributes
13
+ # Allows for persistence of given current attributes via AJ + Karafka
14
+ #
15
+ # @param klasses [Array<String, Class>] classes or names of the current attributes classes
16
+ def persist(*klasses)
17
+ # Support for providing multiple classes
18
+ klasses = Array(klasses).flatten
19
+
20
+ [Dispatcher, Consumer]
21
+ .reject { |expandable| expandable.respond_to?(:_cattr_klasses) }
22
+ .each { |expandable| expandable.class_attribute :_cattr_klasses, default: {} }
23
+
24
+ # Do not double inject in case of running persist multiple times
25
+ Dispatcher.prepend(Persistence) unless Dispatcher.ancestors.include?(Persistence)
26
+ Consumer.prepend(Loading) unless Consumer.ancestors.include?(Loading)
27
+
28
+ klasses.map(&:to_s).each do |stringified_klass|
29
+ # Prevent registering same klass multiple times
30
+ next if Dispatcher._cattr_klasses.value?(stringified_klass)
31
+
32
+ key = "cattr_#{Dispatcher._cattr_klasses.count}"
33
+
34
+ Dispatcher._cattr_klasses[key] = stringified_klass
35
+ Consumer._cattr_klasses[key] = stringified_klass
36
+ end
37
+ end
38
+
39
+ module_function :persist
40
+ end
41
+ end
42
+ end
@@ -18,7 +18,7 @@ module Karafka
18
18
  ::Karafka.producer.public_send(
19
19
  fetch_option(job, :dispatch_method, DEFAULTS),
20
20
  topic: job.queue_name,
21
- payload: ::ActiveSupport::JSON.encode(job.serialize)
21
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
22
22
  )
23
23
  end
24
24
 
@@ -34,7 +34,7 @@ module Karafka
34
34
 
35
35
  dispatches[d_method] << {
36
36
  topic: job.queue_name,
37
- payload: ::ActiveSupport::JSON.encode(job.serialize)
37
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
38
38
  }
39
39
  end
40
40
 
@@ -58,6 +58,12 @@ module Karafka
58
58
  .karafka_options
59
59
  .fetch(key, defaults.fetch(key))
60
60
  end
61
+
62
+ # @param job [ActiveJob::Base] job
63
+ # @return [Hash] json representation of the job
64
+ def serialize_job(job)
65
+ job.serialize
66
+ end
61
67
  end
62
68
  end
63
69
  end
@@ -30,7 +30,7 @@ module Karafka
30
30
  #
31
31
  # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
32
32
  # with all the configuration details needed for us to create a client
33
- # @return [Karafka::Connection::Rdk::Consumer]
33
+ # @return [Karafka::Connection::Client]
34
34
  def initialize(subscription_group)
35
35
  @id = SecureRandom.hex(6)
36
36
  # Name is set when we build consumer
@@ -46,5 +46,8 @@ module Karafka
46
46
 
47
47
  # This should never happen. Please open an issue if it does.
48
48
  StrategyNotFoundError = Class.new(BaseError)
49
+
50
+ # This should never happen. Please open an issue if it does.
51
+ InvalidRealOffsetUsage = Class.new(BaseError)
49
52
  end
50
53
  end
@@ -0,0 +1,166 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'socket'
4
+
5
+ module Karafka
6
+ module Instrumentation
7
+ module Vendors
8
+ # Namespace for instrumentation related with Kubernetes
9
+ module Kubernetes
10
+ # Kubernetes HTTP listener that does not only reply when process is not fully hanging, but
11
+ # also allows to define max time of processing and looping.
12
+ #
13
+ # Processes like Karafka server can hang while still being reachable. For example, in case
14
+ # something would hang inside of the user code, Karafka could stop polling and no new
15
+ # data would be processed, but process itself would still be active. This listener allows
16
+ # for defining of a ttl that gets bumped on each poll loop and before and after processing
17
+ # of a given messages batch.
18
+ class LivenessListener
19
+ include ::Karafka::Core::Helpers::Time
20
+
21
+ # @param hostname [String, nil] hostname or nil to bind on all
22
+ # @param port [Integer] TCP port on which we want to run our HTTP status server
23
+ # @param consuming_ttl [Integer] time in ms after which we consider consumption hanging.
24
+ # It allows us to define max consumption time after which k8s should consider given
25
+ # process as hanging
26
+ # @param polling_ttl [Integer] max time in ms for polling. If polling (any) does not
27
+ # happen that often, process should be considered dead.
28
+ # @note The default TTL matches the default `max.poll.interval.ms`
29
+ def initialize(
30
+ hostname: nil,
31
+ port: 3000,
32
+ consuming_ttl: 5 * 60 * 1_000,
33
+ polling_ttl: 5 * 60 * 1_000
34
+ )
35
+ @server = TCPServer.new(*[hostname, port].compact)
36
+ @polling_ttl = polling_ttl
37
+ @consuming_ttl = consuming_ttl
38
+ @mutex = Mutex.new
39
+ @pollings = {}
40
+ @consumptions = {}
41
+
42
+ Thread.new do
43
+ loop do
44
+ break unless respond
45
+ end
46
+ end
47
+ end
48
+
49
+ # Tick on each fetch
50
+ # @param _event [Karafka::Core::Monitoring::Event]
51
+ def on_connection_listener_fetch_loop(_event)
52
+ mark_polling_tick
53
+ end
54
+
55
+ # Tick on starting work
56
+ # @param _event [Karafka::Core::Monitoring::Event]
57
+ def on_consumer_consume(_event)
58
+ mark_consumption_tick
59
+ end
60
+
61
+ # Tick on finished work
62
+ # @param _event [Karafka::Core::Monitoring::Event]
63
+ def on_consumer_consumed(_event)
64
+ clear_consumption_tick
65
+ end
66
+
67
+ # @param _event [Karafka::Core::Monitoring::Event]
68
+ def on_consumer_revoke(_event)
69
+ mark_consumption_tick
70
+ end
71
+
72
+ # @param _event [Karafka::Core::Monitoring::Event]
73
+ def on_consumer_revoked(_event)
74
+ clear_consumption_tick
75
+ end
76
+
77
+ # @param _event [Karafka::Core::Monitoring::Event]
78
+ def on_consumer_shutting_down(_event)
79
+ mark_consumption_tick
80
+ end
81
+
82
+ # @param _event [Karafka::Core::Monitoring::Event]
83
+ def on_consumer_shutdown(_event)
84
+ clear_consumption_tick
85
+ end
86
+
87
+ # @param _event [Karafka::Core::Monitoring::Event]
88
+ def on_error_occurred(_event)
89
+ clear_consumption_tick
90
+ clear_polling_tick
91
+ end
92
+
93
+ # Stop the http server when we stop the process
94
+ # @param _event [Karafka::Core::Monitoring::Event]
95
+ def on_app_stopped(_event)
96
+ @server.close
97
+ end
98
+
99
+ private
100
+
101
+ # Wraps the logic with a mutex
102
+ # @param block [Proc] code we want to run in mutex
103
+ def synchronize(&block)
104
+ @mutex.synchronize(&block)
105
+ end
106
+
107
+ # @return [Integer] object id of the current thread
108
+ def thread_id
109
+ Thread.current.object_id
110
+ end
111
+
112
+ # Update the polling tick time for current thread
113
+ def mark_polling_tick
114
+ synchronize do
115
+ @pollings[thread_id] = monotonic_now
116
+ end
117
+ end
118
+
119
+ # Clear current thread polling time tracker
120
+ def clear_polling_tick
121
+ synchronize do
122
+ @pollings.delete(thread_id)
123
+ end
124
+ end
125
+
126
+ # Update the processing tick time
127
+ def mark_consumption_tick
128
+ synchronize do
129
+ @consumptions[thread_id] = monotonic_now
130
+ end
131
+ end
132
+
133
+ # Clear current thread consumption time tracker
134
+ def clear_consumption_tick
135
+ synchronize do
136
+ @consumptions.delete(thread_id)
137
+ end
138
+ end
139
+
140
+ # Responds to a HTTP request with the process liveness status
141
+ def respond
142
+ client = @server.accept
143
+ client.gets
144
+ client.print "HTTP/1.1 #{status}\r\n"
145
+ client.close
146
+
147
+ true
148
+ rescue Errno::ECONNRESET, Errno::EPIPE, IOError
149
+ !@server.closed?
150
+ end
151
+
152
+ # Did we exceed any of the ttls
153
+ # @return [String] 204 string if ok, 500 otherwise
154
+ def status
155
+ time = monotonic_now
156
+
157
+ return '500' if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
158
+ return '500' if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
159
+
160
+ '204'
161
+ end
162
+ end
163
+ end
164
+ end
165
+ end
166
+ end
@@ -35,18 +35,9 @@ module Karafka
35
35
  # double-processing
36
36
  break if Karafka::App.stopping? && !topic.virtual_partitions?
37
37
 
38
- # Break if we already know, that one of virtual partitions has failed and we will
39
- # be restarting processing all together after all VPs are done. This will minimize
40
- # number of jobs that will be re-processed
41
- break if topic.virtual_partitions? && failing?
42
-
43
38
  consume_job(message)
44
39
 
45
- # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
46
- # this could create random markings.
47
- # The exception here is the collapsed state where we can move one after another
48
- next if topic.virtual_partitions? && !collapsed?
49
-
40
+ # We can always mark because of the virtual offset management that we have in VPs
50
41
  mark_as_consumed(message)
51
42
  end
52
43
  end
@@ -39,7 +39,7 @@ module Karafka
39
39
  fetch_option(job, :dispatch_method, DEFAULTS),
40
40
  dispatch_details(job).merge!(
41
41
  topic: job.queue_name,
42
- payload: ::ActiveSupport::JSON.encode(job.serialize)
42
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
43
43
  )
44
44
  )
45
45
  end
@@ -54,7 +54,7 @@ module Karafka
54
54
 
55
55
  dispatches[d_method] << dispatch_details(job).merge!(
56
56
  topic: job.queue_name,
57
- payload: ::ActiveSupport::JSON.encode(job.serialize)
57
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
58
58
  )
59
59
  end
60
60
 
@@ -17,7 +17,7 @@ module Karafka
17
17
  # Pro coordinator that provides extra orchestration methods useful for parallel processing
18
18
  # within the same partition
19
19
  class Coordinator < ::Karafka::Processing::Coordinator
20
- attr_reader :filter
20
+ attr_reader :filter, :virtual_offset_manager
21
21
 
22
22
  # @param args [Object] anything the base coordinator accepts
23
23
  def initialize(*args)
@@ -27,6 +27,20 @@ module Karafka
27
27
  @flow_lock = Mutex.new
28
28
  @collapser = Collapser.new
29
29
  @filter = FiltersApplier.new(self)
30
+
31
+ return unless topic.virtual_partitions?
32
+
33
+ @virtual_offset_manager = VirtualOffsetManager.new(
34
+ topic.name,
35
+ partition
36
+ )
37
+
38
+ # We register our own "internal" filter to support filtering of messages that were marked
39
+ # as consumed virtually
40
+ @filter.filters << Filters::VirtualLimiter.new(
41
+ @virtual_offset_manager,
42
+ @collapser
43
+ )
30
44
  end
31
45
 
32
46
  # Starts the coordination process
@@ -40,6 +54,11 @@ module Karafka
40
54
  @filter.apply!(messages)
41
55
 
42
56
  @executed.clear
57
+
58
+ # We keep the old processed offsets until the collapsing is done and regular processing
59
+ # with virtualization is restored
60
+ @virtual_offset_manager.clear if topic.virtual_partitions? && !@collapser.collapsed?
61
+
43
62
  @last_message = messages.last
44
63
  end
45
64
 
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Filters
18
+ # Removes messages that are already marked as consumed in the virtual offset manager
19
+ # This should operate only when using virtual partitions.
20
+ #
21
+ # This cleaner prevents us from duplicated processing of messages that were virtually
22
+ # marked as consumed even if we could not mark them as consumed in Kafka. This allows us
23
+ # to limit reprocessing when errors occur drastically when operating with virtual
24
+ # partitions
25
+ #
26
+ # @note It should be registered only when VPs are used
27
+ class VirtualLimiter < Base
28
+ # @param manager [Processing::VirtualOffsetManager]
29
+ # @param collapser [Processing::Collapser]
30
+ def initialize(manager, collapser)
31
+ @manager = manager
32
+ @collapser = collapser
33
+
34
+ super()
35
+ end
36
+
37
+ # Remove messages that we already marked as virtually consumed. Does nothing if not in
38
+ # the collapsed mode.
39
+ #
40
+ # @param messages [Array<Karafka::Messages::Message>]
41
+ def apply!(messages)
42
+ return unless @collapser.collapsed?
43
+
44
+ marked = @manager.marked
45
+
46
+ messages.delete_if { |message| marked.include?(message.offset) }
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
@@ -21,6 +21,10 @@ module Karafka
21
21
  # This means that this is the API we expose as a single filter, allowing us to control
22
22
  # the filtering via many filters easily.
23
23
  class FiltersApplier
24
+ # @return [Array] registered filters array. Useful if we want to inject internal context
25
+ # aware filters.
26
+ attr_reader :filters
27
+
24
28
  # @param coordinator [Pro::Coordinator] pro coordinator
25
29
  def initialize(coordinator)
26
30
  # Builds filters out of their factories
@@ -22,7 +22,7 @@ module Karafka
22
22
  # - Mom
23
23
  # - VP
24
24
  module DlqFtrMomVp
25
- include Strategies::Vp::Default
25
+ include Strategies::Aj::DlqMomVp
26
26
  include Strategies::Aj::DlqFtrMom
27
27
 
28
28
  # Features for this strategy
@@ -24,7 +24,9 @@ module Karafka
24
24
  # This case is a bit of special. Please see the `AjDlqMom` for explanation on how the
25
25
  # offset management works in this case.
26
26
  module DlqLrjMom
27
- include Strategies::Aj::DlqLrjMomVp
27
+ include Strategies::Default
28
+ include Strategies::Dlq::Default
29
+ include Strategies::Aj::LrjMom
28
30
 
29
31
  # Features for this strategy
30
32
  FEATURES = %i[
@@ -20,9 +20,9 @@ module Karafka
20
20
  # Manual offset management enabled
21
21
  # Virtual Partitions enabled
22
22
  module DlqMomVp
23
- include Strategies::Dlq::Default
24
- include Strategies::Vp::Default
25
23
  include Strategies::Default
24
+ include Strategies::Dlq::Vp
25
+ include Strategies::Vp::Default
26
26
 
27
27
  # Features for this strategy
28
28
  FEATURES = %i[