karafka 2.0.41 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +2 -2
  4. data/CHANGELOG.md +23 -1
  5. data/Gemfile.lock +12 -12
  6. data/config/locales/errors.yml +10 -0
  7. data/config/locales/pro_errors.yml +0 -2
  8. data/lib/karafka/active_job/consumer.rb +16 -11
  9. data/lib/karafka/active_job/current_attributes/loading.rb +36 -0
  10. data/lib/karafka/active_job/current_attributes/persistence.rb +28 -0
  11. data/lib/karafka/active_job/current_attributes.rb +42 -0
  12. data/lib/karafka/active_job/dispatcher.rb +8 -2
  13. data/lib/karafka/connection/client.rb +1 -1
  14. data/lib/karafka/errors.rb +3 -0
  15. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +176 -0
  16. data/lib/karafka/pro/active_job/consumer.rb +1 -10
  17. data/lib/karafka/pro/active_job/dispatcher.rb +2 -2
  18. data/lib/karafka/pro/processing/coordinator.rb +20 -1
  19. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +52 -0
  20. data/lib/karafka/pro/processing/filters_applier.rb +4 -0
  21. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
  22. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
  23. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +2 -2
  24. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
  25. data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +1 -1
  26. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +1 -1
  27. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +3 -6
  28. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom_vp.rb +43 -0
  29. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_vp.rb +1 -0
  30. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +3 -7
  31. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom_vp.rb +41 -0
  32. data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +1 -0
  33. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -6
  34. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom_vp.rb +36 -0
  35. data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +1 -0
  36. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +8 -7
  37. data/lib/karafka/pro/processing/strategies/dlq/mom_vp.rb +37 -0
  38. data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
  39. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom_vp.rb +40 -0
  40. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
  41. data/lib/karafka/pro/processing/strategies/lrj/mom_vp.rb +38 -0
  42. data/lib/karafka/pro/processing/strategies/mom/ftr_vp.rb +37 -0
  43. data/lib/karafka/pro/{base_consumer.rb → processing/strategies/mom/vp.rb} +17 -7
  44. data/lib/karafka/pro/processing/strategies/vp/default.rb +51 -0
  45. data/lib/karafka/pro/processing/virtual_offset_manager.rb +147 -0
  46. data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +0 -17
  47. data/lib/karafka/processing/strategies/default.rb +2 -0
  48. data/lib/karafka/processing/strategies/dlq_mom.rb +9 -7
  49. data/lib/karafka/version.rb +1 -1
  50. data/lib/karafka.rb +5 -0
  51. data.tar.gz.sig +0 -0
  52. metadata +16 -4
  53. metadata.gz.sig +0 -0
  54. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +0 -16
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9560b22fc8cfd59dcaeb6551bcc0b2d2ebfef2f162a12905fa3aefe0c9c5865e
4
- data.tar.gz: a5b7aba125288ec96cf3b862e72447bf467fe23f923c232fe1d3ff9c48b1fdb4
3
+ metadata.gz: 717ad2fe9f79ab6d7b93606e7925864443c7e88b8e38137eb3d3d21bd9e41e19
4
+ data.tar.gz: 407550329413122f29aec13a11434ed3ac4313d96c6d32e060ffb5a857808120
5
5
  SHA512:
6
- metadata.gz: d6581af85f8900d2d5ce91b6f9ec8ed0e1f6be5f3e80c36315c44c8dc07c30949566e281f40feb1b54cc9bbca771ac2188637e916d19edcb2fe26c04aeb362e1
7
- data.tar.gz: e467612b3185b5ec764d387e72507b617bf49d702436da2021d467fb0c23630aa98151c9679444a34114317a8c52e3c37f0268c6a6bdb4564ffa1bab51993109
6
+ metadata.gz: 2169d98260cdc35c18b47821467ecf62a85933615662593c739b4332047f78f5b64172f78464b4ae95d3ad1d28be705c5d76bc3638f06b5f526811c7134ed1a5
7
+ data.tar.gz: 2fbad6b508fabbd23cdaa4d20fc4ad983c1391d4e0bccd60aca5181c18387f31bedfb88801563887554e5f7ee6e58fd5b637ee6e452787ea6bb025a9fed52aa1
checksums.yaml.gz.sig CHANGED
Binary file
@@ -62,7 +62,7 @@ jobs:
62
62
  run: \curl -sSL https://api.coditsu.io/run/ci | bash
63
63
 
64
64
  specs:
65
- timeout-minutes: 45
65
+ timeout-minutes: 30
66
66
  runs-on: ubuntu-latest
67
67
  needs: diffend
68
68
  strategy:
@@ -102,7 +102,7 @@ jobs:
102
102
  run: bin/rspecs
103
103
 
104
104
  integrations:
105
- timeout-minutes: 30
105
+ timeout-minutes: 45
106
106
  runs-on: ubuntu-latest
107
107
  needs: diffend
108
108
  strategy:
data/CHANGELOG.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.1.1 (2023-05-24)
4
+ - [Fix] Liveness Probe Doesn't Meet HTTP 1.1 Criteria - Causing Kubernetes Restarts (#1450)
5
+
6
+ ## 2.1.0 (2023-05-22)
7
+ - **[Feature]** Provide ability to use CurrentAttributes with ActiveJob's Karafka adapter.
8
+ - **[Feature]** Introduce collective Virtual Partitions offset management.
9
+ - **[Feature]** Use virtual offsets to filter out messages that would be re-processed upon retries.
10
+ - [Improvement] No longer break processing on failing parallel virtual partitions in ActiveJob because it is compensated by virtual marking.
11
+ - [Improvement] Always use Virtual offset management for Pro ActiveJobs.
12
+ - [Improvement] Do not attempt to mark offsets on already revoked partitions.
13
+ - [Improvement] Make sure, that VP components are not injected into non VP strategies.
14
+ - [Improvement] Improve complex strategies inheritance flow.
15
+ - [Improvement] Optimize offset management for DLQ + MoM feature combinations.
16
+ - [Change] Removed `Karafka::Pro::BaseConsumer` in favor of `Karafka::BaseConsumer`. (#1345)
17
+ - [Fix] Fix for `max_messages` and `max_wait_time` not having reference in errors.yml (#1443)
18
+
19
+ ### Upgrade notes
20
+
21
+ 1. Upgrade to Karafka `2.0.41` prior to upgrading to `2.1.0`.
22
+ 2. Replace `Karafka::Pro::BaseConsumer` references to `Karafka::BaseConsumer`.
23
+ 3. Replace `Karafka::Instrumentation::Vendors::Datadog::Listener` with `Karafka::Instrumentation::Vendors::Datadog::MetricsListener`.
24
+
3
25
  ## 2.0.41 (2023-04-19)
4
26
  - **[Feature]** Provide `Karafka::Pro::Iterator` for anonymous topic/partitions iterations and messages lookups (#1389 and #1427).
5
27
  - [Improvement] Optimize topic lookup for `read_topic` admin method usage.
@@ -60,7 +82,7 @@
60
82
 
61
83
  ## 2.0.35 (2023-03-13)
62
84
  - **[Feature]** Allow for defining topics config via the DSL and its automatic creation via CLI command.
63
- - **[Feature]** Allow for full topics reset and topics repartitioning via the CLI.
85
+ - **[Feature]** Allow for full topics reset and topics repartitioning via the CLI.
64
86
 
65
87
  ## 2.0.34 (2023-03-04)
66
88
  - [Improvement] Attach an `embedded` tag to Karafka processes started using the embedded API.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.0.41)
4
+ karafka (2.1.1)
5
5
  karafka-core (>= 2.0.12, < 3.0.0)
6
6
  thor (>= 0.20)
7
7
  waterdrop (>= 2.4.10, < 3.0.0)
@@ -28,34 +28,34 @@ GEM
28
28
  ffi (1.15.5)
29
29
  globalid (1.1.0)
30
30
  activesupport (>= 5.0)
31
- i18n (1.12.0)
31
+ i18n (1.13.0)
32
32
  concurrent-ruby (~> 1.0)
33
33
  karafka-core (2.0.12)
34
34
  concurrent-ruby (>= 1.1)
35
35
  karafka-rdkafka (>= 0.12.1)
36
- karafka-rdkafka (0.12.1)
36
+ karafka-rdkafka (0.12.2)
37
37
  ffi (~> 1.15)
38
38
  mini_portile2 (~> 2.6)
39
39
  rake (> 12)
40
- karafka-web (0.5.1)
40
+ karafka-web (0.5.2)
41
41
  erubi (~> 1.4)
42
42
  karafka (>= 2.0.40, < 3.0.0)
43
43
  karafka-core (>= 2.0.12, < 3.0.0)
44
44
  roda (~> 3.63)
45
45
  tilt (~> 2.0)
46
- mini_portile2 (2.8.1)
46
+ mini_portile2 (2.8.2)
47
47
  minitest (5.18.0)
48
48
  rack (3.0.7)
49
49
  rake (13.0.6)
50
- roda (3.67.0)
50
+ roda (3.68.0)
51
51
  rack
52
52
  rspec (3.12.0)
53
53
  rspec-core (~> 3.12.0)
54
54
  rspec-expectations (~> 3.12.0)
55
55
  rspec-mocks (~> 3.12.0)
56
- rspec-core (3.12.1)
56
+ rspec-core (3.12.2)
57
57
  rspec-support (~> 3.12.0)
58
- rspec-expectations (3.12.2)
58
+ rspec-expectations (3.12.3)
59
59
  diff-lcs (>= 1.2.0, < 2.0)
60
60
  rspec-support (~> 3.12.0)
61
61
  rspec-mocks (3.12.5)
@@ -68,14 +68,14 @@ GEM
68
68
  simplecov_json_formatter (~> 0.1)
69
69
  simplecov-html (0.12.3)
70
70
  simplecov_json_formatter (0.1.4)
71
- thor (1.2.1)
71
+ thor (1.2.2)
72
72
  tilt (2.1.0)
73
73
  tzinfo (2.0.6)
74
74
  concurrent-ruby (~> 1.0)
75
- waterdrop (2.5.1)
75
+ waterdrop (2.5.2)
76
76
  karafka-core (>= 2.0.12, < 3.0.0)
77
77
  zeitwerk (~> 2.3)
78
- zeitwerk (2.6.7)
78
+ zeitwerk (2.6.8)
79
79
 
80
80
  PLATFORMS
81
81
  x86_64-linux
@@ -90,4 +90,4 @@ DEPENDENCIES
90
90
  simplecov
91
91
 
92
92
  BUNDLED WITH
93
- 2.4.10
93
+ 2.4.12
@@ -15,6 +15,13 @@ en:
15
15
  shutdown_timeout_format: needs to be an integer bigger than 0
16
16
  max_wait_time_format: needs to be an integer bigger than 0
17
17
  kafka_format: needs to be a filled hash
18
+ internal.processing.jobs_builder_format: cannot be nil
19
+ internal.processing.scheduler: cannot be nil
20
+ internal.processing.coordinator_class: cannot be nil
21
+ internal.processing.partitioner_class: cannot be nil
22
+ internal.active_job.dispatcher: cannot be nil
23
+ internal.active_job.job_options_contract: cannot be nil
24
+ internal.active_job.consumer_class: cannot be nil
18
25
  internal.status_format: needs to be present
19
26
  internal.process_format: needs to be present
20
27
  internal.routing.builder_format: needs to be present
@@ -31,7 +38,10 @@ en:
31
38
  topics_missing: No topics to subscribe to
32
39
 
33
40
  topic:
41
+ kafka: needs to be a hash with kafka scope settings details
34
42
  missing: needs to be present
43
+ max_messages_format: 'needs to be an integer bigger than 0'
44
+ max_wait_time_format: 'needs to be an integer bigger than 0'
35
45
  name_format: 'needs to be a string with a Kafka accepted format'
36
46
  deserializer_format: needs to be present
37
47
  consumer_format: needs to be present
@@ -4,8 +4,6 @@ en:
4
4
  virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
5
5
  virtual_partitions.max_partitions_format: needs to be equal or more than 1
6
6
 
7
- manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
8
-
9
7
  long_running_job.active_format: needs to be either true or false
10
8
 
11
9
  dead_letter_queue_with_virtual_partitions: when using Dead Letter Queue with Virtual Partitions, at least one retry is required.
@@ -24,21 +24,26 @@ module Karafka
24
24
  #
25
25
  # @param job_message [Karafka::Messages::Message] message with active job
26
26
  def consume_job(job_message)
27
- # We technically speaking could set this as deserializer and reference it from the
28
- # message instead of using the `#raw_payload`. This is not done on purpose to simplify
29
- # the ActiveJob setup here
30
- job = ::ActiveSupport::JSON.decode(job_message.raw_payload)
27
+ with_deserialized_job(job_message) do |job|
28
+ tags.add(:job_class, job['job_class'])
31
29
 
32
- tags.add(:job_class, job['job_class'])
30
+ payload = { caller: self, job: job, message: job_message }
33
31
 
34
- payload = { caller: self, job: job, message: job_message }
35
-
36
- # We publish both to make it consistent with `consumer.x` events
37
- Karafka.monitor.instrument('active_job.consume', payload)
38
- Karafka.monitor.instrument('active_job.consumed', payload) do
39
- ::ActiveJob::Base.execute(job)
32
+ # We publish both to make it consistent with `consumer.x` events
33
+ Karafka.monitor.instrument('active_job.consume', payload)
34
+ Karafka.monitor.instrument('active_job.consumed', payload) do
35
+ ::ActiveJob::Base.execute(job)
36
+ end
40
37
  end
41
38
  end
39
+
40
+ # @param job_message [Karafka::Messages::Message] message with active job
41
+ def with_deserialized_job(job_message)
42
+ # We technically speaking could set this as deserializer and reference it from the
43
+ # message instead of using the `#raw_payload`. This is not done on purpose to simplify
44
+ # the ActiveJob setup here
45
+ yield ::ActiveSupport::JSON.decode(job_message.raw_payload)
46
+ end
42
47
  end
43
48
  end
44
49
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module ActiveJob
5
+ module CurrentAttributes
6
+ # Module expanding the job deserialization to extract current attributes and load them
7
+ # for the time of the job execution
8
+ module Loading
9
+ # @param job_message [Karafka::Messages::Message] message with active job
10
+ def with_deserialized_job(job_message)
11
+ super(job_message) do |job|
12
+ resetable = []
13
+
14
+ _cattr_klasses.each do |key, cattr_klass_str|
15
+ next unless job.key?(key)
16
+
17
+ attributes = job.delete(key)
18
+
19
+ cattr_klass = cattr_klass_str.constantize
20
+
21
+ attributes.each do |name, value|
22
+ cattr_klass.public_send("#{name}=", value)
23
+ end
24
+
25
+ resetable << cattr_klass
26
+ end
27
+
28
+ yield(job)
29
+
30
+ resetable.each(&:reset)
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module ActiveJob
5
+ module CurrentAttributes
6
+ # Module adding the current attributes persistence into the ActiveJob jobs
7
+ module Persistence
8
+ # Alters the job serialization to inject the current attributes into the json before we
9
+ # send it to Kafka
10
+ #
11
+ # @param job [ActiveJob::Base] job
12
+ def serialize_job(job)
13
+ json = super(job)
14
+
15
+ _cattr_klasses.each do |key, cattr_klass_str|
16
+ next if json.key?(key)
17
+
18
+ attrs = cattr_klass_str.constantize.attributes
19
+
20
+ json[key] = attrs unless attrs.empty?
21
+ end
22
+
23
+ json
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/current_attributes'
4
+ require_relative 'current_attributes/loading'
5
+ require_relative 'current_attributes/persistence'
6
+
7
+ # This code is based on Sidekiqs approach to persisting current attributes
8
+ # @see https://github.com/sidekiq/sidekiq/blob/main/lib/sidekiq/middleware/current_attributes.rb
9
+ module Karafka
10
+ module ActiveJob
11
+ # Module that allows to persist current attributes on Karafka jobs
12
+ module CurrentAttributes
13
+ # Allows for persistence of given current attributes via AJ + Karafka
14
+ #
15
+ # @param klasses [Array<String, Class>] classes or names of the current attributes classes
16
+ def persist(*klasses)
17
+ # Support for providing multiple classes
18
+ klasses = Array(klasses).flatten
19
+
20
+ [Dispatcher, Consumer]
21
+ .reject { |expandable| expandable.respond_to?(:_cattr_klasses) }
22
+ .each { |expandable| expandable.class_attribute :_cattr_klasses, default: {} }
23
+
24
+ # Do not double inject in case of running persist multiple times
25
+ Dispatcher.prepend(Persistence) unless Dispatcher.ancestors.include?(Persistence)
26
+ Consumer.prepend(Loading) unless Consumer.ancestors.include?(Loading)
27
+
28
+ klasses.map(&:to_s).each do |stringified_klass|
29
+ # Prevent registering same klass multiple times
30
+ next if Dispatcher._cattr_klasses.value?(stringified_klass)
31
+
32
+ key = "cattr_#{Dispatcher._cattr_klasses.count}"
33
+
34
+ Dispatcher._cattr_klasses[key] = stringified_klass
35
+ Consumer._cattr_klasses[key] = stringified_klass
36
+ end
37
+ end
38
+
39
+ module_function :persist
40
+ end
41
+ end
42
+ end
@@ -18,7 +18,7 @@ module Karafka
18
18
  ::Karafka.producer.public_send(
19
19
  fetch_option(job, :dispatch_method, DEFAULTS),
20
20
  topic: job.queue_name,
21
- payload: ::ActiveSupport::JSON.encode(job.serialize)
21
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
22
22
  )
23
23
  end
24
24
 
@@ -34,7 +34,7 @@ module Karafka
34
34
 
35
35
  dispatches[d_method] << {
36
36
  topic: job.queue_name,
37
- payload: ::ActiveSupport::JSON.encode(job.serialize)
37
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
38
38
  }
39
39
  end
40
40
 
@@ -58,6 +58,12 @@ module Karafka
58
58
  .karafka_options
59
59
  .fetch(key, defaults.fetch(key))
60
60
  end
61
+
62
+ # @param job [ActiveJob::Base] job
63
+ # @return [Hash] json representation of the job
64
+ def serialize_job(job)
65
+ job.serialize
66
+ end
61
67
  end
62
68
  end
63
69
  end
@@ -30,7 +30,7 @@ module Karafka
30
30
  #
31
31
  # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
32
32
  # with all the configuration details needed for us to create a client
33
- # @return [Karafka::Connection::Rdk::Consumer]
33
+ # @return [Karafka::Connection::Client]
34
34
  def initialize(subscription_group)
35
35
  @id = SecureRandom.hex(6)
36
36
  # Name is set when we build consumer
@@ -46,5 +46,8 @@ module Karafka
46
46
 
47
47
  # This should never happen. Please open an issue if it does.
48
48
  StrategyNotFoundError = Class.new(BaseError)
49
+
50
+ # This should never happen. Please open an issue if it does.
51
+ InvalidRealOffsetUsage = Class.new(BaseError)
49
52
  end
50
53
  end
@@ -0,0 +1,176 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'socket'
4
+
5
+ module Karafka
6
+ module Instrumentation
7
+ module Vendors
8
+ # Namespace for instrumentation related with Kubernetes
9
+ module Kubernetes
10
+ # Kubernetes HTTP listener that does not only reply when process is not fully hanging, but
11
+ # also allows to define max time of processing and looping.
12
+ #
13
+ # Processes like Karafka server can hang while still being reachable. For example, in case
14
+ # something would hang inside of the user code, Karafka could stop polling and no new
15
+ # data would be processed, but process itself would still be active. This listener allows
16
+ # for defining of a ttl that gets bumped on each poll loop and before and after processing
17
+ # of a given messages batch.
18
+ class LivenessListener
19
+ include ::Karafka::Core::Helpers::Time
20
+
21
+ # All good with Karafka
22
+ OK_CODE = '204 No Content'
23
+
24
+ # Some timeouts, fail
25
+ FAIL_CODE = '500 Internal Server Error'
26
+
27
+ private_constant :OK_CODE, :FAIL_CODE
28
+
29
+ # @param hostname [String, nil] hostname or nil to bind on all
30
+ # @param port [Integer] TCP port on which we want to run our HTTP status server
31
+ # @param consuming_ttl [Integer] time in ms after which we consider consumption hanging.
32
+ # It allows us to define max consumption time after which k8s should consider given
33
+ # process as hanging
34
+ # @param polling_ttl [Integer] max time in ms for polling. If polling (any) does not
35
+ # happen that often, process should be considered dead.
36
+ # @note The default TTL matches the default `max.poll.interval.ms`
37
+ def initialize(
38
+ hostname: nil,
39
+ port: 3000,
40
+ consuming_ttl: 5 * 60 * 1_000,
41
+ polling_ttl: 5 * 60 * 1_000
42
+ )
43
+ @server = TCPServer.new(*[hostname, port].compact)
44
+ @polling_ttl = polling_ttl
45
+ @consuming_ttl = consuming_ttl
46
+ @mutex = Mutex.new
47
+ @pollings = {}
48
+ @consumptions = {}
49
+
50
+ Thread.new do
51
+ loop do
52
+ break unless respond
53
+ end
54
+ end
55
+ end
56
+
57
+ # Tick on each fetch
58
+ # @param _event [Karafka::Core::Monitoring::Event]
59
+ def on_connection_listener_fetch_loop(_event)
60
+ mark_polling_tick
61
+ end
62
+
63
+ # Tick on starting work
64
+ # @param _event [Karafka::Core::Monitoring::Event]
65
+ def on_consumer_consume(_event)
66
+ mark_consumption_tick
67
+ end
68
+
69
+ # Tick on finished work
70
+ # @param _event [Karafka::Core::Monitoring::Event]
71
+ def on_consumer_consumed(_event)
72
+ clear_consumption_tick
73
+ end
74
+
75
+ # @param _event [Karafka::Core::Monitoring::Event]
76
+ def on_consumer_revoke(_event)
77
+ mark_consumption_tick
78
+ end
79
+
80
+ # @param _event [Karafka::Core::Monitoring::Event]
81
+ def on_consumer_revoked(_event)
82
+ clear_consumption_tick
83
+ end
84
+
85
+ # @param _event [Karafka::Core::Monitoring::Event]
86
+ def on_consumer_shutting_down(_event)
87
+ mark_consumption_tick
88
+ end
89
+
90
+ # @param _event [Karafka::Core::Monitoring::Event]
91
+ def on_consumer_shutdown(_event)
92
+ clear_consumption_tick
93
+ end
94
+
95
+ # @param _event [Karafka::Core::Monitoring::Event]
96
+ def on_error_occurred(_event)
97
+ clear_consumption_tick
98
+ clear_polling_tick
99
+ end
100
+
101
+ # Stop the http server when we stop the process
102
+ # @param _event [Karafka::Core::Monitoring::Event]
103
+ def on_app_stopped(_event)
104
+ @server.close
105
+ end
106
+
107
+ private
108
+
109
+ # Wraps the logic with a mutex
110
+ # @param block [Proc] code we want to run in mutex
111
+ def synchronize(&block)
112
+ @mutex.synchronize(&block)
113
+ end
114
+
115
+ # @return [Integer] object id of the current thread
116
+ def thread_id
117
+ Thread.current.object_id
118
+ end
119
+
120
+ # Update the polling tick time for current thread
121
+ def mark_polling_tick
122
+ synchronize do
123
+ @pollings[thread_id] = monotonic_now
124
+ end
125
+ end
126
+
127
+ # Clear current thread polling time tracker
128
+ def clear_polling_tick
129
+ synchronize do
130
+ @pollings.delete(thread_id)
131
+ end
132
+ end
133
+
134
+ # Update the processing tick time
135
+ def mark_consumption_tick
136
+ synchronize do
137
+ @consumptions[thread_id] = monotonic_now
138
+ end
139
+ end
140
+
141
+ # Clear current thread consumption time tracker
142
+ def clear_consumption_tick
143
+ synchronize do
144
+ @consumptions.delete(thread_id)
145
+ end
146
+ end
147
+
148
+ # Responds to a HTTP request with the process liveness status
149
+ def respond
150
+ client = @server.accept
151
+ client.gets
152
+ client.print "HTTP/1.1 #{status}\r\n"
153
+ client.print "Content-Type: text/plain\r\n"
154
+ client.print "\r\n"
155
+ client.close
156
+
157
+ true
158
+ rescue Errno::ECONNRESET, Errno::EPIPE, IOError
159
+ !@server.closed?
160
+ end
161
+
162
+ # Did we exceed any of the ttls
163
+ # @return [String] 204 string if ok, 500 otherwise
164
+ def status
165
+ time = monotonic_now
166
+
167
+ return FAIL_CODE if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
168
+ return FAIL_CODE if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
169
+
170
+ OK_CODE
171
+ end
172
+ end
173
+ end
174
+ end
175
+ end
176
+ end
@@ -35,18 +35,9 @@ module Karafka
35
35
  # double-processing
36
36
  break if Karafka::App.stopping? && !topic.virtual_partitions?
37
37
 
38
- # Break if we already know, that one of virtual partitions has failed and we will
39
- # be restarting processing all together after all VPs are done. This will minimize
40
- # number of jobs that will be re-processed
41
- break if topic.virtual_partitions? && failing?
42
-
43
38
  consume_job(message)
44
39
 
45
- # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
46
- # this could create random markings.
47
- # The exception here is the collapsed state where we can move one after another
48
- next if topic.virtual_partitions? && !collapsed?
49
-
40
+ # We can always mark because of the virtual offset management that we have in VPs
50
41
  mark_as_consumed(message)
51
42
  end
52
43
  end
@@ -39,7 +39,7 @@ module Karafka
39
39
  fetch_option(job, :dispatch_method, DEFAULTS),
40
40
  dispatch_details(job).merge!(
41
41
  topic: job.queue_name,
42
- payload: ::ActiveSupport::JSON.encode(job.serialize)
42
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
43
43
  )
44
44
  )
45
45
  end
@@ -54,7 +54,7 @@ module Karafka
54
54
 
55
55
  dispatches[d_method] << dispatch_details(job).merge!(
56
56
  topic: job.queue_name,
57
- payload: ::ActiveSupport::JSON.encode(job.serialize)
57
+ payload: ::ActiveSupport::JSON.encode(serialize_job(job))
58
58
  )
59
59
  end
60
60
 
@@ -17,7 +17,7 @@ module Karafka
17
17
  # Pro coordinator that provides extra orchestration methods useful for parallel processing
18
18
  # within the same partition
19
19
  class Coordinator < ::Karafka::Processing::Coordinator
20
- attr_reader :filter
20
+ attr_reader :filter, :virtual_offset_manager
21
21
 
22
22
  # @param args [Object] anything the base coordinator accepts
23
23
  def initialize(*args)
@@ -27,6 +27,20 @@ module Karafka
27
27
  @flow_lock = Mutex.new
28
28
  @collapser = Collapser.new
29
29
  @filter = FiltersApplier.new(self)
30
+
31
+ return unless topic.virtual_partitions?
32
+
33
+ @virtual_offset_manager = VirtualOffsetManager.new(
34
+ topic.name,
35
+ partition
36
+ )
37
+
38
+ # We register our own "internal" filter to support filtering of messages that were marked
39
+ # as consumed virtually
40
+ @filter.filters << Filters::VirtualLimiter.new(
41
+ @virtual_offset_manager,
42
+ @collapser
43
+ )
30
44
  end
31
45
 
32
46
  # Starts the coordination process
@@ -40,6 +54,11 @@ module Karafka
40
54
  @filter.apply!(messages)
41
55
 
42
56
  @executed.clear
57
+
58
+ # We keep the old processed offsets until the collapsing is done and regular processing
59
+ # with virtualization is restored
60
+ @virtual_offset_manager.clear if topic.virtual_partitions? && !@collapser.collapsed?
61
+
43
62
  @last_message = messages.last
44
63
  end
45
64