karafka 2.0.41 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +2 -2
- data/CHANGELOG.md +20 -1
- data/Gemfile.lock +2 -1
- data/config/locales/errors.yml +10 -0
- data/config/locales/pro_errors.yml +0 -2
- data/lib/karafka/active_job/consumer.rb +16 -11
- data/lib/karafka/active_job/current_attributes/loading.rb +36 -0
- data/lib/karafka/active_job/current_attributes/persistence.rb +28 -0
- data/lib/karafka/active_job/current_attributes.rb +42 -0
- data/lib/karafka/active_job/dispatcher.rb +8 -2
- data/lib/karafka/connection/client.rb +1 -1
- data/lib/karafka/errors.rb +3 -0
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +166 -0
- data/lib/karafka/pro/active_job/consumer.rb +1 -10
- data/lib/karafka/pro/active_job/dispatcher.rb +2 -2
- data/lib/karafka/pro/processing/coordinator.rb +20 -1
- data/lib/karafka/pro/processing/filters/virtual_limiter.rb +52 -0
- data/lib/karafka/pro/processing/filters_applier.rb +4 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +2 -2
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
- data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +1 -1
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +3 -6
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom_vp.rb +43 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_vp.rb +1 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +3 -7
- data/lib/karafka/pro/processing/strategies/dlq/ftr_mom_vp.rb +41 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +1 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +3 -6
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom_vp.rb +36 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +1 -0
- data/lib/karafka/pro/processing/strategies/dlq/mom.rb +8 -7
- data/lib/karafka/pro/processing/strategies/dlq/mom_vp.rb +37 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom_vp.rb +40 -0
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/mom_vp.rb +38 -0
- data/lib/karafka/pro/processing/strategies/mom/ftr_vp.rb +37 -0
- data/lib/karafka/pro/{base_consumer.rb → processing/strategies/mom/vp.rb} +17 -7
- data/lib/karafka/pro/processing/strategies/vp/default.rb +51 -0
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +147 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +0 -17
- data/lib/karafka/processing/strategies/default.rb +2 -0
- data/lib/karafka/processing/strategies/dlq_mom.rb +9 -7
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +5 -0
- data.tar.gz.sig +0 -0
- metadata +16 -4
- metadata.gz.sig +0 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b3d0a2f78b4bf7fa8f49527d48d2e877b95597566e07beabf0166a02259a936b
+  data.tar.gz: fc6054ad5f99bfe8a678c337167f93fc612dddfe88494f8891158dbd8610fb7f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0fb1fa88ef76ce81e145797a1364ac36bea2b94c47e733856cfd5ec9b37d0d9e2e984a3e4ef7fc36d2ac34c448e490cfdea4e10fae886cd80fb289798e55d308
+  data.tar.gz: 68df2bc1edb9acccd45d32428b43fd5dee12b9333c6059c801aa4ac03b3b89c01e93e8ea4ebbb8021618447c1122c8c5df64afc86747d6f0deec0cf992237e82
checksums.yaml.gz.sig
CHANGED
Binary file
data/.github/workflows/ci.yml
CHANGED
@@ -62,7 +62,7 @@ jobs:
         run: \curl -sSL https://api.coditsu.io/run/ci | bash

   specs:
-    timeout-minutes:
+    timeout-minutes: 30
     runs-on: ubuntu-latest
     needs: diffend
     strategy:
@@ -102,7 +102,7 @@ jobs:
         run: bin/rspecs

   integrations:
-    timeout-minutes:
+    timeout-minutes: 45
     runs-on: ubuntu-latest
     needs: diffend
     strategy:
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,24 @@
 # Karafka framework changelog

+## 2.1.0 (2023-05-22)
+- **[Feature]** Provide ability to use CurrentAttributes with ActiveJob's Karafka adapter.
+- **[Feature]** Introduce collective Virtual Partitions offset management.
+- **[Feature]** Use virtual offsets to filter out messages that would be re-processed upon retries.
+- [Improvement] No longer break processing on failing parallel virtual partitions in ActiveJob because it is compensated by virtual marking.
+- [Improvement] Always use Virtual offset management for Pro ActiveJobs.
+- [Improvement] Do not attempt to mark offsets on already revoked partitions.
+- [Improvement] Make sure, that VP components are not injected into non VP strategies.
+- [Improvement] Improve complex strategies inheritance flow.
+- [Improvement] Optimize offset management for DLQ + MoM feature combinations.
+- [Change] Removed `Karafka::Pro::BaseConsumer` in favor of `Karafka::BaseConsumer`. (#1345)
+- [Fix] Fix for `max_messages` and `max_wait_time` not having reference in errors.yml (#1443)
+
+### Upgrade notes
+
+1. Upgrade to Karafka `2.0.41` prior to upgrading to `2.1.0`.
+2. Replace `Karafka::Pro::BaseConsumer` references with `Karafka::BaseConsumer`.
+3. Replace `Karafka::Instrumentation::Vendors::Datadog::Listener` with `Karafka::Instrumentation::Vendors::Datadog::MetricsListener`.
+
 ## 2.0.41 (2023-04-19)
 - **[Feature]** Provide `Karafka::Pro::Iterator` for anonymous topic/partitions iterations and messages lookups (#1389 and #1427).
 - [Improvement] Optimize topic lookup for `read_topic` admin method usage.
@@ -60,7 +79,7 @@

 ## 2.0.35 (2023-03-13)
 - **[Feature]** Allow for defining topics config via the DSL and its automatic creation via CLI command.
-- **[Feature]** Allow for full topics reset and topics repartitioning via the CLI.
+- **[Feature]** Allow for full topics reset and topics repartitioning via the CLI.

 ## 2.0.34 (2023-03-04)
 - [Improvement] Attach an `embedded` tag to Karafka processes started using the embedded API.
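Editor's note: the two renames in the upgrade notes are mechanical. A sketch of both replacements (the consumer class name is illustrative and listener configuration is omitted):

    # Before (2.0.x)
    class OrdersConsumer < Karafka::Pro::BaseConsumer
    end

    listener = Karafka::Instrumentation::Vendors::Datadog::Listener.new

    # After (2.1.0)
    class OrdersConsumer < Karafka::BaseConsumer
    end

    listener = Karafka::Instrumentation::Vendors::Datadog::MetricsListener.new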
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    karafka (2.0.41)
+    karafka (2.1.0)
       karafka-core (>= 2.0.12, < 3.0.0)
       thor (>= 0.20)
       waterdrop (>= 2.4.10, < 3.0.0)
@@ -78,6 +78,7 @@ GEM
     zeitwerk (2.6.7)

 PLATFORMS
+  arm64-darwin-21
   x86_64-linux

 DEPENDENCIES
data/config/locales/errors.yml
CHANGED
@@ -15,6 +15,13 @@ en:
     shutdown_timeout_format: needs to be an integer bigger than 0
     max_wait_time_format: needs to be an integer bigger than 0
     kafka_format: needs to be a filled hash
+    internal.processing.jobs_builder_format: cannot be nil
+    internal.processing.scheduler: cannot be nil
+    internal.processing.coordinator_class: cannot be nil
+    internal.processing.partitioner_class: cannot be nil
+    internal.active_job.dispatcher: cannot be nil
+    internal.active_job.job_options_contract: cannot be nil
+    internal.active_job.consumer_class: cannot be nil
     internal.status_format: needs to be present
     internal.process_format: needs to be present
     internal.routing.builder_format: needs to be present
@@ -31,7 +38,10 @@ en:
     topics_missing: No topics to subscribe to

   topic:
+    kafka: needs to be a hash with kafka scope settings details
     missing: needs to be present
+    max_messages_format: 'needs to be an integer bigger than 0'
+    max_wait_time_format: 'needs to be an integer bigger than 0'
     name_format: 'needs to be a string with a Kafka accepted format'
     deserializer_format: needs to be present
     consumer_format: needs to be present
data/config/locales/pro_errors.yml
CHANGED
@@ -4,8 +4,6 @@ en:
     virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
     virtual_partitions.max_partitions_format: needs to be equal or more than 1

-    manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
-
     long_running_job.active_format: needs to be either true or false

     dead_letter_queue_with_virtual_partitions: when using Dead Letter Queue with Virtual Partitions, at least one retry is required.
data/lib/karafka/active_job/consumer.rb
CHANGED
@@ -24,21 +24,26 @@ module Karafka
       #
       # @param job_message [Karafka::Messages::Message] message with active job
       def consume_job(job_message)
-        # We technically speaking could set this as deserializer and reference it from the
-        # message instead of using the `#raw_payload`. This is not done on purpose to simplify
-        # the ActiveJob setup here
-        job = ::ActiveSupport::JSON.decode(job_message.raw_payload)
+        with_deserialized_job(job_message) do |job|
+          tags.add(:job_class, job['job_class'])

-        tags.add(:job_class, job['job_class'])
+          payload = { caller: self, job: job, message: job_message }

-        payload = { caller: self, job: job, message: job_message }
-
-        # We publish both to make it consistent with `consumer.x` events
-        Karafka.monitor.instrument('active_job.consume', payload)
-        Karafka.monitor.instrument('active_job.consumed', payload) do
-          ::ActiveJob::Base.execute(job)
+          # We publish both to make it consistent with `consumer.x` events
+          Karafka.monitor.instrument('active_job.consume', payload)
+          Karafka.monitor.instrument('active_job.consumed', payload) do
+            ::ActiveJob::Base.execute(job)
+          end
         end
       end
+
+      # @param job_message [Karafka::Messages::Message] message with active job
+      def with_deserialized_job(job_message)
+        # We technically speaking could set this as deserializer and reference it from the
+        # message instead of using the `#raw_payload`. This is not done on purpose to simplify
+        # the ActiveJob setup here
+        yield ::ActiveSupport::JSON.decode(job_message.raw_payload)
+      end
     end
   end
 end
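Editor's note: the `active_job.consume` / `active_job.consumed` pair above mirrors the `consumer.consume` / `consumer.consumed` events, so it can be observed through the regular monitor API. A minimal sketch, assuming the standard karafka-core notifications bus (which adds `time` in ms to payloads instrumented with a block):

    # Log the execution time of every ActiveJob processed through Karafka
    Karafka.monitor.subscribe('active_job.consumed') do |event|
      job_class = event[:job]['job_class']

      Karafka.logger.info("#{job_class} consumed in #{event[:time].round(2)}ms")
    end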
data/lib/karafka/active_job/current_attributes/loading.rb
ADDED
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Karafka
+  module ActiveJob
+    module CurrentAttributes
+      # Module expanding the job deserialization to extract current attributes and load them
+      # for the time of the job execution
+      module Loading
+        # @param job_message [Karafka::Messages::Message] message with active job
+        def with_deserialized_job(job_message)
+          super(job_message) do |job|
+            resetable = []
+
+            _cattr_klasses.each do |key, cattr_klass_str|
+              next unless job.key?(key)
+
+              attributes = job.delete(key)
+
+              cattr_klass = cattr_klass_str.constantize
+
+              attributes.each do |name, value|
+                cattr_klass.public_send("#{name}=", value)
+              end
+
+              resetable << cattr_klass
+            end
+
+            yield(job)
+
+            resetable.each(&:reset)
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/active_job/current_attributes/persistence.rb
ADDED
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module Karafka
+  module ActiveJob
+    module CurrentAttributes
+      # Module adding the current attributes persistence into the ActiveJob jobs
+      module Persistence
+        # Alters the job serialization to inject the current attributes into the json before we
+        # send it to Kafka
+        #
+        # @param job [ActiveJob::Base] job
+        def serialize_job(job)
+          json = super(job)
+
+          _cattr_klasses.each do |key, cattr_klass_str|
+            next if json.key?(key)
+
+            attrs = cattr_klass_str.constantize.attributes
+
+            json[key] = attrs unless attrs.empty?
+          end
+
+          json
+        end
+      end
+    end
+  end
+end
data/lib/karafka/active_job/current_attributes.rb
ADDED
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+require 'active_support/current_attributes'
+require_relative 'current_attributes/loading'
+require_relative 'current_attributes/persistence'
+
+# This code is based on Sidekiqs approach to persisting current attributes
+# @see https://github.com/sidekiq/sidekiq/blob/main/lib/sidekiq/middleware/current_attributes.rb
+module Karafka
+  module ActiveJob
+    # Module that allows to persist current attributes on Karafka jobs
+    module CurrentAttributes
+      # Allows for persistence of given current attributes via AJ + Karafka
+      #
+      # @param klasses [Array<String, Class>] classes or names of the current attributes classes
+      def persist(*klasses)
+        # Support for providing multiple classes
+        klasses = Array(klasses).flatten
+
+        [Dispatcher, Consumer]
+          .reject { |expandable| expandable.respond_to?(:_cattr_klasses) }
+          .each { |expandable| expandable.class_attribute :_cattr_klasses, default: {} }
+
+        # Do not double inject in case of running persist multiple times
+        Dispatcher.prepend(Persistence) unless Dispatcher.ancestors.include?(Persistence)
+        Consumer.prepend(Loading) unless Consumer.ancestors.include?(Loading)
+
+        klasses.map(&:to_s).each do |stringified_klass|
+          # Prevent registering same klass multiple times
+          next if Dispatcher._cattr_klasses.value?(stringified_klass)
+
+          key = "cattr_#{Dispatcher._cattr_klasses.count}"
+
+          Dispatcher._cattr_klasses[key] = stringified_klass
+          Consumer._cattr_klasses[key] = stringified_klass
+        end
+      end
+
+      module_function :persist
+    end
+  end
+end
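Editor's note: taken together, `persist` registers attribute classes on both the dispatcher and the consumer, `Persistence#serialize_job` embeds their values under `cattr_*` keys on produce, and `Loading#with_deserialized_job` restores and resets them around job execution. A usage sketch based on the `persist` API shown above (the `Current` class is illustrative, not part of Karafka):

    require 'karafka'

    # An application-side CurrentAttributes class
    class Current < ActiveSupport::CurrentAttributes
      attribute :user_id, :request_id
    end

    # From now on, Current values set while enqueuing a job travel with the
    # Kafka message and are restored for the duration of the job's execution
    Karafka::ActiveJob::CurrentAttributes.persist(Current)

    # `persist` flattens its arguments, so several classes can be registered at once:
    # Karafka::ActiveJob::CurrentAttributes.persist(Current, 'OtherAttributes')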
data/lib/karafka/active_job/dispatcher.rb
CHANGED
@@ -18,7 +18,7 @@ module Karafka
        ::Karafka.producer.public_send(
          fetch_option(job, :dispatch_method, DEFAULTS),
          topic: job.queue_name,
-          payload: ::ActiveSupport::JSON.encode(job.serialize)
+          payload: ::ActiveSupport::JSON.encode(serialize_job(job))
        )
      end

@@ -34,7 +34,7 @@ module Karafka

          dispatches[d_method] << {
            topic: job.queue_name,
-            payload: ::ActiveSupport::JSON.encode(job.serialize)
+            payload: ::ActiveSupport::JSON.encode(serialize_job(job))
          }
        end

@@ -58,6 +58,12 @@ module Karafka
          .karafka_options
          .fetch(key, defaults.fetch(key))
      end
+
+      # @param job [ActiveJob::Base] job
+      # @return [Hash] json representation of the job
+      def serialize_job(job)
+        job.serialize
+      end
    end
  end
end
data/lib/karafka/connection/client.rb
CHANGED
@@ -30,7 +30,7 @@ module Karafka
    #
    # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
    #   with all the configuration details needed for us to create a client
-    # @return [Karafka::Connection::
+    # @return [Karafka::Connection::Client]
    def initialize(subscription_group)
      @id = SecureRandom.hex(6)
      # Name is set when we build consumer
data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb
ADDED
@@ -0,0 +1,166 @@
+# frozen_string_literal: true
+
+require 'socket'
+
+module Karafka
+  module Instrumentation
+    module Vendors
+      # Namespace for instrumentation related with Kubernetes
+      module Kubernetes
+        # Kubernetes HTTP listener that does not only reply when process is not fully hanging, but
+        # also allows to define max time of processing and looping.
+        #
+        # Processes like Karafka server can hang while still being reachable. For example, in case
+        # something would hang inside of the user code, Karafka could stop polling and no new
+        # data would be processed, but process itself would still be active. This listener allows
+        # for defining of a ttl that gets bumped on each poll loop and before and after processing
+        # of a given messages batch.
+        class LivenessListener
+          include ::Karafka::Core::Helpers::Time
+
+          # @param hostname [String, nil] hostname or nil to bind on all
+          # @param port [Integer] TCP port on which we want to run our HTTP status server
+          # @param consuming_ttl [Integer] time in ms after which we consider consumption hanging.
+          #   It allows us to define max consumption time after which k8s should consider given
+          #   process as hanging
+          # @param polling_ttl [Integer] max time in ms for polling. If polling (any) does not
+          #   happen that often, process should be considered dead.
+          # @note The default TTL matches the default `max.poll.interval.ms`
+          def initialize(
+            hostname: nil,
+            port: 3000,
+            consuming_ttl: 5 * 60 * 1_000,
+            polling_ttl: 5 * 60 * 1_000
+          )
+            @server = TCPServer.new(*[hostname, port].compact)
+            @polling_ttl = polling_ttl
+            @consuming_ttl = consuming_ttl
+            @mutex = Mutex.new
+            @pollings = {}
+            @consumptions = {}
+
+            Thread.new do
+              loop do
+                break unless respond
+              end
+            end
+          end
+
+          # Tick on each fetch
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_connection_listener_fetch_loop(_event)
+            mark_polling_tick
+          end
+
+          # Tick on starting work
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_consume(_event)
+            mark_consumption_tick
+          end
+
+          # Tick on finished work
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_consumed(_event)
+            clear_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_revoke(_event)
+            mark_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_revoked(_event)
+            clear_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_shutting_down(_event)
+            mark_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_shutdown(_event)
+            clear_consumption_tick
+          end
+
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_error_occurred(_event)
+            clear_consumption_tick
+            clear_polling_tick
+          end
+
+          # Stop the http server when we stop the process
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_app_stopped(_event)
+            @server.close
+          end
+
+          private
+
+          # Wraps the logic with a mutex
+          # @param block [Proc] code we want to run in mutex
+          def synchronize(&block)
+            @mutex.synchronize(&block)
+          end
+
+          # @return [Integer] object id of the current thread
+          def thread_id
+            Thread.current.object_id
+          end
+
+          # Update the polling tick time for current thread
+          def mark_polling_tick
+            synchronize do
+              @pollings[thread_id] = monotonic_now
+            end
+          end
+
+          # Clear current thread polling time tracker
+          def clear_polling_tick
+            synchronize do
+              @pollings.delete(thread_id)
+            end
+          end
+
+          # Update the processing tick time
+          def mark_consumption_tick
+            synchronize do
+              @consumptions[thread_id] = monotonic_now
+            end
+          end
+
+          # Clear current thread consumption time tracker
+          def clear_consumption_tick
+            synchronize do
+              @consumptions.delete(thread_id)
+            end
+          end
+
+          # Responds to a HTTP request with the process liveness status
+          def respond
+            client = @server.accept
+            client.gets
+            client.print "HTTP/1.1 #{status}\r\n"
+            client.close
+
+            true
+          rescue Errno::ECONNRESET, Errno::EPIPE, IOError
+            !@server.closed?
+          end
+
+          # Did we exceed any of the ttls
+          # @return [String] 204 string if ok, 500 otherwise
+          def status
+            time = monotonic_now
+
+            return '500' if @pollings.values.any? { |tick| (time - tick) > @polling_ttl }
+            return '500' if @consumptions.values.any? { |tick| (time - tick) > @consuming_ttl }
+
+            '204'
+          end
+        end
+      end
+    end
+  end
+end
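Editor's note: since the listener exposes `on_*` hooks, it subscribes like any other Karafka listener. A wiring sketch for `karafka.rb` (the values are illustrative; the constructor defaults shown above are five minutes each):

    require 'karafka/instrumentation/vendors/kubernetes/liveness_listener'

    listener = ::Karafka::Instrumentation::Vendors::Kubernetes::LivenessListener.new(
      port: 3000,
      # Report 500 if no poll happened within 5 minutes
      polling_ttl: 5 * 60 * 1_000,
      # Report 500 if a single batch has been processed for longer than 5 minutes
      consuming_ttl: 5 * 60 * 1_000
    )

    Karafka.monitor.subscribe(listener)

A matching Kubernetes `livenessProbe` would then point an `httpGet` at that port: the listener replies `204` while healthy and `500` once either TTL is exceeded.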
data/lib/karafka/pro/active_job/consumer.rb
CHANGED
@@ -35,18 +35,9 @@ module Karafka
            # double-processing
            break if Karafka::App.stopping? && !topic.virtual_partitions?

-            # Break if we already know, that one of virtual partitions has failed and we will
-            # be restarting processing all together after all VPs are done. This will minimize
-            # number of jobs that will be re-processed
-            break if topic.virtual_partitions? && failing?
-
            consume_job(message)

-            # We cannot mark each job as consumed with VPs active, because out of order marking
-            # this could create random markings.
-            # The exception here is the collapsed state where we can move one after another
-            next if topic.virtual_partitions? && !collapsed?
-
+            # We can always mark because of the virtual offset management that we have in VPs
            mark_as_consumed(message)
          end
        end
data/lib/karafka/pro/active_job/dispatcher.rb
CHANGED
@@ -39,7 +39,7 @@ module Karafka
            fetch_option(job, :dispatch_method, DEFAULTS),
            dispatch_details(job).merge!(
              topic: job.queue_name,
-              payload: ::ActiveSupport::JSON.encode(job.serialize)
+              payload: ::ActiveSupport::JSON.encode(serialize_job(job))
            )
          )
        end
@@ -54,7 +54,7 @@ module Karafka

            dispatches[d_method] << dispatch_details(job).merge!(
              topic: job.queue_name,
-              payload: ::ActiveSupport::JSON.encode(job.serialize)
+              payload: ::ActiveSupport::JSON.encode(serialize_job(job))
            )
          end

data/lib/karafka/pro/processing/coordinator.rb
CHANGED
@@ -17,7 +17,7 @@ module Karafka
      # Pro coordinator that provides extra orchestration methods useful for parallel processing
      # within the same partition
      class Coordinator < ::Karafka::Processing::Coordinator
-        attr_reader :filter
+        attr_reader :filter, :virtual_offset_manager

        # @param args [Object] anything the base coordinator accepts
        def initialize(*args)
@@ -27,6 +27,20 @@ module Karafka
          @flow_lock = Mutex.new
          @collapser = Collapser.new
          @filter = FiltersApplier.new(self)
+
+          return unless topic.virtual_partitions?
+
+          @virtual_offset_manager = VirtualOffsetManager.new(
+            topic.name,
+            partition
+          )
+
+          # We register our own "internal" filter to support filtering of messages that were marked
+          # as consumed virtually
+          @filter.filters << Filters::VirtualLimiter.new(
+            @virtual_offset_manager,
+            @collapser
+          )
        end

        # Starts the coordination process
@@ -40,6 +54,11 @@ module Karafka
          @filter.apply!(messages)

          @executed.clear
+
+          # We keep the old processed offsets until the collapsing is done and regular processing
+          # with virtualization is restored
+          @virtual_offset_manager.clear if topic.virtual_partitions? && !@collapser.collapsed?
+
          @last_message = messages.last
        end

data/lib/karafka/pro/processing/filters/virtual_limiter.rb
ADDED
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      module Filters
+        # Removes messages that are already marked as consumed in the virtual offset manager
+        # This should operate only when using virtual partitions.
+        #
+        # This cleaner prevents us from duplicated processing of messages that were virtually
+        # marked as consumed even if we could not mark them as consumed in Kafka. This allows us
+        # to limit reprocessing when errors occur drastically when operating with virtual
+        # partitions
+        #
+        # @note It should be registered only when VPs are used
+        class VirtualLimiter < Base
+          # @param manager [Processing::VirtualOffsetManager]
+          # @param collapser [Processing::Collapser]
+          def initialize(manager, collapser)
+            @manager = manager
+            @collapser = collapser
+
+            super()
+          end
+
+          # Remove messages that we already marked as virtually consumed. Does nothing if not in
+          # the collapsed mode.
+          #
+          # @param messages [Array<Karafka::Messages::Message>]
+          def apply!(messages)
+            return unless @collapser.collapsed?
+
+            marked = @manager.marked
+
+            messages.delete_if { |message| marked.include?(message.offset) }
+          end
+        end
+      end
+    end
+  end
+end
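Editor's note: the limiter above only takes effect for topics that declare Virtual Partitions, and only in the collapsed (post-error) mode, where it drops offsets already recorded by the `VirtualOffsetManager`. For context, a routing sketch enabling Virtual Partitions (Pro feature; the consumer class and partitioner key are illustrative):

    class KarafkaApp < Karafka::App
      routes.draw do
        topic :orders_states do
          consumer OrdersStatesConsumer

          # Spread a single Kafka partition across up to 5 parallel workers,
          # grouping messages by an application-level key
          virtual_partitions(
            partitioner: ->(message) { message.headers['order_id'] },
            max_partitions: 5
          )
        end
      end
    end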
data/lib/karafka/pro/processing/filters_applier.rb
CHANGED
@@ -21,6 +21,10 @@ module Karafka
      # This means that this is the API we expose as a single filter, allowing us to control
      # the filtering via many filters easily.
      class FiltersApplier
+        # @return [Array] registered filters array. Useful if we want to inject internal context
+        #   aware filters.
+        attr_reader :filters
+
        # @param coordinator [Pro::Coordinator] pro coordinator
        def initialize(coordinator)
          # Builds filters out of their factories
data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb
CHANGED
@@ -24,7 +24,9 @@ module Karafka
          # This case is a bit of special. Please see the `AjDlqMom` for explanation on how the
          # offset management works in this case.
          module DlqLrjMom
-            include Strategies::
+            include Strategies::Default
+            include Strategies::Dlq::Default
+            include Strategies::Aj::LrjMom

            # Features for this strategy
            FEATURES = %i[
data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb
CHANGED
@@ -20,9 +20,9 @@ module Karafka
          # Manual offset management enabled
          # Virtual Partitions enabled
          module DlqMomVp
-            include Strategies::Dlq::Default
-            include Strategies::Vp::Default
            include Strategies::Default
+            include Strategies::Dlq::Vp
+            include Strategies::Vp::Default

            # Features for this strategy
            FEATURES = %i[