karafka 2.0.37 → 2.0.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +1 -1
- data/.ruby-version +1 -1
- data/CHANGELOG.md +34 -0
- data/Gemfile.lock +7 -7
- data/README.md +1 -1
- data/bin/integrations +1 -1
- data/config/locales/errors.yml +0 -7
- data/config/locales/pro_errors.yml +18 -0
- data/lib/karafka/active_job/consumer.rb +22 -7
- data/lib/karafka/admin.rb +46 -14
- data/lib/karafka/base_consumer.rb +35 -55
- data/lib/karafka/connection/listener.rb +15 -10
- data/lib/karafka/errors.rb +0 -3
- data/lib/karafka/instrumentation/logger_listener.rb +44 -3
- data/lib/karafka/instrumentation/notifications.rb +7 -0
- data/lib/karafka/pro/active_job/consumer.rb +10 -5
- data/lib/karafka/pro/processing/coordinator.rb +13 -4
- data/lib/karafka/pro/processing/filters/base.rb +61 -0
- data/lib/karafka/pro/processing/filters/delayer.rb +70 -0
- data/lib/karafka/pro/processing/filters/expirer.rb +51 -0
- data/lib/karafka/pro/processing/filters/throttler.rb +84 -0
- data/lib/karafka/pro/processing/filters_applier.rb +100 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +7 -3
- data/lib/karafka/pro/processing/scheduler.rb +24 -7
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +68 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +74 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +72 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +76 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +62 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +68 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +64 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +69 -0
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom.rb +38 -0
- data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +64 -0
- data/lib/karafka/pro/processing/strategies/aj/ftr_mom.rb +38 -0
- data/lib/karafka/pro/processing/strategies/aj/ftr_mom_vp.rb +58 -0
- data/lib/karafka/pro/processing/strategies/{dlq_lrj_vp.rb → aj/lrj_mom.rb} +14 -13
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +77 -0
- data/lib/karafka/pro/processing/strategies/aj/mom.rb +36 -0
- data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +52 -0
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +131 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +61 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +75 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +74 -0
- data/lib/karafka/pro/processing/strategies/{mom.rb → dlq/ftr_lrj_vp.rb} +16 -19
- data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +73 -0
- data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +39 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +63 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +66 -0
- data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +38 -0
- data/lib/karafka/pro/processing/strategies/dlq/mom.rb +67 -0
- data/lib/karafka/pro/processing/strategies/dlq/vp.rb +39 -0
- data/lib/karafka/pro/processing/strategies/ftr/default.rb +104 -0
- data/lib/karafka/pro/processing/strategies/ftr/vp.rb +40 -0
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +85 -0
- data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +69 -0
- data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +67 -0
- data/lib/karafka/pro/processing/strategies/{vp.rb → lrj/ftr_vp.rb} +15 -13
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +78 -0
- data/lib/karafka/pro/processing/strategies/{aj_lrj_mom.rb → lrj/vp.rb} +13 -12
- data/lib/karafka/pro/processing/strategies/mom/default.rb +46 -0
- data/lib/karafka/pro/processing/strategies/mom/ftr.rb +53 -0
- data/lib/karafka/pro/processing/strategies/vp/default.rb +53 -0
- data/lib/karafka/pro/processing/{strategies/lrj_vp.rb → strategies.rb} +1 -13
- data/lib/karafka/pro/processing/strategy_selector.rb +44 -18
- data/lib/karafka/pro/{processing/strategies/aj_mom.rb → routing/features/delaying/config.rb} +7 -13
- data/lib/karafka/pro/routing/features/delaying/contract.rb +38 -0
- data/lib/karafka/pro/routing/features/delaying/topic.rb +59 -0
- data/lib/karafka/pro/routing/features/delaying.rb +29 -0
- data/lib/karafka/pro/routing/features/expiring/config.rb +27 -0
- data/lib/karafka/pro/routing/features/expiring/contract.rb +38 -0
- data/lib/karafka/pro/routing/features/expiring/topic.rb +59 -0
- data/lib/karafka/pro/routing/features/expiring.rb +27 -0
- data/lib/karafka/pro/routing/features/filtering/config.rb +40 -0
- data/lib/karafka/pro/routing/features/filtering/contract.rb +41 -0
- data/lib/karafka/pro/routing/features/filtering/topic.rb +51 -0
- data/lib/karafka/pro/routing/features/filtering.rb +27 -0
- data/lib/karafka/pro/routing/features/long_running_job/contract.rb +1 -1
- data/lib/karafka/pro/routing/features/throttling/config.rb +32 -0
- data/lib/karafka/pro/routing/features/throttling/contract.rb +41 -0
- data/lib/karafka/pro/routing/features/throttling/topic.rb +69 -0
- data/lib/karafka/pro/routing/features/throttling.rb +30 -0
- data/lib/karafka/processing/coordinator.rb +60 -30
- data/lib/karafka/processing/coordinators_buffer.rb +5 -1
- data/lib/karafka/processing/executor.rb +23 -16
- data/lib/karafka/processing/executors_buffer.rb +10 -26
- data/lib/karafka/processing/jobs/consume.rb +2 -4
- data/lib/karafka/processing/jobs/idle.rb +24 -0
- data/lib/karafka/processing/jobs_builder.rb +2 -3
- data/lib/karafka/processing/result.rb +5 -0
- data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
- data/lib/karafka/processing/strategies/base.rb +5 -0
- data/lib/karafka/processing/strategies/default.rb +50 -0
- data/lib/karafka/processing/strategies/dlq.rb +13 -4
- data/lib/karafka/processing/strategies/dlq_mom.rb +8 -3
- data/lib/karafka/processing/strategy_selector.rb +27 -10
- data/lib/karafka/version.rb +1 -1
- data/renovate.json +6 -0
- data.tar.gz.sig +0 -0
- metadata +66 -22
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +0 -42
- data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom_vp.rb +0 -70
- data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +0 -62
- data/lib/karafka/pro/processing/strategies/aj_dlq_mom_vp.rb +0 -68
- data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +0 -75
- data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +0 -62
- data/lib/karafka/pro/processing/strategies/dlq.rb +0 -120
- data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +0 -65
- data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +0 -62
- data/lib/karafka/pro/processing/strategies/dlq_mom.rb +0 -62
- data/lib/karafka/pro/processing/strategies/dlq_vp.rb +0 -37
- data/lib/karafka/pro/processing/strategies/lrj.rb +0 -83
- data/lib/karafka/pro/processing/strategies/lrj_mom.rb +0 -73
|
@@ -22,20 +22,25 @@ module Karafka
|
|
|
22
22
|
#
|
|
23
23
|
# It contains slightly better revocation warranties than the regular blocking consumer as
|
|
24
24
|
# it can stop processing batch of jobs in the middle after the revocation.
|
|
25
|
-
class Consumer < Karafka::
|
|
25
|
+
class Consumer < ::Karafka::ActiveJob::Consumer
|
|
26
26
|
# Runs ActiveJob jobs processing and handles lrj if needed
|
|
27
27
|
def consume
|
|
28
28
|
messages.each do |message|
|
|
29
29
|
# If for any reason we've lost this partition, not worth iterating over new messages
|
|
30
30
|
# as they are no longer ours
|
|
31
31
|
break if revoked?
|
|
32
|
-
break if Karafka::App.stopping?
|
|
33
32
|
|
|
34
|
-
|
|
33
|
+
# We cannot early stop when running virtual partitions because the intermediate state
|
|
34
|
+
# would force us not to commit the offsets. This would cause extensive
|
|
35
|
+
# double-processing
|
|
36
|
+
break if Karafka::App.stopping? && !topic.virtual_partitions?
|
|
35
37
|
|
|
36
|
-
|
|
38
|
+
# Break if we already know, that one of virtual partitions has failed and we will
|
|
39
|
+
# be restarting processing all together after all VPs are done. This will minimize
|
|
40
|
+
# number of jobs that will be re-processed
|
|
41
|
+
break if topic.virtual_partitions? && failing?
|
|
37
42
|
|
|
38
|
-
|
|
43
|
+
consume_job(message)
|
|
39
44
|
|
|
40
45
|
# We cannot mark jobs as done after each if there are virtual partitions. Otherwise
|
|
41
46
|
# this could create random markings.
|
|
@@ -17,6 +17,8 @@ module Karafka
|
|
|
17
17
|
# Pro coordinator that provides extra orchestration methods useful for parallel processing
|
|
18
18
|
# within the same partition
|
|
19
19
|
class Coordinator < ::Karafka::Processing::Coordinator
|
|
20
|
+
attr_reader :filter
|
|
21
|
+
|
|
20
22
|
# @param args [Object] anything the base coordinator accepts
|
|
21
23
|
def initialize(*args)
|
|
22
24
|
super
|
|
@@ -24,6 +26,7 @@ module Karafka
|
|
|
24
26
|
@executed = []
|
|
25
27
|
@flow_lock = Mutex.new
|
|
26
28
|
@collapser = Collapser.new
|
|
29
|
+
@filter = FiltersApplier.new(self)
|
|
27
30
|
end
|
|
28
31
|
|
|
29
32
|
# Starts the coordination process
|
|
@@ -34,10 +37,10 @@ module Karafka
|
|
|
34
37
|
|
|
35
38
|
@collapser.refresh!(messages.first.offset)
|
|
36
39
|
|
|
37
|
-
@
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
40
|
+
@filter.apply!(messages)
|
|
41
|
+
|
|
42
|
+
@executed.clear
|
|
43
|
+
@last_message = messages.last
|
|
41
44
|
end
|
|
42
45
|
|
|
43
46
|
# Sets the consumer failure status and additionally starts the collapse until
|
|
@@ -54,6 +57,12 @@ module Karafka
|
|
|
54
57
|
@collapser.collapsed?
|
|
55
58
|
end
|
|
56
59
|
|
|
60
|
+
# @return [Boolean] did any of the filters apply any logic that would cause us to run
|
|
61
|
+
# the filtering flow
|
|
62
|
+
def filtered?
|
|
63
|
+
@filter.applied?
|
|
64
|
+
end
|
|
65
|
+
|
|
57
66
|
# @return [Boolean] is the coordinated work finished or not
|
|
58
67
|
def finished?
|
|
59
68
|
@running_jobs.zero?
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
module Filters
|
|
18
|
+
# Base for all the filters.
|
|
19
|
+
# All filters (including custom) need to use this API.
|
|
20
|
+
#
|
|
21
|
+
# Due to the fact, that filters can limit data in such a way, that we need to pause or
|
|
22
|
+
# seek (throttling for example), the api is not just "remove some things from batch" but
|
|
23
|
+
# also provides ways to control the post-filtering operations that may be needed.
|
|
24
|
+
class Base
|
|
25
|
+
# @return [Karafka::Messages::Message, nil] the message that we want to use as a cursor
|
|
26
|
+
# one to pause or seek or nil if not applicable.
|
|
27
|
+
attr_reader :cursor
|
|
28
|
+
|
|
29
|
+
include Karafka::Core::Helpers::Time
|
|
30
|
+
|
|
31
|
+
def initialize
|
|
32
|
+
@applied = false
|
|
33
|
+
@cursor = nil
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# @param messages [Array<Karafka::Messages::Message>] array with messages. Please keep
|
|
37
|
+
# in mind, this may already be partial due to execution of previous filters.
|
|
38
|
+
def apply!(messages)
|
|
39
|
+
raise NotImplementedError, 'Implement in a subclass'
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# @return [Symbol] filter post-execution action on consumer. Either `:skip`, `:pause` or
|
|
43
|
+
# `:seek`.
|
|
44
|
+
def action
|
|
45
|
+
:skip
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# @return [Boolean] did this filter change messages in any way
|
|
49
|
+
def applied?
|
|
50
|
+
@applied
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# @return [Integer] default timeout for pausing (if applicable)
|
|
54
|
+
def timeout
|
|
55
|
+
0
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
module Filters
|
|
18
|
+
# A filter that allows us to delay processing by pausing until time is right.
|
|
19
|
+
class Delayer < Base
|
|
20
|
+
# @param delay [Integer] ms delay / minimum age of each message we want to process
|
|
21
|
+
def initialize(delay)
|
|
22
|
+
super()
|
|
23
|
+
|
|
24
|
+
@delay = delay
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Removes too young messages (and all the messages that follow the first too young one)
|
|
28
|
+
#
|
|
29
|
+
# @param messages [Array<Karafka::Messages::Message>]
|
|
30
|
+
def apply!(messages)
|
|
31
|
+
@applied = false
|
|
32
|
+
@cursor = nil
|
|
33
|
+
|
|
34
|
+
# Time on message is in seconds with ms precision, so we need to convert the delay that
|
|
35
|
+
# is in ms to this format
|
|
36
|
+
border = ::Time.now.utc - @delay / 1_000.to_f
|
|
37
|
+
|
|
38
|
+
messages.delete_if do |message|
|
|
39
|
+
too_young = message.timestamp > border
|
|
40
|
+
|
|
41
|
+
if too_young
|
|
42
|
+
@applied = true
|
|
43
|
+
|
|
44
|
+
@cursor ||= message
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
@applied
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# @return [Integer] timeout delay in ms
|
|
52
|
+
def timeout
|
|
53
|
+
return 0 unless @cursor
|
|
54
|
+
|
|
55
|
+
timeout = (@delay / 1_000.to_f) - (::Time.now.utc - @cursor.timestamp)
|
|
56
|
+
|
|
57
|
+
timeout <= 0 ? 0 : timeout * 1_000
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# @return [Symbol] action to take on post-filtering
|
|
61
|
+
def action
|
|
62
|
+
return :skip unless applied?
|
|
63
|
+
|
|
64
|
+
timeout <= 0 ? :seek : :pause
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
module Filters
|
|
18
|
+
# Expirer for removing too old messages.
|
|
19
|
+
# It never moves offsets in any way and does not impact the processing flow. It always
|
|
20
|
+
# runs `:skip` action.
|
|
21
|
+
class Expirer < Base
|
|
22
|
+
# @param ttl [Integer] maximum age of a message (in ms)
|
|
23
|
+
def initialize(ttl)
|
|
24
|
+
super()
|
|
25
|
+
|
|
26
|
+
@ttl = ttl
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Removes too old messages
|
|
30
|
+
#
|
|
31
|
+
# @param messages [Array<Karafka::Messages::Message>]
|
|
32
|
+
def apply!(messages)
|
|
33
|
+
@applied = false
|
|
34
|
+
|
|
35
|
+
# Time on message is in seconds with ms precision, so we need to convert the ttl that
|
|
36
|
+
# is in ms to this format
|
|
37
|
+
border = ::Time.now.utc - @ttl / 1_000.to_f
|
|
38
|
+
|
|
39
|
+
messages.delete_if do |message|
|
|
40
|
+
too_old = message.timestamp < border
|
|
41
|
+
|
|
42
|
+
@applied = true if too_old
|
|
43
|
+
|
|
44
|
+
too_old
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
# Namespace containing Pro out of the box filters used by various strategies
|
|
18
|
+
module Filters
|
|
19
|
+
# Throttler used to limit number of messages we can process in a given time interval
|
|
20
|
+
# The tricky thing is, that even if we throttle on 100 messages, if we've reached 100, we
|
|
21
|
+
# still need to indicate, that we throttle despite not receiving 101. Otherwise we will
|
|
22
|
+
# not pause the partition and will fetch more data that we should not process.
|
|
23
|
+
#
|
|
24
|
+
# This is a special type of a filter that always throttles and makes us wait / seek if
|
|
25
|
+
# anything is filtered out.
|
|
26
|
+
class Throttler < Base
|
|
27
|
+
# @param limit [Integer] how many messages we can process in a given time
|
|
28
|
+
# @param interval [Integer] interval in milliseconds for which we want to process
|
|
29
|
+
def initialize(limit, interval)
|
|
30
|
+
super()
|
|
31
|
+
|
|
32
|
+
@limit = limit
|
|
33
|
+
@interval = interval
|
|
34
|
+
@requests = Hash.new { |h, k| h[k] = 0 }
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Limits number of messages to a range that we can process (if needed) and keeps track
|
|
38
|
+
# of how many messages we've processed in a given time
|
|
39
|
+
# @param messages [Array<Karafka::Messages::Message>] limits the number of messages to
|
|
40
|
+
# number we can accept in the context of throttling constraints
|
|
41
|
+
def apply!(messages)
|
|
42
|
+
@applied = false
|
|
43
|
+
@cursor = nil
|
|
44
|
+
@time = monotonic_now
|
|
45
|
+
@requests.delete_if { |timestamp, _| timestamp < (@time - @interval) }
|
|
46
|
+
values = @requests.values.sum
|
|
47
|
+
accepted = 0
|
|
48
|
+
|
|
49
|
+
messages.delete_if do |message|
|
|
50
|
+
# +1 because of current
|
|
51
|
+
@applied = (values + accepted + 1) > @limit
|
|
52
|
+
|
|
53
|
+
@cursor = message if @applied && @cursor.nil?
|
|
54
|
+
|
|
55
|
+
next true if @applied
|
|
56
|
+
|
|
57
|
+
accepted += 1
|
|
58
|
+
|
|
59
|
+
false
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
@requests[@time] += accepted
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# @return [Symbol] action to take upon throttler reaching certain state
|
|
66
|
+
def action
|
|
67
|
+
if applied?
|
|
68
|
+
timeout.zero? ? :seek : :pause
|
|
69
|
+
else
|
|
70
|
+
:skip
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# @return [Integer] minimum number of milliseconds to wait before getting more messages
|
|
75
|
+
# so we are no longer throttled and so we can process at least one message
|
|
76
|
+
def timeout
|
|
77
|
+
timeout = @interval - (monotonic_now - @time)
|
|
78
|
+
timeout <= 0 ? 0 : timeout
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
# Applier for all filters we want to have. Whether related to limiting messages based
|
|
18
|
+
# on the payload or any other things.
|
|
19
|
+
#
|
|
20
|
+
# From the outside world perspective, this encapsulates all the filters.
|
|
21
|
+
# This means that this is the API we expose as a single filter, allowing us to control
|
|
22
|
+
# the filtering via many filters easily.
|
|
23
|
+
class FiltersApplier
|
|
24
|
+
# @param coordinator [Pro::Coordinator] pro coordinator
|
|
25
|
+
def initialize(coordinator)
|
|
26
|
+
# Builds filters out of their factories
|
|
27
|
+
# We build it that way (providing topic and partition) because there may be a case where
|
|
28
|
+
# someone wants to have a specific logic that is per topic or partition. Like for example
|
|
29
|
+
# a case where there is a cache bypassing revocations for topic partition.
|
|
30
|
+
#
|
|
31
|
+
# We provide full Karafka routing topic here and not the name only, in case the filter
|
|
32
|
+
# would be customized based on other topic settings (like VPs, etc)
|
|
33
|
+
#
|
|
34
|
+
# This setup allows for biggest flexibility also because topic object holds the reference
|
|
35
|
+
# to the subscription group and consumer group
|
|
36
|
+
@filters = coordinator.topic.filtering.factories.map do |factory|
|
|
37
|
+
factory.call(coordinator.topic, coordinator.partition)
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# @param messages [Array<Karafka::Messages::Message>] array with messages from the
|
|
42
|
+
# partition
|
|
43
|
+
def apply!(messages)
|
|
44
|
+
return unless active?
|
|
45
|
+
|
|
46
|
+
@filters.each { |filter| filter.apply!(messages) }
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# @return [Boolean] did we filter out any messages during filtering run
|
|
50
|
+
def applied?
|
|
51
|
+
return false unless active?
|
|
52
|
+
|
|
53
|
+
!applied.empty?
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# @return [Symbol] consumer post-filtering action that should be taken
|
|
57
|
+
def action
|
|
58
|
+
return :skip unless applied?
|
|
59
|
+
|
|
60
|
+
# The highest priority is on a potential backoff from any of the filters because it is
|
|
61
|
+
# the least risky (delay and continue later)
|
|
62
|
+
return :pause if applied.any? { |filter| filter.action == :pause }
|
|
63
|
+
|
|
64
|
+
# If none of the filters wanted to pause, we can check for any that would want to seek
|
|
65
|
+
# and if there is any, we can go with this strategy
|
|
66
|
+
return :seek if applied.any? { |filter| filter.action == :seek }
|
|
67
|
+
|
|
68
|
+
:skip
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# @return [Integer] minimum timeout we need to pause. This is the minimum for all the
|
|
72
|
+
# filters to satisfy all of them.
|
|
73
|
+
def timeout
|
|
74
|
+
applied.map(&:timeout).compact.min || 0
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# The first message we do need to get next time we poll. We use the minimum not to jump
|
|
78
|
+
# accidentally over any.
|
|
79
|
+
# @return [Karafka::Messages::Message, nil] cursor message or nil if none
|
|
80
|
+
def cursor
|
|
81
|
+
return nil unless active?
|
|
82
|
+
|
|
83
|
+
applied.map(&:cursor).compact.min_by(&:offset)
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
private
|
|
87
|
+
|
|
88
|
+
# @return [Boolean] is filtering active
|
|
89
|
+
def active?
|
|
90
|
+
!@filters.empty?
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# @return [Array<Object>] filters that applied any sort of messages limiting
|
|
94
|
+
def applied
|
|
95
|
+
@filters.select(&:applied?)
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
end
|
|
@@ -16,14 +16,18 @@ module Karafka
|
|
|
16
16
|
module Processing
|
|
17
17
|
# Pro jobs builder that supports lrj
|
|
18
18
|
class JobsBuilder < ::Karafka::Processing::JobsBuilder
|
|
19
|
+
# @param executor [Karafka::Processing::Executor]
|
|
20
|
+
def idle(executor)
|
|
21
|
+
Karafka::Processing::Jobs::Idle.new(executor)
|
|
22
|
+
end
|
|
23
|
+
|
|
19
24
|
# @param executor [Karafka::Processing::Executor]
|
|
20
25
|
# @param messages [Karafka::Messages::Messages] messages batch to be consumed
|
|
21
|
-
# @param coordinator [Karafka::Processing::Coordinator]
|
|
22
26
|
# @return [Karafka::Processing::Jobs::Consume] blocking job
|
|
23
27
|
# @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
|
|
24
|
-
def consume(executor, messages
|
|
28
|
+
def consume(executor, messages)
|
|
25
29
|
if executor.topic.long_running_job?
|
|
26
|
-
Jobs::ConsumeNonBlocking.new(executor, messages
|
|
30
|
+
Jobs::ConsumeNonBlocking.new(executor, messages)
|
|
27
31
|
else
|
|
28
32
|
super
|
|
29
33
|
end
|
|
@@ -31,17 +31,15 @@ module Karafka
|
|
|
31
31
|
# @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
|
|
32
32
|
#
|
|
33
33
|
def schedule_consumption(queue, jobs_array)
|
|
34
|
-
|
|
34
|
+
perf_tracker = PerformanceTracker.instance
|
|
35
35
|
|
|
36
36
|
ordered = []
|
|
37
37
|
|
|
38
38
|
jobs_array.each do |job|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
ordered << [job, cost]
|
|
39
|
+
ordered << [
|
|
40
|
+
job,
|
|
41
|
+
processing_cost(perf_tracker, job)
|
|
42
|
+
]
|
|
45
43
|
end
|
|
46
44
|
|
|
47
45
|
ordered.sort_by!(&:last)
|
|
@@ -52,6 +50,25 @@ module Karafka
|
|
|
52
50
|
queue << job
|
|
53
51
|
end
|
|
54
52
|
end
|
|
53
|
+
|
|
54
|
+
private
|
|
55
|
+
|
|
56
|
+
# @param perf_tracker [PerformanceTracker]
|
|
57
|
+
# @param job [Karafka::Processing::Jobs::Base] job we will be processing
|
|
58
|
+
# @return [Numeric] estimated cost of processing this job
|
|
59
|
+
def processing_cost(perf_tracker, job)
|
|
60
|
+
if job.is_a?(::Karafka::Processing::Jobs::Consume)
|
|
61
|
+
messages = job.messages
|
|
62
|
+
message = messages.first
|
|
63
|
+
|
|
64
|
+
perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
|
|
65
|
+
else
|
|
66
|
+
# LJF will set first the most expensive, but we want to run the zero cost jobs
|
|
67
|
+
# related to the lifecycle always first. That is why we "emulate" that they
|
|
68
|
+
# are the longest possible jobs that anyone can run
|
|
69
|
+
Float::INFINITY
|
|
70
|
+
end
|
|
71
|
+
end
|
|
55
72
|
end
|
|
56
73
|
end
|
|
57
74
|
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
module Strategies
|
|
18
|
+
module Aj
|
|
19
|
+
# ActiveJob enabled
|
|
20
|
+
# DLQ enabled
|
|
21
|
+
# Filtering enabled
|
|
22
|
+
# Long-Running Job enabled
|
|
23
|
+
# Manual offset management enabled
|
|
24
|
+
module DlqFtrLrjMom
|
|
25
|
+
include Strategies::Aj::FtrMom
|
|
26
|
+
include Strategies::Aj::DlqMom
|
|
27
|
+
include Strategies::Aj::LrjMom
|
|
28
|
+
|
|
29
|
+
# Features for this strategy
|
|
30
|
+
FEATURES = %i[
|
|
31
|
+
active_job
|
|
32
|
+
dead_letter_queue
|
|
33
|
+
filtering
|
|
34
|
+
long_running_job
|
|
35
|
+
manual_offset_management
|
|
36
|
+
].freeze
|
|
37
|
+
|
|
38
|
+
# This strategy assumes we do not early break on shutdown as it has VP
|
|
39
|
+
def handle_after_consume
|
|
40
|
+
coordinator.on_finished do
|
|
41
|
+
if coordinator.success?
|
|
42
|
+
coordinator.pause_tracker.reset
|
|
43
|
+
|
|
44
|
+
if coordinator.filtered? && !revoked?
|
|
45
|
+
handle_post_filtering
|
|
46
|
+
elsif !revoked?
|
|
47
|
+
seek(coordinator.seek_offset)
|
|
48
|
+
resume
|
|
49
|
+
else
|
|
50
|
+
resume
|
|
51
|
+
end
|
|
52
|
+
elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
|
|
53
|
+
retry_after_pause
|
|
54
|
+
else
|
|
55
|
+
coordinator.pause_tracker.reset
|
|
56
|
+
skippable_message, = find_skippable_message
|
|
57
|
+
dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
|
|
58
|
+
mark_as_consumed(skippable_message)
|
|
59
|
+
pause(coordinator.seek_offset, nil, false)
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Processing
|
|
17
|
+
module Strategies
|
|
18
|
+
module Aj
|
|
19
|
+
# ActiveJob enabled
|
|
20
|
+
# DLQ enabled
|
|
21
|
+
# Filtering enabled
|
|
22
|
+
# Long-Running Job enabled
|
|
23
|
+
# Manual offset management enabled
|
|
24
|
+
# Virtual Partitions enabled
|
|
25
|
+
module DlqFtrLrjMomVp
|
|
26
|
+
include Strategies::Aj::FtrMom
|
|
27
|
+
include Strategies::Aj::DlqMomVp
|
|
28
|
+
include Strategies::Aj::LrjMom
|
|
29
|
+
|
|
30
|
+
# Features for this strategy
|
|
31
|
+
FEATURES = %i[
|
|
32
|
+
active_job
|
|
33
|
+
dead_letter_queue
|
|
34
|
+
filtering
|
|
35
|
+
long_running_job
|
|
36
|
+
manual_offset_management
|
|
37
|
+
virtual_partitions
|
|
38
|
+
].freeze
|
|
39
|
+
|
|
40
|
+
# This strategy assumes we do not early break on shutdown as it has VP
|
|
41
|
+
def handle_after_consume
|
|
42
|
+
coordinator.on_finished do |last_group_message|
|
|
43
|
+
if coordinator.success?
|
|
44
|
+
coordinator.pause_tracker.reset
|
|
45
|
+
|
|
46
|
+
# Since we have VP here we do not commit intermediate offsets and need to commit
|
|
47
|
+
# them here. We do commit in collapsed mode but this is generalized.
|
|
48
|
+
mark_as_consumed(last_group_message) unless revoked?
|
|
49
|
+
|
|
50
|
+
if coordinator.filtered? && !revoked?
|
|
51
|
+
handle_post_filtering
|
|
52
|
+
elsif !revoked?
|
|
53
|
+
seek(coordinator.seek_offset)
|
|
54
|
+
resume
|
|
55
|
+
else
|
|
56
|
+
resume
|
|
57
|
+
end
|
|
58
|
+
elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
|
|
59
|
+
retry_after_pause
|
|
60
|
+
else
|
|
61
|
+
coordinator.pause_tracker.reset
|
|
62
|
+
skippable_message, = find_skippable_message
|
|
63
|
+
dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
|
|
64
|
+
mark_as_consumed(skippable_message)
|
|
65
|
+
pause(coordinator.seek_offset, nil, false)
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|