karafka 2.0.37 → 2.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +1 -1
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +34 -0
  6. data/Gemfile.lock +7 -7
  7. data/README.md +1 -1
  8. data/bin/integrations +1 -1
  9. data/config/locales/errors.yml +0 -7
  10. data/config/locales/pro_errors.yml +18 -0
  11. data/lib/karafka/active_job/consumer.rb +22 -7
  12. data/lib/karafka/admin.rb +46 -14
  13. data/lib/karafka/base_consumer.rb +35 -55
  14. data/lib/karafka/connection/listener.rb +15 -10
  15. data/lib/karafka/errors.rb +0 -3
  16. data/lib/karafka/instrumentation/logger_listener.rb +44 -3
  17. data/lib/karafka/instrumentation/notifications.rb +7 -0
  18. data/lib/karafka/pro/active_job/consumer.rb +10 -5
  19. data/lib/karafka/pro/processing/coordinator.rb +13 -4
  20. data/lib/karafka/pro/processing/filters/base.rb +61 -0
  21. data/lib/karafka/pro/processing/filters/delayer.rb +70 -0
  22. data/lib/karafka/pro/processing/filters/expirer.rb +51 -0
  23. data/lib/karafka/pro/processing/filters/throttler.rb +84 -0
  24. data/lib/karafka/pro/processing/filters_applier.rb +100 -0
  25. data/lib/karafka/pro/processing/jobs_builder.rb +7 -3
  26. data/lib/karafka/pro/processing/scheduler.rb +24 -7
  27. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +68 -0
  28. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +74 -0
  29. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +72 -0
  30. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +76 -0
  31. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +62 -0
  32. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +68 -0
  33. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +64 -0
  34. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +69 -0
  35. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom.rb +38 -0
  36. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +64 -0
  37. data/lib/karafka/pro/processing/strategies/aj/ftr_mom.rb +38 -0
  38. data/lib/karafka/pro/processing/strategies/aj/ftr_mom_vp.rb +58 -0
  39. data/lib/karafka/pro/processing/strategies/{dlq_lrj_vp.rb → aj/lrj_mom.rb} +14 -13
  40. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +77 -0
  41. data/lib/karafka/pro/processing/strategies/aj/mom.rb +36 -0
  42. data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +52 -0
  43. data/lib/karafka/pro/processing/strategies/dlq/default.rb +131 -0
  44. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +61 -0
  45. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +75 -0
  46. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +74 -0
  47. data/lib/karafka/pro/processing/strategies/{mom.rb → dlq/ftr_lrj_vp.rb} +16 -19
  48. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +73 -0
  49. data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +39 -0
  50. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +63 -0
  51. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +66 -0
  52. data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +38 -0
  53. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +67 -0
  54. data/lib/karafka/pro/processing/strategies/dlq/vp.rb +39 -0
  55. data/lib/karafka/pro/processing/strategies/ftr/default.rb +104 -0
  56. data/lib/karafka/pro/processing/strategies/ftr/vp.rb +40 -0
  57. data/lib/karafka/pro/processing/strategies/lrj/default.rb +85 -0
  58. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +69 -0
  59. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +67 -0
  60. data/lib/karafka/pro/processing/strategies/{vp.rb → lrj/ftr_vp.rb} +15 -13
  61. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +78 -0
  62. data/lib/karafka/pro/processing/strategies/{aj_lrj_mom.rb → lrj/vp.rb} +13 -12
  63. data/lib/karafka/pro/processing/strategies/mom/default.rb +46 -0
  64. data/lib/karafka/pro/processing/strategies/mom/ftr.rb +53 -0
  65. data/lib/karafka/pro/processing/strategies/vp/default.rb +53 -0
  66. data/lib/karafka/pro/processing/{strategies/lrj_vp.rb → strategies.rb} +1 -13
  67. data/lib/karafka/pro/processing/strategy_selector.rb +44 -18
  68. data/lib/karafka/pro/{processing/strategies/aj_mom.rb → routing/features/delaying/config.rb} +7 -13
  69. data/lib/karafka/pro/routing/features/delaying/contract.rb +38 -0
  70. data/lib/karafka/pro/routing/features/delaying/topic.rb +59 -0
  71. data/lib/karafka/pro/routing/features/delaying.rb +29 -0
  72. data/lib/karafka/pro/routing/features/expiring/config.rb +27 -0
  73. data/lib/karafka/pro/routing/features/expiring/contract.rb +38 -0
  74. data/lib/karafka/pro/routing/features/expiring/topic.rb +59 -0
  75. data/lib/karafka/pro/routing/features/expiring.rb +27 -0
  76. data/lib/karafka/pro/routing/features/filtering/config.rb +40 -0
  77. data/lib/karafka/pro/routing/features/filtering/contract.rb +41 -0
  78. data/lib/karafka/pro/routing/features/filtering/topic.rb +51 -0
  79. data/lib/karafka/pro/routing/features/filtering.rb +27 -0
  80. data/lib/karafka/pro/routing/features/long_running_job/contract.rb +1 -1
  81. data/lib/karafka/pro/routing/features/throttling/config.rb +32 -0
  82. data/lib/karafka/pro/routing/features/throttling/contract.rb +41 -0
  83. data/lib/karafka/pro/routing/features/throttling/topic.rb +69 -0
  84. data/lib/karafka/pro/routing/features/throttling.rb +30 -0
  85. data/lib/karafka/processing/coordinator.rb +60 -30
  86. data/lib/karafka/processing/coordinators_buffer.rb +5 -1
  87. data/lib/karafka/processing/executor.rb +23 -16
  88. data/lib/karafka/processing/executors_buffer.rb +10 -26
  89. data/lib/karafka/processing/jobs/consume.rb +2 -4
  90. data/lib/karafka/processing/jobs/idle.rb +24 -0
  91. data/lib/karafka/processing/jobs_builder.rb +2 -3
  92. data/lib/karafka/processing/result.rb +5 -0
  93. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
  94. data/lib/karafka/processing/strategies/base.rb +5 -0
  95. data/lib/karafka/processing/strategies/default.rb +50 -0
  96. data/lib/karafka/processing/strategies/dlq.rb +13 -4
  97. data/lib/karafka/processing/strategies/dlq_mom.rb +8 -3
  98. data/lib/karafka/processing/strategy_selector.rb +27 -10
  99. data/lib/karafka/version.rb +1 -1
  100. data/renovate.json +6 -0
  101. data.tar.gz.sig +0 -0
  102. metadata +66 -22
  103. metadata.gz.sig +0 -0
  104. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +0 -42
  105. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom_vp.rb +0 -70
  106. data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +0 -62
  107. data/lib/karafka/pro/processing/strategies/aj_dlq_mom_vp.rb +0 -68
  108. data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +0 -75
  109. data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +0 -62
  110. data/lib/karafka/pro/processing/strategies/dlq.rb +0 -120
  111. data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +0 -65
  112. data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +0 -62
  113. data/lib/karafka/pro/processing/strategies/dlq_mom.rb +0 -62
  114. data/lib/karafka/pro/processing/strategies/dlq_vp.rb +0 -37
  115. data/lib/karafka/pro/processing/strategies/lrj.rb +0 -83
  116. data/lib/karafka/pro/processing/strategies/lrj_mom.rb +0 -73
@@ -22,20 +22,25 @@ module Karafka
22
22
  #
23
23
  # It contains slightly better revocation warranties than the regular blocking consumer as
24
24
  # it can stop processing batch of jobs in the middle after the revocation.
25
- class Consumer < Karafka::Pro::BaseConsumer
25
+ class Consumer < ::Karafka::ActiveJob::Consumer
26
26
  # Runs ActiveJob jobs processing and handles lrj if needed
27
27
  def consume
28
28
  messages.each do |message|
29
29
  # If for any reason we've lost this partition, not worth iterating over new messages
30
30
  # as they are no longer ours
31
31
  break if revoked?
32
- break if Karafka::App.stopping?
33
32
 
34
- job = ::ActiveSupport::JSON.decode(message.raw_payload)
33
+ # We cannot early stop when running virtual partitions because the intermediate state
34
+ # would force us not to commit the offsets. This would cause extensive
35
+ # double-processing
36
+ break if Karafka::App.stopping? && !topic.virtual_partitions?
35
37
 
36
- tags.add(:job_class, job['job_class'])
38
+ # Break if we already know, that one of virtual partitions has failed and we will
39
+ # be restarting processing all together after all VPs are done. This will minimize
40
+ # number of jobs that will be re-processed
41
+ break if topic.virtual_partitions? && failing?
37
42
 
38
- ::ActiveJob::Base.execute(job)
43
+ consume_job(message)
39
44
 
40
45
  # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
41
46
  # this could create random markings.
@@ -17,6 +17,8 @@ module Karafka
17
17
  # Pro coordinator that provides extra orchestration methods useful for parallel processing
18
18
  # within the same partition
19
19
  class Coordinator < ::Karafka::Processing::Coordinator
20
+ attr_reader :filter
21
+
20
22
  # @param args [Object] anything the base coordinator accepts
21
23
  def initialize(*args)
22
24
  super
@@ -24,6 +26,7 @@ module Karafka
24
26
  @executed = []
25
27
  @flow_lock = Mutex.new
26
28
  @collapser = Collapser.new
29
+ @filter = FiltersApplier.new(self)
27
30
  end
28
31
 
29
32
  # Starts the coordination process
@@ -34,10 +37,10 @@ module Karafka
34
37
 
35
38
  @collapser.refresh!(messages.first.offset)
36
39
 
37
- @mutex.synchronize do
38
- @executed.clear
39
- @last_message = messages.last
40
- end
40
+ @filter.apply!(messages)
41
+
42
+ @executed.clear
43
+ @last_message = messages.last
41
44
  end
42
45
 
43
46
  # Sets the consumer failure status and additionally starts the collapse until
@@ -54,6 +57,12 @@ module Karafka
54
57
  @collapser.collapsed?
55
58
  end
56
59
 
60
+ # @return [Boolean] did any of the filters apply any logic that would cause us to run
61
+ # the filtering flow
62
+ def filtered?
63
+ @filter.applied?
64
+ end
65
+
57
66
  # @return [Boolean] is the coordinated work finished or not
58
67
  def finished?
59
68
  @running_jobs.zero?
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Filters
18
+ # Base for all the filters.
19
+ # All filters (including custom) need to use this API.
20
+ #
21
+ # Due to the fact, that filters can limit data in such a way, that we need to pause or
22
+ # seek (throttling for example), the api is not just "remove some things from batch" but
23
+ # also provides ways to control the post-filtering operations that may be needed.
24
+ class Base
25
+ # @return [Karafka::Messages::Message, nil] the message that we want to use as a cursor
26
+ # to pause or seek, or nil if not applicable.
27
+ attr_reader :cursor
28
+
29
+ include Karafka::Core::Helpers::Time
30
+
31
+ def initialize
32
+ @applied = false
33
+ @cursor = nil
34
+ end
35
+
36
+ # @param messages [Array<Karafka::Messages::Message>] array with messages. Please keep
37
+ # in mind, this may already be partial due to execution of previous filters.
38
+ def apply!(messages)
39
+ raise NotImplementedError, 'Implement in a subclass'
40
+ end
41
+
42
+ # @return [Symbol] filter post-execution action on consumer. Either `:skip`, `:pause` or
43
+ # `:seek`.
44
+ def action
45
+ :skip
46
+ end
47
+
48
+ # @return [Boolean] did this filter change messages in any way
49
+ def applied?
50
+ @applied
51
+ end
52
+
53
+ # @return [Integer] default timeout for pausing (if applicable)
54
+ def timeout
55
+ 0
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Filters
18
+ # A filter that allows us to delay processing by pausing until time is right.
19
+ class Delayer < Base
20
+ # @param delay [Integer] ms delay / minimum age of each message we want to process
21
+ def initialize(delay)
22
+ super()
23
+
24
+ @delay = delay
25
+ end
26
+
27
+ # Removes messages that are too young (and every message after the first such one)
28
+ #
29
+ # @param messages [Array<Karafka::Messages::Message>]
30
+ def apply!(messages)
31
+ @applied = false
32
+ @cursor = nil
33
+
34
+ # Time on message is in seconds with ms precision, so we need to convert the delay that
35
+ # is in ms to this format
36
+ border = ::Time.now.utc - @delay / 1_000.to_f
37
+
38
+ messages.delete_if do |message|
39
+ too_young = message.timestamp > border
40
+
41
+ if too_young
42
+ @applied = true
43
+
44
+ @cursor ||= message
45
+ end
46
+
47
+ @applied
48
+ end
49
+ end
50
+
51
+ # @return [Integer] timeout delay in ms
52
+ def timeout
53
+ return 0 unless @cursor
54
+
55
+ timeout = (@delay / 1_000.to_f) - (::Time.now.utc - @cursor.timestamp)
56
+
57
+ timeout <= 0 ? 0 : timeout * 1_000
58
+ end
59
+
60
+ # @return [Symbol] action to take on post-filtering
61
+ def action
62
+ return :skip unless applied?
63
+
64
+ timeout <= 0 ? :seek : :pause
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Filters
18
+ # Expirer for removing too old messages.
19
+ # It never moves offsets in any way and does not impact the processing flow. It always
20
+ # runs `:skip` action.
21
+ class Expirer < Base
22
+ # @param ttl [Integer] maximum age of a message (in ms)
23
+ def initialize(ttl)
24
+ super()
25
+
26
+ @ttl = ttl
27
+ end
28
+
29
+ # Removes too old messages
30
+ #
31
+ # @param messages [Array<Karafka::Messages::Message>]
32
+ def apply!(messages)
33
+ @applied = false
34
+
35
+ # Time on message is in seconds with ms precision, so we need to convert the ttl that
36
+ # is in ms to this format
37
+ border = ::Time.now.utc - @ttl / 1_000.to_f
38
+
39
+ messages.delete_if do |message|
40
+ too_old = message.timestamp < border
41
+
42
+ @applied = true if too_old
43
+
44
+ too_old
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Namespace containing Pro out of the box filters used by various strategies
18
+ module Filters
19
+ # Throttler used to limit number of messages we can process in a given time interval
20
+ # The tricky thing is, that even if we throttle on 100 messages, if we've reached 100, we
21
+ # still need to indicate, that we throttle despite not receiving 101. Otherwise we will
22
+ # not pause the partition and will fetch more data that we should not process.
23
+ #
24
+ # This is a special type of a filter that always throttles and makes us wait / seek if
25
+ # anything is filtered out.
26
+ class Throttler < Base
27
+ # @param limit [Integer] how many messages we can process in a given time
28
+ # @param interval [Integer] interval in milliseconds for which we want to process
29
+ def initialize(limit, interval)
30
+ super()
31
+
32
+ @limit = limit
33
+ @interval = interval
34
+ @requests = Hash.new { |h, k| h[k] = 0 }
35
+ end
36
+
37
+ # Limits number of messages to a range that we can process (if needed) and keeps track
38
+ # of how many messages we've processed in a given time
39
+ # @param messages [Array<Karafka::Messages::Message>] limits the number of messages to
40
+ # number we can accept in the context of throttling constraints
41
+ def apply!(messages)
42
+ @applied = false
43
+ @cursor = nil
44
+ @time = monotonic_now
45
+ @requests.delete_if { |timestamp, _| timestamp < (@time - @interval) }
46
+ values = @requests.values.sum
47
+ accepted = 0
48
+
49
+ messages.delete_if do |message|
50
+ # +1 because of current
51
+ @applied = (values + accepted + 1) > @limit
52
+
53
+ @cursor = message if @applied && @cursor.nil?
54
+
55
+ next true if @applied
56
+
57
+ accepted += 1
58
+
59
+ false
60
+ end
61
+
62
+ @requests[@time] += accepted
63
+ end
64
+
65
+ # @return [Symbol] action to take upon throttler reaching certain state
66
+ def action
67
+ if applied?
68
+ timeout.zero? ? :seek : :pause
69
+ else
70
+ :skip
71
+ end
72
+ end
73
+
74
+ # @return [Integer] minimum number of milliseconds to wait before getting more messages
75
+ # so we are no longer throttled and so we can process at least one message
76
+ def timeout
77
+ timeout = @interval - (monotonic_now - @time)
78
+ timeout <= 0 ? 0 : timeout
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Applier for all filters we want to have. Whether related to limiting messages based
18
+ # on the payload or any other things.
19
+ #
20
+ # From the outside world perspective, this encapsulates all the filters.
21
+ # This means that this is the API we expose as a single filter, allowing us to control
22
+ # the filtering via many filters easily.
23
+ class FiltersApplier
24
+ # @param coordinator [Pro::Coordinator] pro coordinator
25
+ def initialize(coordinator)
26
+ # Builds filters out of their factories
27
+ # We build it that way (providing topic and partition) because there may be a case where
28
+ # someone wants to have a specific logic that is per topic or partition. Like for example
29
+ # a case where there is a cache bypassing revocations for topic partition.
30
+ #
31
+ # We provide full Karafka routing topic here and not the name only, in case the filter
32
+ # would be customized based on other topic settings (like VPs, etc)
33
+ #
34
+ # This setup allows for biggest flexibility also because topic object holds the reference
35
+ # to the subscription group and consumer group
36
+ @filters = coordinator.topic.filtering.factories.map do |factory|
37
+ factory.call(coordinator.topic, coordinator.partition)
38
+ end
39
+ end
40
+
41
+ # @param messages [Array<Karafka::Messages::Message>] array with messages from the
42
+ # partition
43
+ def apply!(messages)
44
+ return unless active?
45
+
46
+ @filters.each { |filter| filter.apply!(messages) }
47
+ end
48
+
49
+ # @return [Boolean] did we filter out any messages during filtering run
50
+ def applied?
51
+ return false unless active?
52
+
53
+ !applied.empty?
54
+ end
55
+
56
+ # @return [Symbol] consumer post-filtering action that should be taken
57
+ def action
58
+ return :skip unless applied?
59
+
60
+ # The highest priority is on a potential backoff from any of the filters because it is
61
+ # the least risky (delay and continue later)
62
+ return :pause if applied.any? { |filter| filter.action == :pause }
63
+
64
+ # If none of the filters wanted to pause, we can check for any that would want to seek
65
+ # and if there is any, we can go with this strategy
66
+ return :seek if applied.any? { |filter| filter.action == :seek }
67
+
68
+ :skip
69
+ end
70
+
71
+ # @return [Integer] minimum timeout we need to pause. This is the minimum for all the
72
+ # filters to satisfy all of them.
73
+ def timeout
74
+ applied.map(&:timeout).compact.min || 0
75
+ end
76
+
77
+ # The first message we do need to get next time we poll. We use the minimum not to jump
78
+ # accidentally over any message.
79
+ # @return [Karafka::Messages::Message, nil] cursor message or nil if none
80
+ def cursor
81
+ return nil unless active?
82
+
83
+ applied.map(&:cursor).compact.min_by(&:offset)
84
+ end
85
+
86
+ private
87
+
88
+ # @return [Boolean] is filtering active
89
+ def active?
90
+ !@filters.empty?
91
+ end
92
+
93
+ # @return [Array<Object>] filters that applied any sort of messages limiting
94
+ def applied
95
+ @filters.select(&:applied?)
96
+ end
97
+ end
98
+ end
99
+ end
100
+ end
@@ -16,14 +16,18 @@ module Karafka
16
16
  module Processing
17
17
  # Pro jobs builder that supports lrj
18
18
  class JobsBuilder < ::Karafka::Processing::JobsBuilder
19
+ # @param executor [Karafka::Processing::Executor]
20
+ def idle(executor)
21
+ Karafka::Processing::Jobs::Idle.new(executor)
22
+ end
23
+
19
24
  # @param executor [Karafka::Processing::Executor]
20
25
  # @param messages [Karafka::Messages::Messages] messages batch to be consumed
21
- # @param coordinator [Karafka::Processing::Coordinator]
22
26
  # @return [Karafka::Processing::Jobs::Consume] blocking job
23
27
  # @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
24
- def consume(executor, messages, coordinator)
28
+ def consume(executor, messages)
25
29
  if executor.topic.long_running_job?
26
- Jobs::ConsumeNonBlocking.new(executor, messages, coordinator)
30
+ Jobs::ConsumeNonBlocking.new(executor, messages)
27
31
  else
28
32
  super
29
33
  end
@@ -31,17 +31,15 @@ module Karafka
31
31
  # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
32
32
  #
33
33
  def schedule_consumption(queue, jobs_array)
34
- pt = PerformanceTracker.instance
34
+ perf_tracker = PerformanceTracker.instance
35
35
 
36
36
  ordered = []
37
37
 
38
38
  jobs_array.each do |job|
39
- messages = job.messages
40
- message = messages.first
41
-
42
- cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
43
-
44
- ordered << [job, cost]
39
+ ordered << [
40
+ job,
41
+ processing_cost(perf_tracker, job)
42
+ ]
45
43
  end
46
44
 
47
45
  ordered.sort_by!(&:last)
@@ -52,6 +50,25 @@ module Karafka
52
50
  queue << job
53
51
  end
54
52
  end
53
+
54
+ private
55
+
56
+ # @param perf_tracker [PerformanceTracker]
57
+ # @param job [Karafka::Processing::Jobs::Base] job we will be processing
58
+ # @return [Numeric] estimated cost of processing this job
59
+ def processing_cost(perf_tracker, job)
60
+ if job.is_a?(::Karafka::Processing::Jobs::Consume)
61
+ messages = job.messages
62
+ message = messages.first
63
+
64
+ perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
65
+ else
66
+ # LJF will set first the most expensive, but we want to run the zero cost jobs
67
+ # related to the lifecycle always first. That is why we "emulate" that they
68
+ # are the longest possible jobs that anyone can run
69
+ Float::INFINITY
70
+ end
71
+ end
55
72
  end
56
73
  end
57
74
  end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Strategies
18
+ module Aj
19
+ # ActiveJob enabled
20
+ # DLQ enabled
21
+ # Filtering enabled
22
+ # Long-Running Job enabled
23
+ # Manual offset management enabled
24
+ module DlqFtrLrjMom
25
+ include Strategies::Aj::FtrMom
26
+ include Strategies::Aj::DlqMom
27
+ include Strategies::Aj::LrjMom
28
+
29
+ # Features for this strategy
30
+ FEATURES = %i[
31
+ active_job
32
+ dead_letter_queue
33
+ filtering
34
+ long_running_job
35
+ manual_offset_management
36
+ ].freeze
37
+
38
+ # This strategy assumes we do not early break on shutdown as it has VP
39
+ def handle_after_consume
40
+ coordinator.on_finished do
41
+ if coordinator.success?
42
+ coordinator.pause_tracker.reset
43
+
44
+ if coordinator.filtered? && !revoked?
45
+ handle_post_filtering
46
+ elsif !revoked?
47
+ seek(coordinator.seek_offset)
48
+ resume
49
+ else
50
+ resume
51
+ end
52
+ elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
53
+ retry_after_pause
54
+ else
55
+ coordinator.pause_tracker.reset
56
+ skippable_message, = find_skippable_message
57
+ dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
58
+ mark_as_consumed(skippable_message)
59
+ pause(coordinator.seek_offset, nil, false)
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Strategies
18
+ module Aj
19
+ # ActiveJob enabled
20
+ # DLQ enabled
21
+ # Filtering enabled
22
+ # Long-Running Job enabled
23
+ # Manual offset management enabled
24
+ # Virtual Partitions enabled
25
+ module DlqFtrLrjMomVp
26
+ include Strategies::Aj::FtrMom
27
+ include Strategies::Aj::DlqMomVp
28
+ include Strategies::Aj::LrjMom
29
+
30
+ # Features for this strategy
31
+ FEATURES = %i[
32
+ active_job
33
+ dead_letter_queue
34
+ filtering
35
+ long_running_job
36
+ manual_offset_management
37
+ virtual_partitions
38
+ ].freeze
39
+
40
+ # This strategy assumes we do not early break on shutdown as it has VP
41
+ def handle_after_consume
42
+ coordinator.on_finished do |last_group_message|
43
+ if coordinator.success?
44
+ coordinator.pause_tracker.reset
45
+
46
+ # Since we have VP here we do not commit intermediate offsets and need to commit
47
+ # them here. We do commit in collapsed mode but this is generalized.
48
+ mark_as_consumed(last_group_message) unless revoked?
49
+
50
+ if coordinator.filtered? && !revoked?
51
+ handle_post_filtering
52
+ elsif !revoked?
53
+ seek(coordinator.seek_offset)
54
+ resume
55
+ else
56
+ resume
57
+ end
58
+ elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
59
+ retry_after_pause
60
+ else
61
+ coordinator.pause_tracker.reset
62
+ skippable_message, = find_skippable_message
63
+ dispatch_to_dlq(skippable_message) if dispatch_to_dlq?
64
+ mark_as_consumed(skippable_message)
65
+ pause(coordinator.seek_offset, nil, false)
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end