karafka 2.0.38 → 2.0.40

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +1 -1
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +34 -0
  6. data/Gemfile.lock +4 -4
  7. data/bin/integrations +1 -1
  8. data/config/locales/errors.yml +0 -7
  9. data/config/locales/pro_errors.yml +18 -0
  10. data/lib/karafka/base_consumer.rb +35 -55
  11. data/lib/karafka/connection/listener.rb +15 -10
  12. data/lib/karafka/errors.rb +0 -3
  13. data/lib/karafka/instrumentation/logger_listener.rb +44 -3
  14. data/lib/karafka/instrumentation/notifications.rb +4 -0
  15. data/lib/karafka/messages/builders/batch_metadata.rb +6 -5
  16. data/lib/karafka/messages/builders/messages.rb +3 -1
  17. data/lib/karafka/messages/messages.rb +5 -0
  18. data/lib/karafka/pro/active_job/consumer.rb +10 -1
  19. data/lib/karafka/pro/processing/coordinator.rb +13 -4
  20. data/lib/karafka/pro/processing/filters/base.rb +61 -0
  21. data/lib/karafka/pro/processing/filters/delayer.rb +70 -0
  22. data/lib/karafka/pro/processing/filters/expirer.rb +51 -0
  23. data/lib/karafka/pro/processing/filters/throttler.rb +84 -0
  24. data/lib/karafka/pro/processing/filters_applier.rb +100 -0
  25. data/lib/karafka/pro/processing/jobs_builder.rb +7 -3
  26. data/lib/karafka/pro/processing/scheduler.rb +24 -7
  27. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +68 -0
  28. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +74 -0
  29. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +72 -0
  30. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +76 -0
  31. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +62 -0
  32. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +68 -0
  33. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +64 -0
  34. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +69 -0
  35. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom.rb +38 -0
  36. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +64 -0
  37. data/lib/karafka/pro/processing/strategies/aj/ftr_mom.rb +38 -0
  38. data/lib/karafka/pro/processing/strategies/aj/ftr_mom_vp.rb +58 -0
  39. data/lib/karafka/pro/processing/strategies/{dlq_lrj_vp.rb → aj/lrj_mom.rb} +14 -13
  40. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +77 -0
  41. data/lib/karafka/pro/processing/strategies/aj/mom.rb +36 -0
  42. data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +52 -0
  43. data/lib/karafka/pro/processing/strategies/dlq/default.rb +131 -0
  44. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +61 -0
  45. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +75 -0
  46. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +74 -0
  47. data/lib/karafka/pro/processing/strategies/{mom.rb → dlq/ftr_lrj_vp.rb} +16 -19
  48. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +73 -0
  49. data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +39 -0
  50. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +63 -0
  51. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +66 -0
  52. data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +38 -0
  53. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +67 -0
  54. data/lib/karafka/pro/processing/strategies/dlq/vp.rb +40 -0
  55. data/lib/karafka/pro/processing/strategies/ftr/default.rb +104 -0
  56. data/lib/karafka/pro/processing/strategies/ftr/vp.rb +40 -0
  57. data/lib/karafka/pro/processing/strategies/lrj/default.rb +85 -0
  58. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +69 -0
  59. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +67 -0
  60. data/lib/karafka/pro/processing/strategies/{vp.rb → lrj/ftr_vp.rb} +15 -13
  61. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +78 -0
  62. data/lib/karafka/pro/processing/strategies/{aj_lrj_mom.rb → lrj/vp.rb} +13 -12
  63. data/lib/karafka/pro/processing/strategies/mom/default.rb +46 -0
  64. data/lib/karafka/pro/processing/strategies/mom/ftr.rb +53 -0
  65. data/lib/karafka/pro/processing/strategies/vp/default.rb +53 -0
  66. data/lib/karafka/pro/processing/{strategies/lrj_vp.rb → strategies.rb} +1 -13
  67. data/lib/karafka/pro/processing/strategy_selector.rb +44 -18
  68. data/lib/karafka/pro/{processing/strategies/aj_mom.rb → routing/features/delaying/config.rb} +7 -13
  69. data/lib/karafka/pro/routing/features/delaying/contract.rb +38 -0
  70. data/lib/karafka/pro/routing/features/delaying/topic.rb +59 -0
  71. data/lib/karafka/pro/routing/features/delaying.rb +29 -0
  72. data/lib/karafka/pro/routing/features/expiring/config.rb +27 -0
  73. data/lib/karafka/pro/routing/features/expiring/contract.rb +38 -0
  74. data/lib/karafka/pro/routing/features/expiring/topic.rb +59 -0
  75. data/lib/karafka/pro/routing/features/expiring.rb +27 -0
  76. data/lib/karafka/pro/routing/features/filtering/config.rb +40 -0
  77. data/lib/karafka/pro/routing/features/filtering/contract.rb +41 -0
  78. data/lib/karafka/pro/routing/features/filtering/topic.rb +51 -0
  79. data/lib/karafka/pro/routing/features/filtering.rb +27 -0
  80. data/lib/karafka/pro/routing/features/long_running_job/contract.rb +1 -1
  81. data/lib/karafka/pro/routing/features/throttling/config.rb +32 -0
  82. data/lib/karafka/pro/routing/features/throttling/contract.rb +41 -0
  83. data/lib/karafka/pro/routing/features/throttling/topic.rb +69 -0
  84. data/lib/karafka/pro/routing/features/throttling.rb +30 -0
  85. data/lib/karafka/processing/coordinator.rb +60 -30
  86. data/lib/karafka/processing/coordinators_buffer.rb +5 -1
  87. data/lib/karafka/processing/executor.rb +37 -21
  88. data/lib/karafka/processing/executors_buffer.rb +10 -26
  89. data/lib/karafka/processing/jobs/consume.rb +2 -4
  90. data/lib/karafka/processing/jobs/idle.rb +24 -0
  91. data/lib/karafka/processing/jobs_builder.rb +2 -3
  92. data/lib/karafka/processing/result.rb +5 -0
  93. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
  94. data/lib/karafka/processing/strategies/base.rb +5 -0
  95. data/lib/karafka/processing/strategies/default.rb +50 -0
  96. data/lib/karafka/processing/strategies/dlq.rb +13 -4
  97. data/lib/karafka/processing/strategies/dlq_mom.rb +8 -3
  98. data/lib/karafka/processing/strategy_selector.rb +27 -10
  99. data/lib/karafka/version.rb +1 -1
  100. data/renovate.json +6 -0
  101. data.tar.gz.sig +0 -0
  102. metadata +66 -22
  103. metadata.gz.sig +0 -0
  104. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +0 -42
  105. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom_vp.rb +0 -70
  106. data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +0 -62
  107. data/lib/karafka/pro/processing/strategies/aj_dlq_mom_vp.rb +0 -68
  108. data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +0 -75
  109. data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +0 -62
  110. data/lib/karafka/pro/processing/strategies/dlq.rb +0 -120
  111. data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +0 -65
  112. data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +0 -62
  113. data/lib/karafka/pro/processing/strategies/dlq_mom.rb +0 -62
  114. data/lib/karafka/pro/processing/strategies/dlq_vp.rb +0 -37
  115. data/lib/karafka/pro/processing/strategies/lrj.rb +0 -83
  116. data/lib/karafka/pro/processing/strategies/lrj_mom.rb +0 -73
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ # Filtering provides a generic API allowing you to pre-filter messages before they are
19
+ # dispatched to jobs and processed.
20
+ #
21
+ # It allows for throttling, delayed jobs and other filtering implementations.
22
+ class Filtering < Base
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -16,7 +16,7 @@ module Karafka
16
16
  module Routing
17
17
  module Features
18
18
  class LongRunningJob < Base
19
- # Rules around manual offset management settings
19
+ # Rules around long-running job settings
20
20
  class Contract < Contracts::Base
21
21
  configure do |config|
22
22
  config.error_messages = YAML.safe_load(
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ class Throttling < Base
19
+ # Throttling feature configuration
20
+ Config = Struct.new(
21
+ :active,
22
+ :limit,
23
+ :interval,
24
+ keyword_init: true
25
+ ) do
26
+ alias_method :active?, :active
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ class Throttling < Base
19
+ # Rules around throttling settings
20
+ class Contract < Contracts::Base
21
+ configure do |config|
22
+ config.error_messages = YAML.safe_load(
23
+ File.read(
24
+ File.join(Karafka.gem_root, 'config', 'locales', 'pro_errors.yml')
25
+ )
26
+ ).fetch('en').fetch('validations').fetch('topic')
27
+ end
28
+
29
+ nested(:throttling) do
30
+ required(:active) { |val| [true, false].include?(val) }
31
+ required(:interval) { |val| val.is_a?(Integer) && val.positive? }
32
+ required(:limit) do |val|
33
+ (val.is_a?(Integer) || val == Float::INFINITY) && val.positive?
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ class Throttling < Base
19
+ # Topic throttling API extensions
20
+ module Topic
21
+ # @param limit [Integer] max messages to process in a time interval
22
+ # @param interval [Integer] time interval for processing
23
+ def throttling(
24
+ limit: Float::INFINITY,
25
+ interval: 60_000
26
+ )
27
+ # Those settings are used for validation
28
+ @throttling ||= begin
29
+ config = Config.new(
30
+ active: limit != Float::INFINITY,
31
+ limit: limit,
32
+ interval: interval
33
+ )
34
+
35
+ # If someone defined throttling setup, we need to create appropriate filter for it
36
+ # and inject it via filtering feature
37
+ if config.active?
38
+ factory = ->(*) { Pro::Processing::Filters::Throttler.new(limit, interval) }
39
+ filter(factory)
40
+ end
41
+
42
+ config
43
+ end
44
+ end
45
+
46
+ # Just an alias for nice API
47
+ #
48
+ # @param args [Hash] Anything `#throttling` accepts
49
+ def throttle(**args)
50
+ throttling(**args)
51
+ end
52
+
53
+ # @return [Boolean] is throttling active for a given topic
54
+ def throttling?
55
+ throttling.active?
56
+ end
57
+
58
+ # @return [Hash] topic with all its native configuration options plus throttling
59
+ def to_h
60
+ super.merge(
61
+ throttling: throttling.to_h
62
+ ).freeze
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ # Ability to throttle ingestion of data per topic partition
19
+ # Useful when we have a fixed limit of things we can process in a given time period without
20
+ # getting into trouble. It can be used for example to:
21
+ # - make sure we do not insert things to DB too fast
22
+ # - make sure we do not dispatch HTTP requests to external resources too fast
23
+ #
24
+ # This feature is virtual. It materializes itself via the `Filtering` feature.
25
+ class Throttling < Base
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -10,57 +10,65 @@ module Karafka
10
10
  # listener thread, but we go with thread-safe by default for all not to worry about potential
11
11
  # future mistakes.
12
12
  class Coordinator
13
- # @return [Karafka::TimeTrackers::Pause]
14
- attr_reader :pause_tracker
15
-
16
- attr_reader :seek_offset
13
+ attr_reader :pause_tracker, :seek_offset, :topic, :partition
17
14
 
15
+ # @param topic [Karafka::Routing::Topic]
16
+ # @param partition [Integer]
18
17
  # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic partition
19
- def initialize(pause_tracker)
18
+ def initialize(topic, partition, pause_tracker)
19
+ @topic = topic
20
+ @partition = partition
20
21
  @pause_tracker = pause_tracker
21
22
  @revoked = false
22
23
  @consumptions = {}
23
24
  @running_jobs = 0
24
25
  @manual_pause = false
25
26
  @mutex = Mutex.new
27
+ @marked = false
28
+ @failure = false
26
29
  end
27
30
 
28
31
  # Starts the coordinator for given consumption jobs
29
32
  # @param messages [Array<Karafka::Messages::Message>] batch of message for which we are
30
33
  # going to coordinate work. Not used with regular coordinator.
31
34
  def start(messages)
32
- @mutex.synchronize do
33
- @running_jobs = 0
34
- # We need to clear the consumption results hash here, otherwise we could end up storing
35
- # consumption results of consumer instances we no longer control
36
- @consumptions.clear
37
-
38
- # When starting to run, no pause is expected and no manual pause as well
39
- @manual_pause = false
40
-
41
- # We set it on the first encounter and never again, because then the offset setting
42
- # should be up to the consumers logic (our or the end user)
43
- # Seek offset needs to be always initialized as for case where manual offset management
44
- # is turned on, we need to have reference to the first offset even in case of running
45
- # multiple batches without marking any messages as consumed. Rollback needs to happen to
46
- # the last place we know of or the last message + 1 that was marked
47
- @seek_offset ||= messages.first.offset
48
- end
35
+ @failure = false
36
+ @running_jobs = 0
37
+ # We need to clear the consumption results hash here, otherwise we could end up storing
38
+ # consumption results of consumer instances we no longer control
39
+ @consumptions.clear
40
+
41
+ # When starting to run, no pause is expected and no manual pause as well
42
+ @manual_pause = false
43
+
44
+ # We set it on the first encounter and never again, because then the offset setting
45
+ # should be up to the consumers logic (our or the end user)
46
+ # Seek offset needs to be always initialized as for case where manual offset management
47
+ # is turned on, we need to have reference to the first offset even in case of running
48
+ # multiple batches without marking any messages as consumed. Rollback needs to happen to
49
+ # the last place we know of or the last message + 1 that was marked
50
+ #
51
+ # It is however worth keeping in mind, that this may need to be used with `#marked?` to
52
+ # make sure that the first offset is an offset that has been marked.
53
+ @seek_offset ||= messages.first.offset
49
54
  end
50
55
 
51
56
  # @param offset [Integer] message offset
52
57
  def seek_offset=(offset)
53
- @mutex.synchronize { @seek_offset = offset }
58
+ synchronize do
59
+ @marked = true
60
+ @seek_offset = offset
61
+ end
54
62
  end
55
63
 
56
64
  # Increases number of jobs that we handle with this coordinator
57
65
  def increment
58
- @mutex.synchronize { @running_jobs += 1 }
66
+ synchronize { @running_jobs += 1 }
59
67
  end
60
68
 
61
69
  # Decrements number of jobs we handle at the moment
62
70
  def decrement
63
- @mutex.synchronize do
71
+ synchronize do
64
72
  @running_jobs -= 1
65
73
 
66
74
  return @running_jobs unless @running_jobs.negative?
@@ -72,8 +80,10 @@ module Karafka
72
80
  end
73
81
 
74
82
  # Is all the consumption done and finished successfully for this coordinator
83
+ # We do not say we're successful until all work is done, because running work may still
84
+ # crash.
75
85
  def success?
76
- @mutex.synchronize do
86
+ synchronize do
77
87
  @running_jobs.zero? && @consumptions.values.all?(&:success?)
78
88
  end
79
89
  end
@@ -81,7 +91,7 @@ module Karafka
81
91
  # Mark given consumption on consumer as successful
82
92
  # @param consumer [Karafka::BaseConsumer] consumer that finished successfully
83
93
  def success!(consumer)
84
- @mutex.synchronize do
94
+ synchronize do
85
95
  consumption(consumer).success!
86
96
  end
87
97
  end
@@ -90,11 +100,17 @@ module Karafka
90
100
  # @param consumer [Karafka::BaseConsumer] consumer that failed
91
101
  # @param error [StandardError] error that occurred
92
102
  def failure!(consumer, error)
93
- @mutex.synchronize do
103
+ synchronize do
104
+ @failure = true
94
105
  consumption(consumer).failure!(error)
95
106
  end
96
107
  end
97
108
 
109
+ # @return [Boolean] true if any of work we were running failed
110
+ def failure?
111
+ @failure
112
+ end
113
+
98
114
  # Marks given coordinator for processing group as revoked
99
115
  #
100
116
  # This is invoked in two places:
@@ -105,7 +121,7 @@ module Karafka
105
121
  # listener loop dispatching the revocation job. It is ok, as effectively nothing will be
106
122
  # processed until revocation jobs are done.
107
123
  def revoke
108
- @mutex.synchronize { @revoked = true }
124
+ synchronize { @revoked = true }
109
125
  end
110
126
 
111
127
  # @return [Boolean] is the partition we are processing revoked or not
@@ -113,10 +129,18 @@ module Karafka
113
129
  @revoked
114
130
  end
115
131
 
132
+ # @return [Boolean] was the new seek offset assigned at least once. This is needed because
133
+ # by default we assign seek offset of a first message ever, however this is insufficient
134
+ # for DLQ in a scenario where the first message would be broken. We would never move
135
+ # out of it and would end up in an endless loop.
136
+ def marked?
137
+ @marked
138
+ end
139
+
116
140
  # Store in the coordinator info, that this pause was done manually by the end user and not
117
141
  # by the system itself
118
142
  def manual_pause
119
- @mutex.synchronize { @manual_pause = true }
143
+ @manual_pause = true
120
144
  end
121
145
 
122
146
  # @return [Boolean] are we in a pause that was initiated by the user
@@ -124,6 +148,12 @@ module Karafka
124
148
  @pause_tracker.paused? && @manual_pause
125
149
  end
126
150
 
151
+ # Allows to run synchronized (locked) code that can operate in between virtual partitions
152
+ # @param block [Proc] code we want to run in the synchronized mode
153
+ def synchronize(&block)
154
+ @mutex.synchronize(&block)
155
+ end
156
+
127
157
  private
128
158
 
129
159
  # @param consumer [Object] karafka consumer (normal or pro)
@@ -9,16 +9,20 @@ module Karafka
9
9
  # @note This buffer operates only from the listener loop, thus we do not have to make it
10
10
  # thread-safe.
11
11
  class CoordinatorsBuffer
12
- def initialize
12
+ # @param topics [Karafka::Routing::Topics]
13
+ def initialize(topics)
13
14
  @pauses_manager = Connection::PausesManager.new
14
15
  @coordinator_class = ::Karafka::App.config.internal.processing.coordinator_class
15
16
  @coordinators = Hash.new { |h, k| h[k] = {} }
17
+ @topics = topics
16
18
  end
17
19
 
18
20
  # @param topic [String] topic name
19
21
  # @param partition [Integer] partition number
20
22
  def find_or_create(topic, partition)
21
23
  @coordinators[topic][partition] ||= @coordinator_class.new(
24
+ @topics.find(topic),
25
+ partition,
22
26
  @pauses_manager.fetch(topic, partition)
23
27
  )
24
28
  end
@@ -12,6 +12,10 @@ module Karafka
12
12
  # @note Executors are not removed after partition is revoked. They are not that big and will
13
13
  # be re-used in case of a re-claim
14
14
  class Executor
15
+ extend Forwardable
16
+
17
+ def_delegators :@coordinator, :topic, :partition
18
+
15
19
  # @return [String] unique id that we use to ensure, that we use for state tracking
16
20
  attr_reader :id
17
21
 
@@ -21,20 +25,17 @@ module Karafka
21
25
  # @return [Karafka::Messages::Messages] messages batch
22
26
  attr_reader :messages
23
27
 
24
- # Topic accessibility may be needed for the jobs builder to be able to build a proper job
25
- # based on the topic settings defined by the end user
26
- #
27
- # @return [Karafka::Routing::Topic] topic of this executor
28
- attr_reader :topic
28
+ # @return [Karafka::Processing::Coordinator] coordinator for this executor
29
+ attr_reader :coordinator
29
30
 
30
31
  # @param group_id [String] id of the subscription group to which the executor belongs
31
32
  # @param client [Karafka::Connection::Client] kafka client
32
- # @param topic [Karafka::Routing::Topic] topic for which this executor will run
33
- def initialize(group_id, client, topic)
33
+ # @param coordinator [Karafka::Processing::Coordinator]
34
+ def initialize(group_id, client, coordinator)
34
35
  @id = SecureRandom.hex(6)
35
36
  @group_id = group_id
36
37
  @client = client
37
- @topic = topic
38
+ @coordinator = coordinator
38
39
  end
39
40
 
40
41
  # Allows us to prepare the consumer in the listener thread prior to the job being send to
@@ -42,25 +43,21 @@ module Karafka
42
43
  # queue as it could cause starvation.
43
44
  #
44
45
  # @param messages [Array<Karafka::Messages::Message>]
45
- # @param coordinator [Karafka::Processing::Coordinator] coordinator for processing management
46
- def before_enqueue(messages, coordinator)
47
- # the moment we've received the batch or actually the moment we've enqueued it,
48
- # but good enough
49
- @enqueued_at = Time.now
50
-
46
+ def before_enqueue(messages)
51
47
  # Recreate consumer with each batch if persistence is not enabled
52
48
  # We reload the consumers with each batch instead of relying on some external signals
53
49
  # when needed for consistency. That way devs may have it on or off and not in this
54
50
  # middle state, where re-creation of a consumer instance would occur only sometimes
55
51
  @consumer = nil unless ::Karafka::App.config.consumer_persistence
56
52
 
57
- consumer.coordinator = coordinator
58
-
59
53
  # First we build messages batch...
60
54
  consumer.messages = Messages::Builders::Messages.call(
61
55
  messages,
62
- @topic,
63
- @enqueued_at
56
+ topic,
57
+ partition,
58
+ # the moment we've received the batch or actually the moment we've enqueued it,
59
+ # but good enough
60
+ Time.now
64
61
  )
65
62
 
66
63
  consumer.on_before_enqueue
@@ -82,6 +79,23 @@ module Karafka
82
79
  consumer.on_after_consume
83
80
  end
84
81
 
82
+ # Runs consumer idle operations
83
+ # This may include house-keeping or other state management changes that can occur but that
84
+ # do not mean there are any new messages available for the end user to process
85
+ def idle
86
+ # Initializes the messages set in case idle operation would happen before any processing
87
+ # This prevents us from having no messages object at all as the messages object and
88
+ # its metadata may be used for statistics
89
+ consumer.messages ||= Messages::Builders::Messages.call(
90
+ [],
91
+ topic,
92
+ partition,
93
+ Time.now
94
+ )
95
+
96
+ consumer.on_idle
97
+ end
98
+
85
99
  # Runs the controller `#revoked` method that should be triggered when a given consumer is
86
100
  # no longer needed due to partitions reassignment.
87
101
  #
@@ -114,15 +128,17 @@ module Karafka
114
128
  # @return [Object] cached consumer instance
115
129
  def consumer
116
130
  @consumer ||= begin
117
- strategy = ::Karafka::App.config.internal.processing.strategy_selector.find(@topic)
131
+ topic = @coordinator.topic
132
+
133
+ strategy = ::Karafka::App.config.internal.processing.strategy_selector.find(topic)
118
134
 
119
- consumer = @topic.consumer_class.new
135
+ consumer = topic.consumer_class.new
120
136
  # We use singleton class as the same consumer class may be used to process different
121
137
  # topics with different settings
122
138
  consumer.singleton_class.include(strategy)
123
- consumer.topic = @topic
124
139
  consumer.client = @client
125
140
  consumer.producer = ::Karafka::App.producer
141
+ consumer.coordinator = @coordinator
126
142
 
127
143
  consumer
128
144
  end
@@ -20,14 +20,13 @@ module Karafka
20
20
  # @param topic [String] topic name
21
21
  # @param partition [Integer] partition number
22
22
  # @param parallel_key [String] parallel group key
23
+ # @param coordinator [Karafka::Processing::Coordinator]
23
24
  # @return [Executor] consumer executor
24
- def find_or_create(topic, partition, parallel_key)
25
- ktopic = find_topic(topic)
26
-
27
- @buffer[ktopic][partition][parallel_key] ||= Executor.new(
25
+ def find_or_create(topic, partition, parallel_key, coordinator)
26
+ @buffer[topic][partition][parallel_key] ||= Executor.new(
28
27
  @subscription_group.id,
29
28
  @client,
30
- ktopic
29
+ coordinator
31
30
  )
32
31
  end
33
32
 
@@ -37,9 +36,7 @@ module Karafka
37
36
  # @param topic [String] topic name
38
37
  # @param partition [Integer] partition number
39
38
  def revoke(topic, partition)
40
- ktopic = find_topic(topic)
41
-
42
- @buffer[ktopic][partition].clear
39
+ @buffer[topic][partition].clear
43
40
  end
44
41
 
45
42
  # Finds all the executors available for a given topic partition
@@ -48,9 +45,7 @@ module Karafka
48
45
  # @param partition [Integer] partition number
49
46
  # @return [Array<Executor>] executors in use for this topic + partition
50
47
  def find_all(topic, partition)
51
- ktopic = find_topic(topic)
52
-
53
- @buffer[ktopic][partition].values
48
+ @buffer[topic][partition].values
54
49
  end
55
50
 
56
51
  # Iterates over all available executors and yields them together with topic and partition
@@ -59,11 +54,10 @@ module Karafka
59
54
  # @yieldparam [Integer] partition number
60
55
  # @yieldparam [Executor] given executor
61
56
  def each
62
- @buffer.each do |ktopic, partitions|
63
- partitions.each do |partition, executors|
64
- executors.each do |_parallel_key, executor|
65
- # We skip the parallel key here as it does not serve any value when iterating
66
- yield(ktopic, partition, executor)
57
+ @buffer.each do |_, partitions|
58
+ partitions.each do |_, executors|
59
+ executors.each do |_, executor|
60
+ yield(executor)
67
61
  end
68
62
  end
69
63
  end
@@ -73,16 +67,6 @@ module Karafka
73
67
  def clear
74
68
  @buffer.clear
75
69
  end
76
-
77
- private
78
-
79
- # Finds topic based on its name
80
- #
81
- # @param topic [String] topic we're looking for
82
- # @return [Karafka::Routing::Topic] topic we're interested in
83
- def find_topic(topic)
84
- @subscription_group.topics.find(topic) || raise(Errors::TopicNotFoundError, topic)
85
- end
86
70
  end
87
71
  end
88
72
  end
@@ -12,19 +12,17 @@ module Karafka
12
12
  # @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
13
13
  # job
14
14
  # @param messages [Karafka::Messages::Messages] karafka messages batch
15
- # @param coordinator [Karafka::Processing::Coordinator] processing coordinator
16
15
  # @return [Consume]
17
- def initialize(executor, messages, coordinator)
16
+ def initialize(executor, messages)
18
17
  @executor = executor
19
18
  @messages = messages
20
- @coordinator = coordinator
21
19
  super()
22
20
  end
23
21
 
24
22
  # Runs all the preparation code on the executor that needs to happen before the job is
25
23
  # enqueued.
26
24
  def before_enqueue
27
- executor.before_enqueue(@messages, @coordinator)
25
+ executor.before_enqueue(@messages)
28
26
  end
29
27
 
30
28
  # Runs the before consumption preparations on the executor
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ module Jobs
6
+ # Type of job that we may use to run some extra handling that happens without the user
7
+ # related lifecycle event like consumption, revocation, etc.
8
+ class Idle < Base
9
+ # @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
10
+ # job on an active consumer
11
+ # @return [Idle]
12
+ def initialize(executor)
13
+ @executor = executor
14
+ super()
15
+ end
16
+
17
+ # Run the idle work via the executor
18
+ def call
19
+ executor.idle
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -7,10 +7,9 @@ module Karafka
7
7
  class JobsBuilder
8
8
  # @param executor [Karafka::Processing::Executor]
9
9
  # @param messages [Karafka::Messages::Messages] messages batch to be consumed
10
- # @param coordinator [Karafka::Processing::Coordinator]
11
10
  # @return [Karafka::Processing::Jobs::Consume] consumption job
12
- def consume(executor, messages, coordinator)
13
- Jobs::Consume.new(executor, messages, coordinator)
11
+ def consume(executor, messages)
12
+ Jobs::Consume.new(executor, messages)
14
13
  end
15
14
 
16
15
  # @param executor [Karafka::Processing::Executor]
@@ -32,6 +32,11 @@ module Karafka
32
32
  @success = false
33
33
  @cause = cause
34
34
  end
35
+
36
+ # @return [Boolean] true if processing failed
37
+ def failure?
38
+ !@success
39
+ end
35
40
  end
36
41
  end
37
42
  end
@@ -29,7 +29,7 @@ module Karafka
29
29
  retry_after_pause
30
30
  else
31
31
  coordinator.pause_tracker.reset
32
- skippable_message = find_skippable_message
32
+ skippable_message, = find_skippable_message
33
33
  dispatch_to_dlq(skippable_message)
34
34
  # We can commit the offset here because we know that we skip it "forever" and
35
35
  # since AJ consumer commits the offset after each job, we also know that the