karafka 2.2.5 → 2.2.7

Files changed (36)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +3 -0
  4. data/CHANGELOG.md +9 -0
  5. data/Gemfile.lock +4 -4
  6. data/config/locales/errors.yml +7 -4
  7. data/config/locales/pro_errors.yml +3 -0
  8. data/lib/karafka/admin.rb +31 -1
  9. data/lib/karafka/base_consumer.rb +7 -1
  10. data/lib/karafka/connection/listener.rb +3 -3
  11. data/lib/karafka/contracts/config.rb +2 -0
  12. data/lib/karafka/pro/loader.rb +2 -2
  13. data/lib/karafka/pro/processing/filters/delayer.rb +1 -1
  14. data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +78 -0
  15. data/lib/karafka/pro/routing/features/inline_insights/config.rb +32 -0
  16. data/lib/karafka/pro/routing/features/inline_insights/contracts/topic.rb +41 -0
  17. data/lib/karafka/pro/routing/features/inline_insights/topic.rb +52 -0
  18. data/lib/karafka/pro/routing/features/inline_insights.rb +26 -0
  19. data/lib/karafka/processing/executor.rb +7 -0
  20. data/lib/karafka/processing/expansions_selector.rb +22 -0
  21. data/lib/karafka/processing/inline_insights/consumer.rb +41 -0
  22. data/lib/karafka/processing/inline_insights/listener.rb +19 -0
  23. data/lib/karafka/processing/inline_insights/tracker.rb +128 -0
  24. data/lib/karafka/railtie.rb +14 -7
  25. data/lib/karafka/routing/features/base.rb +36 -1
  26. data/lib/karafka/routing/features/inline_insights/config.rb +15 -0
  27. data/lib/karafka/routing/features/inline_insights/contracts/topic.rb +27 -0
  28. data/lib/karafka/routing/features/inline_insights/topic.rb +31 -0
  29. data/lib/karafka/routing/features/inline_insights.rb +40 -0
  30. data/lib/karafka/routing/subscription_group.rb +6 -2
  31. data/lib/karafka/setup/attributes_map.rb +1 -0
  32. data/lib/karafka/setup/config.rb +11 -2
  33. data/lib/karafka/version.rb +1 -1
  34. data.tar.gz.sig +0 -0
  35. metadata +16 -3
  36. metadata.gz.sig +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 729320b972ba55fe500db0729aa651c6308e1975db33d3ebd4db8061561e38bc
- data.tar.gz: f26fa836af6b7777692c7fd36fad1c89ce22c7ab60ab243662df40993acd7bd4
+ metadata.gz: b6a42d67752052bcf8f29fb683ed8ffb5fce7675c155b73269c4654681913f7b
+ data.tar.gz: b1bfc5bdad87c27111d8aee50302593133460f0eac47161f3c3872dc726d3b68
  SHA512:
- metadata.gz: 86cecfe51fbdb21943f75a98a1ee8e755496f9777bf5c4445ad44ecff45fa5be34fc79c0ad6358ded3886a59ebde4f0cd47598fdcdda41f75c5152489cf5ca35
- data.tar.gz: fec15466a63fc6e49089f339d3f0d9bb3d4d308c7855adf17f23e93ef10e3e09daf6e8453296a53bf74a57d1004883fc2b144c4564342daec8b94b9a7288bd82
+ metadata.gz: b1e46475db36dc2fc837aa68b1639ad43b94a59afc72dd7287c60fac5f504f55c1ed68f9a1527a4eb1f6743801d38ba44ac6eef6c9d69a6b2ae4e4fbf3034b98
+ data.tar.gz: 329fe59c9bbede3367c9bb3624c69652111f9387360d28b8d9e83bb0a184ea0e524d87d3f47afc400446f48993a57a41c4c171ae5e16c5413c7a496e88fc0421
checksums.yaml.gz.sig CHANGED
Binary file
data/.github/workflows/ci.yml CHANGED
@@ -71,6 +71,7 @@ jobs:
  fail-fast: false
  matrix:
  ruby:
+ - '3.3.0-preview2'
  - '3.2'
  # We run it against the oldest and the newest of a given major to make sure, that there
  # are no syntax-sugars that we would use that were introduced down the road
@@ -115,6 +116,7 @@ jobs:
  fail-fast: false
  matrix:
  ruby:
+ - '3.3.0-preview2'
  - '3.2'
  - '3.1'
  - '3.0'
@@ -166,6 +168,7 @@ jobs:
  fail-fast: false
  matrix:
  ruby:
+ - '3.3.0-preview2'
  - '3.2'
  - '3.1'
  - '3.0'
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
  # Karafka framework changelog

+ ## 2.2.7 (2023-10-07)
+ - **[Feature]** Introduce Inline Insights to both OSS and Pro. Inline Insights allow you to get Kafka insights/metrics from the consumer instance and use them to alter the processing flow. In Pro, there is an extra filtering flow that can ensure insights exist during consumption.
+ - [Enhancement] Make sure subscription group ids are unique by including their consumer group id in them, similar to how topic ids are handled (not a breaking change).
+ - [Enhancement] Expose an `#attempt` method on consumers that directly indicates the number of the processing attempt for given data.
+ - [Enhancement] Support Rails 7.1.
+
+ ## 2.2.6 (2023-09-26)
+ - [Enhancement] Retry `Karafka::Admin#read_watermark_offsets` fetching upon `not_leader_for_partition`, which can occur mostly on newly created topics in KRaft and after crashes during leader selection.
+
  ## 2.2.5 (2023-09-25)
  - [Enhancement] Ensure that when topic-related operations end, the result is usable. There were a few cases where admin operations on topics would finish successfully but internal Kafka caches would not report changes for a short period of time.
  - [Enhancement] Stabilize the cooperative-sticky early shutdown procedure.
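
Editor's note: the headline 2.2.7 feature in one picture. Routing enables Inline Insights per topic and the consumer gains `#insights` / `#insights?` (aliased as `#statistics` / `#statistics?`), as defined later in this diff. A minimal sketch, assuming a hypothetical `EventsConsumer` and an `events` topic:

    class EventsConsumer < Karafka::BaseConsumer
      def consume
        # insights? is false until librdkafka statistics were collected for this partition
        return unless insights?

        messages.each { |message| puts message.payload }
      end
    end

    class KarafkaApp < Karafka::App
      routes.draw do
        topic :events do
          consumer EventsConsumer
          # Enables per-partition metrics tracking for this topic
          inline_insights(true)
        end
      end
    end
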
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.2.5)
+ karafka (2.2.7)
  karafka-core (>= 2.2.2, < 2.3.0)
  thor (>= 0.20)
  waterdrop (>= 2.6.6, < 3.0.0)
@@ -37,9 +37,9 @@ GEM
  ffi (~> 1.15)
  mini_portile2 (~> 2.6)
  rake (> 12)
- karafka-web (0.7.4)
+ karafka-web (0.7.5)
  erubi (~> 1.4)
- karafka (>= 2.2.3, < 3.0.0)
+ karafka (>= 2.2.6, < 3.0.0)
  karafka-core (>= 2.2.2, < 3.0.0)
  roda (~> 3.68, >= 3.69)
  tilt (~> 2.0)
@@ -75,7 +75,7 @@ GEM
  waterdrop (2.6.7)
  karafka-core (>= 2.1.1, < 3.0.0)
  zeitwerk (~> 2.3)
- zeitwerk (2.6.11)
+ zeitwerk (2.6.12)

  PLATFORMS
  x86_64-linux
data/config/locales/errors.yml CHANGED
@@ -16,9 +16,11 @@ en:
  max_wait_time_format: needs to be an integer bigger than 0
  kafka_format: needs to be a filled hash
  internal.processing.jobs_builder_format: cannot be nil
- internal.processing.scheduler: cannot be nil
- internal.processing.coordinator_class: cannot be nil
- internal.processing.partitioner_class: cannot be nil
+ internal.processing.scheduler_format: cannot be nil
+ internal.processing.coordinator_class_format: cannot be nil
+ internal.processing.partitioner_class_format: cannot be nil
+ internal.processing.strategy_selector_format: cannot be nil
+ internal.processing.expansions_selector_format: cannot be nil
  internal.active_job.dispatcher: cannot be nil
  internal.active_job.job_options_contract: cannot be nil
  internal.active_job.consumer_class: cannot be nil
@@ -59,8 +61,9 @@ en:
  initial_offset_format: needs to be either earliest or latest
  subscription_group_name_format: must be a non-empty string
  manual_offset_management.active_format: needs to be either true or false
- consumer_active_job_missing: ActiveJob needs to be available
  manual_offset_management_must_be_enabled: cannot be disabled for ActiveJob topics
+ inline_insights.active_format: needs to be either true or false
+ consumer_active_job_missing: ActiveJob needs to be available
  dead_letter_queue.max_retries_format: needs to be equal or bigger than 0
  dead_letter_queue.topic_format: 'needs to be a string with a Kafka accepted format'
  dead_letter_queue.active_format: needs to be either true or false
data/config/locales/pro_errors.yml CHANGED
@@ -31,6 +31,9 @@ en:
  patterns.active_format: 'needs to be boolean'
  patterns.type_format: 'needs to be :matcher, :discovered or :regular'

+ inline_insights.active_format: 'needs to be boolean'
+ inline_insights.required_format: 'needs to be boolean'
+
  consumer_group:
  patterns_format: must be an array with hashes
  patterns_missing: needs to be present
data/lib/karafka/admin.rb CHANGED
@@ -142,7 +142,12 @@ module Karafka
  # @return [Array<Integer, Integer>] low watermark offset and high watermark offset
  def read_watermark_offsets(name, partition)
  with_consumer do |consumer|
- consumer.query_watermark_offsets(name, partition)
+ # For newly created topics or in cases where we're trying to get them but there is no
+ # leader, this can fail. It happens more often for new topics under KRaft, however we
+ # still want to make sure things operate as expected even then
+ with_rdkafka_retry(codes: %i[not_leader_for_partition]) do
+ consumer.query_watermark_offsets(name, partition)
+ end
  end
  end

@@ -228,6 +233,31 @@ module Karafka
  raise
  end

+ # Handles retries for rdkafka related errors that we specify in `:codes`.
+ #
+ # Some operations fail temporarily, especially in cases where we changed something fast,
+ # like topic creation or repartitioning. In cases like this it is ok to retry operations
+ # that do not change the state, as they will usually recover.
+ #
+ # @param codes [Array<Symbol>] librdkafka error codes on which we want to retry
+ # @param max_attempts [Integer] number of attempts (including the initial one) after which
+ # we should give up
+ #
+ # @note This code implements a simple backoff that increases with each attempt.
+ def with_rdkafka_retry(codes:, max_attempts: 5)
+ attempt ||= 0
+ attempt += 1
+
+ yield
+ rescue Rdkafka::RdkafkaError => e
+ raise unless codes.include?(e.code)
+ raise if attempt >= max_attempts
+
+ # Sleep longer with each attempt so the backoff actually grows
+ sleep(attempt)
+
+ retry
+ end
+
  # @param type [Symbol] type of config we want
  # @param settings [Hash] extra settings for config (if needed)
  # @return [::Rdkafka::Config] rdkafka config
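
Editor's note: `with_rdkafka_retry` is private, so the retry is visible only through the public admin APIs that wrap it. A hedged usage sketch, assuming an existing `events` topic:

    # Transparently retries on not_leader_for_partition (with backoff) before raising
    low, high = Karafka::Admin.read_watermark_offsets('events', 0)
    puts "partition 0 spans offsets #{low}..#{high}"
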
data/lib/karafka/base_consumer.rb CHANGED
@@ -252,7 +252,13 @@ module Karafka
  # different flow after there is an error, for example for resources cleanup, small manual
  # backoff or different instrumentation tracking.
  def retrying?
- coordinator.pause_tracker.attempt > 1
+ attempt > 1
+ end
+
+ # @return [Integer] attempt of processing given batch. 1 if this is the first attempt or
+ # higher in case it is a retry
+ def attempt
+ coordinator.pause_tracker.attempt
  end

  # Pauses the processing from the last offset to retry on given message
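
A hedged sketch of the new `#attempt` accessor inside a consumer flow; `alert_oncall!` and `process` are hypothetical helpers:

    def consume
      # attempt is 1 on the first run of this batch; retrying? is simply attempt > 1
      alert_oncall!(topic.name, partition) if attempt > 3

      messages.each { |message| process(message) }
    end
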
data/lib/karafka/connection/listener.rb CHANGED
@@ -112,7 +112,7 @@ module Karafka
  # distributing consuming jobs as upon revoking, we might get assigned to the same
  # partitions, thus getting their jobs. The revoking jobs need to finish before
  # appropriate consumers are taken down and re-created
- build_and_schedule_revoke_lost_partitions_jobs
+ build_and_schedule_revoked_jobs_for_revoked_partitions

  # We wait only on jobs from our subscription group. Other groups are independent.
  # This will block on revoked jobs until they are finished. Those are not meant to last
@@ -140,7 +140,7 @@ module Karafka
  # that occurred in the cluster.
  wait_pinging(
  wait_until: -> { @jobs_queue.empty?(@subscription_group.id) },
- after_ping: -> { build_and_schedule_revoke_lost_partitions_jobs }
+ after_ping: -> { build_and_schedule_revoked_jobs_for_revoked_partitions }
  )

  # We do not want to schedule the shutdown jobs prior to finishing all the jobs
@@ -197,7 +197,7 @@ module Karafka
  end

  # Enqueues revoking jobs for partitions that were taken away from the running process.
- def build_and_schedule_revoke_lost_partitions_jobs
+ def build_and_schedule_revoked_jobs_for_revoked_partitions
  revoked_partitions = @client.rebalance_manager.revoked_partitions

  # Stop early to save on some execution and array allocation
data/lib/karafka/contracts/config.rb CHANGED
@@ -73,6 +73,8 @@ module Karafka
  required(:scheduler) { |val| !val.nil? }
  required(:coordinator_class) { |val| !val.nil? }
  required(:partitioner_class) { |val| !val.nil? }
+ required(:strategy_selector) { |val| !val.nil? }
+ required(:expansions_selector) { |val| !val.nil? }
  end

  nested(:active_job) do
data/lib/karafka/pro/loader.rb CHANGED
@@ -49,7 +49,7 @@ module Karafka
  # Loads all the pro components and configures them wherever it is expected
  # @param config [Karafka::Core::Configurable::Node] app config that we can alter with pro
  # components
- def pre_setup(config)
+ def pre_setup_all(config)
  features.each { |feature| feature.pre_setup(config) }

  reconfigure(config)
@@ -60,7 +60,7 @@ module Karafka
  # Runs post setup features configuration operations
  #
  # @param config [Karafka::Core::Configurable::Node]
- def post_setup(config)
+ def post_setup_all(config)
  features.each { |feature| feature.post_setup(config) }
  end

data/lib/karafka/pro/processing/filters/delayer.rb CHANGED
@@ -24,7 +24,7 @@ module Karafka
  @delay = delay
  end

- # Removes too old messages
+ # Removes too young messages
  #
  # @param messages [Array<Karafka::Messages::Message>]
  def apply!(messages)
data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb ADDED
@@ -0,0 +1,78 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Processing
+       module Filters
+         # Delayer that checks if we have appropriate insights available. If not, pauses for
+         # 5 seconds so the insights can be loaded from the broker.
+         #
+         # In case it would take more than five seconds to load insights, it will just pause again
+         #
+         # This filter ensures that we always have inline insights that a consumer can use
+         #
+         # It is relevant in most cases only during the process start, when the first poll may
+         # not yield statistics yet but will give some data.
+         class InlineInsightsDelayer < Base
+           # Minimum pause time when there are no metrics
+           PAUSE_TIMEOUT = 5_000
+
+           private_constant :PAUSE_TIMEOUT
+
+           # @param topic [Karafka::Routing::Topic]
+           # @param partition [Integer] partition
+           def initialize(topic, partition)
+             super()
+             @topic = topic
+             @partition = partition
+           end
+
+           # Pauses if inline insights would not be available. Does nothing otherwise
+           #
+           # @param messages [Array<Karafka::Messages::Message>]
+           def apply!(messages)
+             @applied = false
+             @cursor = messages.first
+
+             # Nothing to do if there were no messages
+             # This can happen when we chain filters
+             return unless @cursor
+
+             insights = ::Karafka::Processing::InlineInsights::Tracker.find(
+               @topic,
+               @partition
+             )
+
+             # If insights are available, also nothing to do here and we can just process
+             return unless insights.empty?
+
+             messages.clear
+
+             @applied = true
+           end
+
+           # @return [Integer] ms timeout in case of pause
+           def timeout
+             @cursor && applied? ? PAUSE_TIMEOUT : 0
+           end
+
+           # Pause when we had to back-off or skip if delay is not needed
+           def action
+             applied? ? :pause : :skip
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/routing/features/inline_insights/config.rb ADDED
@@ -0,0 +1,32 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Routing
+       module Features
+         class InlineInsights < Base
+           # Config of this feature
+           Config = Struct.new(
+             :active,
+             :required,
+             keyword_init: true
+           ) do
+             alias_method :active?, :active
+             alias_method :required?, :required
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/routing/features/inline_insights/contracts/topic.rb ADDED
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Routing
+       module Features
+         class InlineInsights < Base
+           # Inline Insights related contracts namespace
+           module Contracts
+             # Contract for inline insights topic setup
+             class Topic < Karafka::Contracts::Base
+               configure do |config|
+                 config.error_messages = YAML.safe_load(
+                   File.read(
+                     File.join(Karafka.gem_root, 'config', 'locales', 'pro_errors.yml')
+                   )
+                 ).fetch('en').fetch('validations').fetch('topic')
+               end
+
+               nested :inline_insights do
+                 required(:active) { |val| [true, false].include?(val) }
+                 required(:required) { |val| [true, false].include?(val) }
+               end
+             end
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/routing/features/inline_insights/topic.rb ADDED
@@ -0,0 +1,52 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Routing
+       module Features
+         class InlineInsights < Base
+           # Routing topic inline insights API
+           module Topic
+             # @param active [Boolean] should inline insights be activated
+             # @param required [Boolean] are the insights required to operate
+             def inline_insights(active = -1, required: -1)
+               # This weird style of checking allows us to activate inline insights in a few ways:
+               # - inline_insights(true)
+               # - inline_insights(required: true)
+               # - inline_insights(required: false)
+               #
+               # In each of those cases inline insights will become active
+               @inline_insights ||= begin
+                 config = Config.new(
+                   active: active == true || (active == -1 && required != -1),
+                   required: required == true
+                 )
+
+                 if config.active? && config.required?
+                   factory = lambda do |topic, partition|
+                     Pro::Processing::Filters::InlineInsightsDelayer.new(topic, partition)
+                   end
+
+                   filter(factory)
+                 end
+
+                 config
+               end
+             end
+           end
+         end
+       end
+     end
+   end
+ end
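
The activation styles listed in the inline comment above, as they would appear in a Pro routing block (a sketch; topic and consumer names are illustrative):

    topic :events do
      consumer EventsConsumer

      # Track insights; processing proceeds whether or not they are present yet
      inline_insights(true)

      # Or: track AND require them; this registers InlineInsightsDelayer, which pauses
      # the partition until metrics arrive (the call memoizes, so pick one style)
      # inline_insights(required: true)
    end
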
data/lib/karafka/pro/routing/features/inline_insights.rb ADDED
@@ -0,0 +1,26 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Routing
+       module Features
+         # Enhanced inline insights
+         # Allows you to set up an automatic filter that will ensure that metrics are always
+         # available when processing starts.
+         class InlineInsights < Base
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/processing/executor.rb CHANGED
@@ -131,11 +131,18 @@ module Karafka
  topic = @coordinator.topic

  strategy = ::Karafka::App.config.internal.processing.strategy_selector.find(topic)
+ expansions = ::Karafka::App.config.internal.processing.expansions_selector.find(topic)

  consumer = topic.consumer_class.new
  # We use singleton class as the same consumer class may be used to process different
  # topics with different settings
  consumer.singleton_class.include(strategy)
+
+ # Specific features may expand the consumer API beyond the injected strategy. The
+ # difference here is that a strategy impacts the flow of states, while extra APIs just
+ # provide some extra methods with information, etc., but do not deviate from the flow
+ # behavior
+ expansions.each { |expansion| consumer.singleton_class.include(expansion) }
+
  consumer.client = @client
  consumer.producer = ::Karafka::App.producer
  consumer.coordinator = @coordinator
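
Editor's note: the singleton-class include keeps each expansion scoped to one instance, which matters because a single consumer class may serve differently configured topics. A standalone sketch of the pattern with hypothetical names:

    module Insights
      def insights?
        true
      end
    end

    class MyConsumer; end

    consumer = MyConsumer.new
    # Only this instance gains the extra API; the class itself stays untouched
    consumer.singleton_class.include(Insights)

    consumer.insights?                      # => true
    MyConsumer.new.respond_to?(:insights?)  # => false
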
data/lib/karafka/processing/expansions_selector.rb ADDED
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     # Selector of appropriate consumer API expansions based on the topic setup features.
+     #
+     # Those expansions to the consumer API are NOT about the flow of processing. For that we
+     # have strategies. Expansions are supposed to provide certain extra APIs that the user
+     # can use to get some extra non-flow related functionalities.
+     class ExpansionsSelector
+       # @param topic [Karafka::Routing::Topic] topic with settings based on which we find
+       #   expansions
+       # @return [Array<Module>] modules with proper expansions we're supposed to use to
+       #   enhance the consumer
+       def find(topic)
+         expansions = []
+         expansions << Processing::InlineInsights::Consumer if topic.inline_insights?
+         expansions
+       end
+     end
+   end
+ end
data/lib/karafka/processing/inline_insights/consumer.rb ADDED
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     # Namespace of the Inline Insights feature "non routing" related components
+     #
+     # @note We use `#insights` because it is the feature name, but also `#statistics` to make
+     #   it consistent with the fact that we publish and operate on statistics. Users can pick
+     #   whichever name they prefer.
+     module InlineInsights
+       # Module that adds extra methods to the consumer that allow us to fetch the insights
+       module Consumer
+         # @return [Hash] empty hash or hash with given partition insights if already present
+         # @note We cache insights on the consumer, as in some scenarios we may no longer have
+         #   them inside the Tracker. For example under involuntary revocation, incoming
+         #   statistics may no longer contain the lost partition insights. Since we want to be
+         #   consistent during single batch operations, we want to ensure that if we have
+         #   insights, they are available throughout the whole processing.
+         def insights
+           insights = Tracker.find(topic, partition)
+
+           # If we no longer have new insights but we still have them locally, we can use them
+           return @insights if @insights && insights.empty?
+           # If insights are still the same, we can use them
+           return @insights if @insights.equal?(insights)
+
+           # If we've received new insights that are not empty, we can cache them
+           @insights = insights
+         end
+
+         # @return [Boolean] true if there are insights to work with, otherwise false
+         def insights?
+           !insights.empty?
+         end
+
+         alias statistics insights
+         alias statistics? insights?
+       end
+     end
+   end
+ end
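
A consumer-side sketch of this API. The statistics keys follow librdkafka's per-partition statistics payload (e.g. `consumer_lag`), which is an assumption about the emitted data; `slow_down!` and `persist` are hypothetical helpers:

    def consume
      if insights?
        # insights is cached per batch, so repeated reads stay consistent
        lag = insights.fetch('consumer_lag', 0)
        slow_down! if lag > 10_000
      end

      messages.each { |message| persist(message) }
    end
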
data/lib/karafka/processing/inline_insights/listener.rb ADDED
@@ -0,0 +1,19 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     module InlineInsights
+       # Listener that adds statistics to our inline tracker
+       class Listener
+         # Adds statistics to the tracker
+         # @param event [Karafka::Core::Monitoring::Event] event with statistics
+         def on_statistics_emitted(event)
+           Tracker.add(
+             event[:consumer_group_id],
+             event[:statistics]
+           )
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/processing/inline_insights/tracker.rb ADDED
@@ -0,0 +1,128 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Processing
+     module InlineInsights
+       # Object used to track statistics coming from librdkafka in a way that can be accessed
+       # by the consumers
+       #
+       # We use a single tracker because we do not need state management here, as our consumer
+       # group clients identified by the statistics name value are unique. On top of that,
+       # having one per process that is a singleton allows us to use the tracker easily also
+       # from other places like the filtering API, etc.
+       #
+       # @note We include a cache of 5 minutes for revoked partitions to compensate for cases
+       #   where, when using LRJ, a lost partition's data would no longer be present even
+       #   though we would still be in the processing phase. Since those metrics are published
+       #   with each `poll`, regular processing is not subject to this issue. For LRJ we keep
+       #   the reference. The only case where this could be switched midway is when LRJ is
+       #   running for an extended period of time after the involuntary revocation. Having a
+       #   time-based cache instead of tracking simplifies the design, as we do not have to
+       #   deal with state tracking, especially since we would also have to track operations
+       #   running in a revoked state.
+       #
+       # @note This tracker keeps in memory data about all topics and partitions that it
+       #   encounters because, in case of routing patterns, we may start getting statistics
+       #   prior to registering a given topic via dynamic routing expansions. In such cases we
+       #   would not have insights where they were actually available for us to use.
+       #
+       # @note Memory usage is negligible as long as we can evict expired data. A single
+       #   metrics set for a single partition contains around 4KB of data. This means that in
+       #   case of an assignment of 1000 partitions, we use around 4MB of space for tracking
+       #   those metrics.
+       class Tracker
+         include Singleton
+         include Karafka::Core::Helpers::Time
+
+         # Empty hash we want to return in any case where we could not locate appropriate
+         # topic partition statistics.
+         EMPTY_HASH = {}.freeze
+
+         # Empty array to save on memory allocations.
+         EMPTY_ARRAY = [].freeze
+
+         # 5 minutes of cache. We cache the last result per consumer group topic partition so
+         # we are not affected by involuntary rebalances during LRJ execution.
+         TTL = 5 * 60 * 1_000
+
+         private_constant :EMPTY_HASH, :EMPTY_ARRAY, :TTL
+
+         class << self
+           extend Forwardable
+
+           def_delegators :instance, :find, :add, :exists?, :clear
+         end
+
+         def initialize
+           @accu = {}
+           @mutex = Mutex.new
+         end
+
+         # Adds each partition's statistics into the internal accumulator. A single statistics
+         # set may contain data from multiple topics and their partitions because a single
+         # client can operate on multiple topics and partitions.
+         #
+         # We iterate over those topics and partitions and store the topic partition data only.
+         #
+         # @param consumer_group_id [String] id of the consumer group for which statistics
+         #   were emitted.
+         # @param statistics [Hash] librdkafka enriched statistics
+         def add(consumer_group_id, statistics)
+           @mutex.synchronize do
+             statistics.fetch('topics', EMPTY_HASH).each do |topic_name, t_details|
+               t_details.fetch('partitions', EMPTY_HASH).each do |partition_id, p_details|
+                 next unless track?(partition_id, p_details)
+
+                 key = "#{consumer_group_id}_#{topic_name}_#{partition_id}"
+                 @accu[key] = [monotonic_now, p_details]
+               end
+             end
+
+             evict
+           end
+         end
+
+         # Finds statistics about the requested consumer group topic partition
+         #
+         # @param topic [Karafka::Routing::Topic]
+         # @param partition [Integer]
+         # @return [Hash] hash with given topic partition statistics or empty hash if not
+         #   present
+         #
+         # @note We do not enclose it with a mutex mainly because the only thing that could
+         #   happen here that would be a race-condition is a miss, which we anyhow need to
+         #   support due to how librdkafka ships metrics, and a potential removal of data on
+         #   heavily revoked LRJ.
+         def find(topic, partition)
+           key = "#{topic.consumer_group.id}_#{topic.name}_#{partition}"
+           @accu.fetch(key, EMPTY_ARRAY).last || EMPTY_HASH
+         end
+
+         # Clears the tracker
+         def clear
+           @mutex.synchronize { @accu.clear }
+         end
+
+         private
+
+         # Evicts expired data from the cache
+         def evict
+           @accu.delete_if { |_, details| monotonic_now - details.first > TTL }
+         end
+
+         # Should we track the given partition
+         #
+         # We do not track stopped partitions or the ones we do not actively work with
+         # @param partition_id [String] partition id as a string
+         # @param p_details [Hash] partition statistics details
+         # @return [Boolean] true if we should track the given partition
+         def track?(partition_id, p_details)
+           return false if partition_id == '-1'
+
+           fetch_state = p_details.fetch('fetch_state')
+
+           return false if fetch_state == 'stopped'
+           return false if fetch_state == 'none'
+
+           true
+         end
+       end
+     end
+   end
+ end
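
The tracker's TTL design in isolation: each write stamps a monotonic timestamp, and each `#add` sweeps expired keys. A standalone sketch of the same idea, assuming a plain clock helper instead of `Karafka::Core::Helpers::Time`:

    ttl_ms = 5 * 60 * 1_000
    accu = {}
    now_ms = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1_000 }

    # Write path: newest statistics win per consumer-group/topic/partition key
    accu['cg_events_0'] = [now_ms.call, { 'consumer_lag' => 10 }]

    # Sweep path: drop anything older than the TTL
    accu.delete_if { |_, (stored_at, _)| now_ms.call - stored_at > ttl_ms }
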
data/lib/karafka/railtie.rb CHANGED
@@ -33,7 +33,7 @@ if Karafka.rails?
  # server + will support code reloading with each fetched loop. We do it only for karafka
  # based commands as Rails processes and console will have it enabled already
  initializer 'karafka.configure_rails_logger' do
- # Make Karafka uses Rails logger
+ # Make Karafka use Rails logger
  ::Karafka::App.config.logger = Rails.logger

  next unless Rails.env.development?
@@ -42,15 +42,22 @@ if Karafka.rails?
  # If added again, would print stuff twice
  next if ActiveSupport::Logger.logger_outputs_to?(Rails.logger, $stdout)

- logger = ActiveSupport::Logger.new($stdout)
+ stdout_logger = ActiveSupport::Logger.new($stdout)
  # Inherit the logger level from Rails, otherwise would always run with the debug level
- logger.level = Rails.logger.level
+ stdout_logger.level = Rails.logger.level

- Rails.logger.extend(
- ActiveSupport::Logger.broadcast(
- logger
+ rails71plus = Rails.gem_version >= Gem::Version.new('7.1.0')
+
+ # Rails 7.1 replaced the broadcast module with a broadcast logger
+ if rails71plus
+   Rails.logger.broadcast_to(stdout_logger)
+ else
+   Rails.logger.extend(
+     ActiveSupport::Logger.broadcast(
+       stdout_logger
+     )
  )
- )
+ end
  end

  initializer 'karafka.configure_rails_auto_load_paths' do |app|
data/lib/karafka/routing/features/base.rb CHANGED
@@ -23,11 +23,46 @@ module Karafka

  # Loads all the features and activates them
  def load_all
+ features.each(&:activate)
+ end
+
+ # @param config [Karafka::Core::Configurable::Node] app config that we can alter with
+ # particular routing feature specific stuff if needed
+ def pre_setup_all(config)
+ features.each { |feature| feature.pre_setup(config) }
+ end
+
+ # Runs post setup routing features configuration operations
+ #
+ # @param config [Karafka::Core::Configurable::Node]
+ def post_setup_all(config)
+ features.each { |feature| feature.post_setup(config) }
+ end
+
+ private
+
+ # @return [Array<Class>] all available routing features
+ def features
  ObjectSpace
  .each_object(Class)
  .select { |klass| klass < self }
  .sort_by(&:to_s)
- .each(&:activate)
+ end
+
+ protected
+
+ # Runs pre-setup configuration of a particular routing feature
+ #
+ # @param _config [Karafka::Core::Configurable::Node] app config node
+ def pre_setup(_config)
+ true
+ end
+
+ # Runs post-setup configuration of a particular routing feature
+ #
+ # @param _config [Karafka::Core::Configurable::Node] app config node
+ def post_setup(_config)
+ true
  end
  end
  end
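
With these hooks in place, any routing feature subclass can opt into the setup lifecycle. A hedged sketch of a hypothetical feature overriding them:

    module Karafka
      module Routing
        module Features
          # Hypothetical feature, shown only to illustrate the new hooks
          class Auditing < Base
            class << self
              # Invoked via Base.pre_setup_all before user configuration runs
              def pre_setup(config)
                true
              end

              # Invoked via Base.post_setup_all once configuration is finalized
              def post_setup(config)
                true
              end
            end
          end
        end
      end
    end
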
data/lib/karafka/routing/features/inline_insights/config.rb ADDED
@@ -0,0 +1,15 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Routing
+     module Features
+       class InlineInsights < Base
+         # Config of this feature
+         Config = Struct.new(
+           :active,
+           keyword_init: true
+         ) { alias_method :active?, :active }
+       end
+     end
+   end
+ end
data/lib/karafka/routing/features/inline_insights/contracts/topic.rb ADDED
@@ -0,0 +1,27 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Routing
+     module Features
+       class InlineInsights < Base
+         # Inline Insights related contracts namespace
+         module Contracts
+           # Contract for inline insights topic setup
+           class Topic < Karafka::Contracts::Base
+             configure do |config|
+               config.error_messages = YAML.safe_load(
+                 File.read(
+                   File.join(Karafka.gem_root, 'config', 'locales', 'errors.yml')
+                 )
+               ).fetch('en').fetch('validations').fetch('topic')
+             end
+
+             nested :inline_insights do
+               required(:active) { |val| [true, false].include?(val) }
+             end
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/routing/features/inline_insights/topic.rb ADDED
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Routing
+     module Features
+       class InlineInsights < Base
+         # Routing topic inline insights API
+         module Topic
+           # @param active [Boolean] should inline insights be activated
+           def inline_insights(active = false)
+             @inline_insights ||= Config.new(
+               active: active
+             )
+           end
+
+           # @return [Boolean] are inline insights active
+           def inline_insights?
+             inline_insights.active?
+           end
+
+           # @return [Hash] topic setup hash
+           def to_h
+             super.merge(
+               inline_insights: inline_insights.to_h
+             ).freeze
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/routing/features/inline_insights.rb ADDED
@@ -0,0 +1,40 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Routing
+     module Features
+       # Feature allowing us to get visibility during the consumption into metrics of the
+       # particular partition we operate on. It can be useful when making context-aware
+       # consumers that change their behaviours based on the lag and other parameters.
+       class InlineInsights < Base
+         class << self
+           # If needed, installs the needed listener and initializes the tracker
+           #
+           # @param _config [Karafka::Core::Configurable::Node] app config
+           def post_setup(_config)
+             ::Karafka::App.monitor.subscribe('app.running') do
+               # Do not activate tracking of statistics if none of our active topics uses it
+               # This prevents us from tracking metrics when the user runs just a subset of
+               # topics in a given process and none of those actually utilizes this feature
+               next unless ::Karafka::App
+                           .subscription_groups
+                           .values
+                           .flat_map(&:itself)
+                           .flat_map(&:topics)
+                           .flat_map(&:to_a)
+                           .any?(&:inline_insights?)
+
+               # Initialize the tracker prior to becoming multi-threaded
+               ::Karafka::Processing::InlineInsights::Tracker.instance
+
+               # Subscribe to the statistics reports and collect them
+               ::Karafka.monitor.subscribe(
+                 ::Karafka::Processing::InlineInsights::Listener.new
+               )
+             end
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/routing/subscription_group.rb CHANGED
@@ -34,7 +34,11 @@ module Karafka
  def initialize(position, topics)
  @name = topics.first.subscription_group_name
  @consumer_group = topics.first.consumer_group
- @id = "#{@name}_#{position}"
+ # We include the consumer group id here because we want to have unique ids of subscription
+ # groups across the system. Otherwise user could set the same name for multiple
+ # subscription groups in many consumer groups effectively having same id for different
+ # entities
+ @id = "#{@consumer_group.id}_#{@name}_#{position}"
  @position = position
  @topics = topics
  @kafka = build_kafka
@@ -87,7 +91,7 @@ module Karafka
  # If we use static group memberships, there can be a case, where same instance id would
  # be set on many subscription groups as the group instance id from Karafka perspective is
  # set per config. Each instance even if they are subscribed to different topics needs to
- # have if fully unique. To make sure of that, we just add extra postfix at the end that
+ # have it fully unique. To make sure of that, we just add extra postfix at the end that
  # increments.
  group_instance_id = kafka.fetch(:'group.instance.id', false)
data/lib/karafka/setup/attributes_map.rb CHANGED
@@ -221,6 +221,7 @@ module Karafka
  request.timeout.ms
  resolve_cb
  retries
+ retry.backoff.max.ms
  retry.backoff.ms
  sasl.kerberos.keytab
  sasl.kerberos.kinit.cmd
data/lib/karafka/setup/config.rb CHANGED
@@ -208,6 +208,8 @@ module Karafka
  setting :partitioner_class, default: Processing::Partitioner
  # option strategy_selector [Object] processing strategy selector to be used
  setting :strategy_selector, default: Processing::StrategySelector.new
+ # option expansions_selector [Object] processing expansions selector to be used
+ setting :expansions_selector, default: Processing::ExpansionsSelector.new
  end

  # Things related to operating on messages
@@ -238,10 +240,14 @@ module Karafka
  def setup(&block)
  # Will prepare and verify license if present
  Licenser.prepare_and_verify(config.license)
+
+ # Pre-setup configure all routing features that would need this
+ Routing::Features::Base.pre_setup_all(config)
+
  # Will configure all the pro components
  # This needs to happen before end user configuration as the end user may overwrite some
  # of the pro defaults with custom components
- Pro::Loader.pre_setup(config) if Karafka.pro?
+ Pro::Loader.pre_setup_all(config) if Karafka.pro?

  configure(&block)
  merge_kafka_defaults!(config)
@@ -253,9 +259,12 @@ module Karafka
  # Refreshes the references that are cached that might have been changed by the config
  ::Karafka.refresh!

+ # Post-setup configure all routing features that would need this
+ Routing::Features::Base.post_setup_all(config)
+
  # Runs things that need to be executed after config is defined and all the components
  # are also configured
- Pro::Loader.post_setup(config) if Karafka.pro?
+ Pro::Loader.post_setup_all(config) if Karafka.pro?

  Karafka::App.initialized!
  end
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
  # Current Karafka version
- VERSION = '2.2.5'
+ VERSION = '2.2.7'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.2.5
+ version: 2.2.7
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
  -----END CERTIFICATE-----
- date: 2023-09-25 00:00:00.000000000 Z
+ date: 2023-10-07 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: karafka-core
@@ -244,6 +244,7 @@ files:
  - lib/karafka/pro/processing/filters/base.rb
  - lib/karafka/pro/processing/filters/delayer.rb
  - lib/karafka/pro/processing/filters/expirer.rb
+ - lib/karafka/pro/processing/filters/inline_insights_delayer.rb
  - lib/karafka/pro/processing/filters/throttler.rb
  - lib/karafka/pro/processing/filters/virtual_limiter.rb
  - lib/karafka/pro/processing/filters_applier.rb
@@ -321,6 +322,10 @@ files:
  - lib/karafka/pro/routing/features/filtering/config.rb
  - lib/karafka/pro/routing/features/filtering/contracts/topic.rb
  - lib/karafka/pro/routing/features/filtering/topic.rb
+ - lib/karafka/pro/routing/features/inline_insights.rb
+ - lib/karafka/pro/routing/features/inline_insights/config.rb
+ - lib/karafka/pro/routing/features/inline_insights/contracts/topic.rb
+ - lib/karafka/pro/routing/features/inline_insights/topic.rb
  - lib/karafka/pro/routing/features/long_running_job.rb
  - lib/karafka/pro/routing/features/long_running_job/config.rb
  - lib/karafka/pro/routing/features/long_running_job/contracts/topic.rb
@@ -353,6 +358,10 @@ files:
  - lib/karafka/processing/coordinators_buffer.rb
  - lib/karafka/processing/executor.rb
  - lib/karafka/processing/executors_buffer.rb
+ - lib/karafka/processing/expansions_selector.rb
+ - lib/karafka/processing/inline_insights/consumer.rb
+ - lib/karafka/processing/inline_insights/listener.rb
+ - lib/karafka/processing/inline_insights/tracker.rb
  - lib/karafka/processing/jobs/base.rb
  - lib/karafka/processing/jobs/consume.rb
  - lib/karafka/processing/jobs/idle.rb
@@ -394,6 +403,10 @@ files:
  - lib/karafka/routing/features/declaratives/config.rb
  - lib/karafka/routing/features/declaratives/contracts/topic.rb
  - lib/karafka/routing/features/declaratives/topic.rb
+ - lib/karafka/routing/features/inline_insights.rb
+ - lib/karafka/routing/features/inline_insights/config.rb
+ - lib/karafka/routing/features/inline_insights/contracts/topic.rb
+ - lib/karafka/routing/features/inline_insights/topic.rb
  - lib/karafka/routing/features/manual_offset_management.rb
  - lib/karafka/routing/features/manual_offset_management/config.rb
  - lib/karafka/routing/features/manual_offset_management/contracts/topic.rb
@@ -447,7 +460,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.4.19
+ rubygems_version: 3.4.10
  signing_key:
  specification_version: 4
  summary: Karafka is Ruby and Rails efficient Kafka processing framework.
metadata.gz.sig CHANGED
Binary file