karafka 2.2.6 → 2.2.7

Files changed (35)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +4 -1
  3. data/.github/workflows/ci.yml +3 -0
  4. data/CHANGELOG.md +6 -0
  5. data/Gemfile.lock +4 -4
  6. data/config/locales/errors.yml +7 -4
  7. data/config/locales/pro_errors.yml +3 -0
  8. data/lib/karafka/base_consumer.rb +7 -1
  9. data/lib/karafka/connection/listener.rb +3 -3
  10. data/lib/karafka/contracts/config.rb +2 -0
  11. data/lib/karafka/pro/loader.rb +2 -2
  12. data/lib/karafka/pro/processing/filters/delayer.rb +1 -1
  13. data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +78 -0
  14. data/lib/karafka/pro/routing/features/inline_insights/config.rb +32 -0
  15. data/lib/karafka/pro/routing/features/inline_insights/contracts/topic.rb +41 -0
  16. data/lib/karafka/pro/routing/features/inline_insights/topic.rb +52 -0
  17. data/lib/karafka/pro/routing/features/inline_insights.rb +26 -0
  18. data/lib/karafka/processing/executor.rb +7 -0
  19. data/lib/karafka/processing/expansions_selector.rb +22 -0
  20. data/lib/karafka/processing/inline_insights/consumer.rb +41 -0
  21. data/lib/karafka/processing/inline_insights/listener.rb +19 -0
  22. data/lib/karafka/processing/inline_insights/tracker.rb +128 -0
  23. data/lib/karafka/railtie.rb +14 -7
  24. data/lib/karafka/routing/features/base.rb +36 -1
  25. data/lib/karafka/routing/features/inline_insights/config.rb +15 -0
  26. data/lib/karafka/routing/features/inline_insights/contracts/topic.rb +27 -0
  27. data/lib/karafka/routing/features/inline_insights/topic.rb +31 -0
  28. data/lib/karafka/routing/features/inline_insights.rb +40 -0
  29. data/lib/karafka/routing/subscription_group.rb +6 -2
  30. data/lib/karafka/setup/attributes_map.rb +1 -0
  31. data/lib/karafka/setup/config.rb +11 -2
  32. data/lib/karafka/version.rb +1 -1
  33. data.tar.gz.sig +0 -0
  34. metadata +16 -3
  35. metadata.gz.sig +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 36fe50a9b0a758b9b916ca668c862085629bf09bcf16f7c0e8a340a9b7333abb
- data.tar.gz: 61d3f10df4e1545a1a4e0d8190eb6ee2a7b751636bb347cf2703a74149fc710c
+ metadata.gz: b6a42d67752052bcf8f29fb683ed8ffb5fce7675c155b73269c4654681913f7b
+ data.tar.gz: b1bfc5bdad87c27111d8aee50302593133460f0eac47161f3c3872dc726d3b68
  SHA512:
- metadata.gz: f12fb91f2e8e4e7dba766fbf32c79b734f5cd9f6fa63a66f80863a819dfd6d442024a97a4095bea84ef87bb0cb645ff431ba6d2f04f46d72864c377d61fc76a6
- data.tar.gz: 4235c1944c98f0bd6359b82b7eab68a69812d70665ab0436c9f67cc5011e4c8edfa2c194a6d65744a4d82f97710beca5ddb871b8671febf61e5c9f591a746bee
+ metadata.gz: b1e46475db36dc2fc837aa68b1639ad43b94a59afc72dd7287c60fac5f504f55c1ed68f9a1527a4eb1f6743801d38ba44ac6eef6c9d69a6b2ae4e4fbf3034b98
+ data.tar.gz: 329fe59c9bbede3367c9bb3624c69652111f9387360d28b8d9e83bb0a184ea0e524d87d3f47afc400446f48993a57a41c4c171ae5e16c5413c7a496e88fc0421
checksums.yaml.gz.sig CHANGED
Binary file
data/.github/workflows/ci.yml CHANGED
@@ -71,6 +71,7 @@ jobs:
  fail-fast: false
  matrix:
  ruby:
+ - '3.3.0-preview2'
  - '3.2'
  # We run it against the oldest and the newest of a given major to make sure that there
  # are no syntax-sugars that we would use that were introduced down the road
@@ -115,6 +116,7 @@ jobs:
  fail-fast: false
  matrix:
  ruby:
+ - '3.3.0-preview2'
  - '3.2'
  - '3.1'
  - '3.0'
@@ -166,6 +168,7 @@ jobs:
  fail-fast: false
  matrix:
  ruby:
+ - '3.3.0-preview2'
  - '3.2'
  - '3.1'
  - '3.0'
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
  # Karafka framework changelog
 
+ ## 2.2.7 (2023-10-07)
+ - **[Feature]** Introduce Inline Insights to both OSS and Pro. Inline Insights allow you to get Kafka insights/metrics from within the consumer instance and use them to alter the processing flow. In Pro, there is an extra filter flow that can ensure that the insights exist during consumption.
+ - [Enhancement] Make sure that subscription group ids are unique by including their consumer group id in them, similar to how topic ids are handled (not a breaking change).
+ - [Enhancement] Expose an `#attempt` method on the consumer to directly indicate the processing attempt number for the given data.
+ - [Enhancement] Support Rails 7.1.
+
  ## 2.2.6 (2023-09-26)
  - [Enhancement] Retry `Karafka::Admin#read_watermark_offsets` fetching upon `not_leader_for_partition` that can occur mostly on newly created topics in KRaft and after crashes during leader selection.
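For illustration, a minimal consumer sketch of the new `#attempt` API described above (the Inline Insights usage itself is sketched next to its files further down). The topic and consumer names are hypothetical.

```ruby
# Hypothetical consumer: `attempt` is 1 on the first processing of a batch and
# grows with each retry, so retry-aware flows no longer need to reach into
# `coordinator.pause_tracker` directly.
class EventsConsumer < Karafka::BaseConsumer
  def consume
    # Equivalent information to `retrying?`, plus the exact attempt number
    Karafka.logger.warn("Attempt ##{attempt} for #{topic.name}/#{partition}") if attempt > 1

    messages.each { |message| puts message.payload }
  end
end
```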
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.2.6)
+ karafka (2.2.7)
  karafka-core (>= 2.2.2, < 2.3.0)
  thor (>= 0.20)
  waterdrop (>= 2.6.6, < 3.0.0)
@@ -37,9 +37,9 @@ GEM
  ffi (~> 1.15)
  mini_portile2 (~> 2.6)
  rake (> 12)
- karafka-web (0.7.4)
+ karafka-web (0.7.5)
  erubi (~> 1.4)
- karafka (>= 2.2.3, < 3.0.0)
+ karafka (>= 2.2.6, < 3.0.0)
  karafka-core (>= 2.2.2, < 3.0.0)
  roda (~> 3.68, >= 3.69)
  tilt (~> 2.0)
@@ -75,7 +75,7 @@ GEM
  waterdrop (2.6.7)
  karafka-core (>= 2.1.1, < 3.0.0)
  zeitwerk (~> 2.3)
- zeitwerk (2.6.11)
+ zeitwerk (2.6.12)
 
  PLATFORMS
  x86_64-linux
data/config/locales/errors.yml CHANGED
@@ -16,9 +16,11 @@ en:
  max_wait_time_format: needs to be an integer bigger than 0
  kafka_format: needs to be a filled hash
  internal.processing.jobs_builder_format: cannot be nil
- internal.processing.scheduler: cannot be nil
- internal.processing.coordinator_class: cannot be nil
- internal.processing.partitioner_class: cannot be nil
+ internal.processing.scheduler_format: cannot be nil
+ internal.processing.coordinator_class_format: cannot be nil
+ internal.processing.partitioner_class_format: cannot be nil
+ internal.processing.strategy_selector_format: cannot be nil
+ internal.processing.expansions_selector_format: cannot be nil
  internal.active_job.dispatcher: cannot be nil
  internal.active_job.job_options_contract: cannot be nil
  internal.active_job.consumer_class: cannot be nil
@@ -59,8 +61,9 @@ en:
  initial_offset_format: needs to be either earliest or latest
  subscription_group_name_format: must be a non-empty string
  manual_offset_management.active_format: needs to be either true or false
- consumer_active_job_missing: ActiveJob needs to be available
  manual_offset_management_must_be_enabled: cannot be disabled for ActiveJob topics
+ inline_insights.active_format: needs to be either true or false
+ consumer_active_job_missing: ActiveJob needs to be available
  dead_letter_queue.max_retries_format: needs to be equal or bigger than 0
  dead_letter_queue.topic_format: 'needs to be a string with a Kafka accepted format'
  dead_letter_queue.active_format: needs to be either true or false
data/config/locales/pro_errors.yml CHANGED
@@ -31,6 +31,9 @@ en:
  patterns.active_format: 'needs to be boolean'
  patterns.type_format: 'needs to be :matcher, :discovered or :regular'
 
+ inline_insights.active_format: 'needs to be boolean'
+ inline_insights.required_format: 'needs to be boolean'
+
  consumer_group:
  patterns_format: must be an array with hashes
  patterns_missing: needs to be present
data/lib/karafka/base_consumer.rb CHANGED
@@ -252,7 +252,13 @@ module Karafka
  # different flow after there is an error, for example for resources cleanup, small manual
  # backoff or different instrumentation tracking.
  def retrying?
- coordinator.pause_tracker.attempt > 1
+ attempt > 1
+ end
+
+ # @return [Integer] attempt of processing given batch. 1 if this is the first attempt or
+ # higher in case it is a retry
+ def attempt
+ coordinator.pause_tracker.attempt
  end
 
  # Pauses the processing from the last offset to retry on given message
data/lib/karafka/connection/listener.rb CHANGED
@@ -112,7 +112,7 @@ module Karafka
  # distributing consuming jobs as upon revoking, we might get assigned to the same
  # partitions, thus getting their jobs. The revoking jobs need to finish before
  # appropriate consumers are taken down and re-created
- build_and_schedule_revoke_lost_partitions_jobs
+ build_and_schedule_revoked_jobs_for_revoked_partitions
 
  # We wait only on jobs from our subscription group. Other groups are independent.
  # This will block on revoked jobs until they are finished. Those are not meant to last
@@ -140,7 +140,7 @@ module Karafka
  # that occurred in the cluster.
  wait_pinging(
  wait_until: -> { @jobs_queue.empty?(@subscription_group.id) },
- after_ping: -> { build_and_schedule_revoke_lost_partitions_jobs }
+ after_ping: -> { build_and_schedule_revoked_jobs_for_revoked_partitions }
  )
 
  # We do not want to schedule the shutdown jobs prior to finishing all the jobs
@@ -197,7 +197,7 @@ module Karafka
  end
 
  # Enqueues revoking jobs for partitions that were taken away from the running process.
- def build_and_schedule_revoke_lost_partitions_jobs
+ def build_and_schedule_revoked_jobs_for_revoked_partitions
  revoked_partitions = @client.rebalance_manager.revoked_partitions
 
  # Stop early to save on some execution and array allocation
data/lib/karafka/contracts/config.rb CHANGED
@@ -73,6 +73,8 @@ module Karafka
  required(:scheduler) { |val| !val.nil? }
  required(:coordinator_class) { |val| !val.nil? }
  required(:partitioner_class) { |val| !val.nil? }
+ required(:strategy_selector) { |val| !val.nil? }
+ required(:expansions_selector) { |val| !val.nil? }
  end
 
  nested(:active_job) do
data/lib/karafka/pro/loader.rb CHANGED
@@ -49,7 +49,7 @@ module Karafka
  # Loads all the pro components and configures them wherever it is expected
  # @param config [Karafka::Core::Configurable::Node] app config that we can alter with pro
  # components
- def pre_setup(config)
+ def pre_setup_all(config)
  features.each { |feature| feature.pre_setup(config) }
 
  reconfigure(config)
@@ -60,7 +60,7 @@ module Karafka
  # Runs post setup features configuration operations
  #
  # @param config [Karafka::Core::Configurable::Node]
- def post_setup(config)
+ def post_setup_all(config)
  features.each { |feature| feature.post_setup(config) }
  end
 
data/lib/karafka/pro/processing/filters/delayer.rb CHANGED
@@ -24,7 +24,7 @@ module Karafka
  @delay = delay
  end
 
- # Removes too old messages
+ # Removes too young messages
  #
  # @param messages [Array<Karafka::Messages::Message>]
  def apply!(messages)
data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb CHANGED
@@ -0,0 +1,78 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+ module Pro
+ module Processing
+ module Filters
+ # Delayer that checks if we have appropriate insights available. If not, pauses for
+ # 5 seconds so the insights can be loaded from the broker.
+ #
+ # In case it would take more than five seconds to load insights, it will just pause again
+ #
+ # This filter ensures that we always have inline insights that a consumer can use
+ #
+ # It is relevant in most cases only during the process start, when the first poll may
+ # not yield statistics yet but will give some data.
+ class InlineInsightsDelayer < Base
+ # Minimum amount of time we should pause for when there are no metrics
+ PAUSE_TIMEOUT = 5_000
+
+ private_constant :PAUSE_TIMEOUT
+
+ # @param topic [Karafka::Routing::Topic]
+ # @param partition [Integer] partition
+ def initialize(topic, partition)
+ super()
+ @topic = topic
+ @partition = partition
+ end
+
+ # Pauses if inline insights would not be available. Does nothing otherwise
+ #
+ # @param messages [Array<Karafka::Messages::Message>]
+ def apply!(messages)
+ @applied = false
+ @cursor = messages.first
+
+ # Nothing to do if there were no messages
+ # This can happen when we chain filters
+ return unless @cursor
+
+ insights = ::Karafka::Processing::InlineInsights::Tracker.find(
+ @topic,
+ @partition
+ )
+
+ # If insights are available, there is also nothing to do here and we can just process
+ return unless insights.empty?
+
+ messages.clear
+
+ @applied = true
+ end
+
+ # @return [Integer] ms timeout in case of pause
+ def timeout
+ @cursor && applied? ? PAUSE_TIMEOUT : 0
+ end
+
+ # Pause when we had to back-off or skip if delay is not needed
+ def action
+ applied? ? :pause : :skip
+ end
+ end
+ end
+ end
+ end
+ end
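To see how the three methods above cooperate, here is a rough sketch of a driver loop in the spirit of the Pro filtering API; it is illustrative only and not Karafka's actual filters applier code. `topic`, `partition` and `messages` are assumed to come from the current poll.

```ruby
filter = Karafka::Pro::Processing::Filters::InlineInsightsDelayer.new(topic, partition)

# May clear `messages` when insights are missing, marking itself as applied
filter.apply!(messages)

case filter.action
when :pause
  # No insights yet: pause the partition for `filter.timeout` ms (5_000 here)
  # and poll again, by which time the statistics callback should have fired
  pause_partition(topic, partition, filter.timeout) # hypothetical helper
when :skip
  # Insights were already present (or nothing was polled): process normally
  process(messages) # hypothetical helper
end
```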
data/lib/karafka/pro/routing/features/inline_insights/config.rb CHANGED
@@ -0,0 +1,32 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+ module Pro
+ module Routing
+ module Features
+ class InlineInsights < Base
+ # Config of this feature
+ Config = Struct.new(
+ :active,
+ :required,
+ keyword_init: true
+ ) do
+ alias_method :active?, :active
+ alias_method :required?, :required
+ end
+ end
+ end
+ end
+ end
+ end
data/lib/karafka/pro/routing/features/inline_insights/contracts/topic.rb CHANGED
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+ module Pro
+ module Routing
+ module Features
+ class InlineInsights < Base
+ # Inline Insights related contracts namespace
+ module Contracts
+ # Contract for inline insights topic setup
+ class Topic < Karafka::Contracts::Base
+ configure do |config|
+ config.error_messages = YAML.safe_load(
+ File.read(
+ File.join(Karafka.gem_root, 'config', 'locales', 'pro_errors.yml')
+ )
+ ).fetch('en').fetch('validations').fetch('topic')
+ end
+
+ nested :inline_insights do
+ required(:active) { |val| [true, false].include?(val) }
+ required(:required) { |val| [true, false].include?(val) }
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+ end
data/lib/karafka/pro/routing/features/inline_insights/topic.rb CHANGED
@@ -0,0 +1,52 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+ module Pro
+ module Routing
+ module Features
+ class InlineInsights < Base
+ # Routing topic inline insights API
+ module Topic
+ # @param active [Boolean] should inline insights be activated
+ # @param required [Boolean] are the insights required to operate
+ def inline_insights(active = -1, required: -1)
+ # This weird style of checking allows us to activate inline insights in a few ways:
+ # - inline_insights(true)
+ # - inline_insights(required: true)
+ # - inline_insights(required: false)
+ #
+ # In each of those cases inline insights will become active
+ @inline_insights ||= begin
+ config = Config.new(
+ active: active == true || (active == -1 && required != -1),
+ required: required == true
+ )
+
+ if config.active? && config.required?
+ factory = lambda do |topic, partition|
+ Pro::Processing::Filters::InlineInsightsDelayer.new(topic, partition)
+ end
+
+ filter(factory)
+ end
+
+ config
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+ end
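The sentinel-based arguments above support three call styles, each of which activates the feature; only `required: true` additionally registers the `InlineInsightsDelayer` filter. A routing sketch (topic and consumer names are placeholders):

```ruby
KarafkaApp.routes.draw do
  topic :metrics_aware do
    consumer MetricsAwareConsumer
    # Active, but processing may start before insights arrive
    inline_insights(true)
  end

  topic :lag_guarded do
    consumer LagGuardedConsumer
    # Active and required: the delayer filter pauses (5s at a time) until
    # insights for the partition are present
    inline_insights(required: true)
  end

  topic :opportunistic do
    consumer OpportunisticConsumer
    # Also activates insights, explicitly without the availability guarantee
    inline_insights(required: false)
  end
end
```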
data/lib/karafka/pro/routing/features/inline_insights.rb CHANGED
@@ -0,0 +1,26 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+ module Pro
+ module Routing
+ module Features
+ # Enhanced inline insights
+ # Allows you to set up an automatic filter that will ensure that metrics are always
+ # available when processing starts.
+ class InlineInsights < Base
+ end
+ end
+ end
+ end
+ end
data/lib/karafka/processing/executor.rb CHANGED
@@ -131,11 +131,18 @@ module Karafka
  topic = @coordinator.topic
 
  strategy = ::Karafka::App.config.internal.processing.strategy_selector.find(topic)
+ expansions = ::Karafka::App.config.internal.processing.expansions_selector.find(topic)
 
  consumer = topic.consumer_class.new
  # We use singleton class as the same consumer class may be used to process different
  # topics with different settings
  consumer.singleton_class.include(strategy)
+
+ # Specific features may expand the consumer API beyond the injected strategy. The
+ # difference here is that a strategy impacts the flow of states while extra APIs just
+ # provide some extra methods with information, etc., but do not deviate from the flow
+ # behavior
+ expansions.each { |expansion| consumer.singleton_class.include(expansion) }
+
  consumer.client = @client
  consumer.producer = ::Karafka::App.producer
  consumer.coordinator = @coordinator
data/lib/karafka/processing/expansions_selector.rb CHANGED
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Processing
+ # Selector of appropriate feature enhancements based on the topic setup.
+ #
+ # Those expansions to the consumer API are NOT about the flow of processing. For this we have
+ # strategies. Those are supposed to provide certain extra APIs that the user can use to get
+ # some extra non-flow related functionalities.
+ class ExpansionsSelector
+ # @param topic [Karafka::Routing::Topic] topic with settings based on which we find
+ # expansions
+ # @return [Array<Module>] modules with proper expansions we're supposed to use to enhance
+ # the consumer
+ def find(topic)
+ expansions = []
+ expansions << Processing::InlineInsights::Consumer if topic.inline_insights?
+ expansions
+ end
+ end
+ end
+ end
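The selector's output is consumed by the executor change shown earlier; a condensed sketch of that wiring, assuming `topic` is an already-built routing topic:

```ruby
# Expansions are mixed into the consumer's singleton class, so only consumers
# of topics with the feature enabled gain the extra `#insights` API
expansions = Karafka::App.config.internal.processing.expansions_selector.find(topic)

consumer = topic.consumer_class.new
expansions.each { |expansion| consumer.singleton_class.include(expansion) }

consumer.respond_to?(:insights) # => true only when topic.inline_insights? is true
```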
data/lib/karafka/processing/inline_insights/consumer.rb CHANGED
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Processing
+ # Namespace of the Inline Insights feature "non routing" related components
+ #
+ # @note We provide both `#insights`, because it is the feature name, and `#statistics`, to
+ # make it consistent with the fact that we publish and operate on statistics. Users can
+ # pick whichever name they prefer.
+ module InlineInsights
+ # Module that adds extra methods to the consumer that allow us to fetch the insights
+ module Consumer
+ # @return [Hash] empty hash or hash with given partition insights if already present
+ # @note We cache insights on the consumer, as in some scenarios we may no longer have them
+ # inside the Tracker; for example under involuntary revocation, incoming statistics may
+ # no longer have the lost partition insights. Since we want to be consistent during
+ # single batch operations, we want to ensure that if we have insights, they are
+ # available throughout the whole processing.
+ def insights
+ insights = Tracker.find(topic, partition)
+
+ # If we no longer have new insights but we still have them locally, we can use them
+ return @insights if @insights && insights.empty?
+ # If insights are still the same, we can use them
+ return @insights if @insights.equal?(insights)
+
+ # If we've received new insights that are not empty, we can cache them
+ @insights = insights
+ end
+
+ # @return [Boolean] true if there are insights to work with, otherwise false
+ def insights?
+ !insights.empty?
+ end
+
+ alias statistics insights
+ alias statistics? insights?
+ end
+ end
+ end
+ end
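Inside a consumer for a topic with the feature enabled, both naming styles work; `'consumer_lag'` is an assumed example key from the librdkafka partition statistics payload:

```ruby
class OrdersConsumer < Karafka::BaseConsumer
  def consume
    # Without the Pro `required: true` flow, insights may be empty right after
    # the process starts, so guard before reading
    if statistics?
      lag = statistics.fetch('consumer_lag', -1)
      Karafka.logger.info("Lag on #{topic.name}/#{partition}: #{lag}")
    end

    messages.each { |message| puts message.payload }
  end
end
```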
data/lib/karafka/processing/inline_insights/listener.rb CHANGED
@@ -0,0 +1,19 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Processing
+ module InlineInsights
+ # Listener that adds statistics to our inline tracker
+ class Listener
+ # Adds statistics to the tracker
+ # @param event [Karafka::Core::Monitoring::Event] event with statistics
+ def on_statistics_emitted(event)
+ Tracker.add(
+ event[:consumer_group_id],
+ event[:statistics]
+ )
+ end
+ end
+ end
+ end
+ end
data/lib/karafka/processing/inline_insights/tracker.rb CHANGED
@@ -0,0 +1,128 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Processing
+ module InlineInsights
+ # Object used to track statistics coming from librdkafka in a way that can be accessible by
+ # the consumers
+ #
+ # We use a single tracker because we do not need state management here as our consumer groups
+ # clients identified by statistics name value are unique. On top of that, having a per
+ # process one that is a singleton allows us to use the tracker easily also from other places
+ # like the filtering API etc.
+ #
+ # @note We include a cache of 5 minutes for revoked partitions to compensate for cases where,
+ # when using LRJ, lost partition data would not be present anymore even though we would
+ # still be in the processing phase. Since those metrics are published with each `poll`,
+ # regular processing is not a subject of this issue. For LRJ we keep the reference. The
+ # only case where this could be switched midway is when LRJ is running for an extended
+ # period of time after the involuntary revocation. Having a time based cache instead of
+ # tracking simplifies the design as we do not have to deal with state tracking, especially
+ # since we would have to track also operations running in a revoked state.
+ #
+ # @note This tracker keeps in memory data about all topics and partitions that it encounters
+ # because in case of routing patterns, we may start getting statistics prior to registering
+ # a given topic via dynamic routing expansions. In such cases we would not have insights
+ # even though they were actually available for us to use.
+ #
+ # @note Memory usage is negligible as long as we can evict expired data. A single metrics set
+ # for a single partition contains around 4KB of data. This means that in case of an
+ # assignment of 1000 partitions, we use around 4MB of space for tracking those metrics.
+ class Tracker
+ include Singleton
+ include Karafka::Core::Helpers::Time
+
+ # Empty hash we want to return in any case where we could not locate appropriate topic
+ # partition statistics.
+ EMPTY_HASH = {}.freeze
+
+ # Empty array to save on memory allocations.
+ EMPTY_ARRAY = [].freeze
+
+ # 5 minutes of cache. We cache the last result per consumer group topic partition so we
+ # are not affected by involuntary rebalances during LRJ execution.
+ TTL = 5 * 60 * 1_000
+
+ private_constant :EMPTY_HASH, :EMPTY_ARRAY, :TTL
+
+ class << self
+ extend Forwardable
+
+ def_delegators :instance, :find, :add, :exists?, :clear
+ end
+
+ def initialize
+ @accu = {}
+ @mutex = Mutex.new
+ end
+
+ # Adds each partition statistics into the internal accumulator. A single statistics set
+ # may contain data from multiple topics and their partitions because a single client can
+ # operate on multiple topics and partitions.
+ #
+ # We iterate over those topics and partitions and store topics partitions data only.
+ #
+ # @param consumer_group_id [String] id of the consumer group for which statistics were
+ # emitted.
+ # @param statistics [Hash] librdkafka enriched statistics
+ def add(consumer_group_id, statistics)
+ @mutex.synchronize do
+ statistics.fetch('topics', EMPTY_HASH).each do |topic_name, t_details|
+ t_details.fetch('partitions', EMPTY_HASH).each do |partition_id, p_details|
+ next unless track?(partition_id, p_details)
+
+ key = "#{consumer_group_id}_#{topic_name}_#{partition_id}"
+ @accu[key] = [monotonic_now, p_details]
+ end
+ end
+
+ evict
+ end
+ end
+
+ # Finds statistics about the requested consumer group topic partition
+ #
+ # @param topic [Karafka::Routing::Topic]
+ # @param partition [Integer]
+ # @return [Hash] hash with given topic partition statistics or empty hash if not present
+ #
+ # @note We do not enclose it with a mutex mainly because the only thing that could happen
+ # here that would be a race-condition is a miss that we anyhow need to support due to
+ # how librdkafka ships metrics and a potential removal of data on heavily revoked LRJ.
+ def find(topic, partition)
+ key = "#{topic.consumer_group.id}_#{topic.name}_#{partition}"
+ @accu.fetch(key, EMPTY_ARRAY).last || EMPTY_HASH
+ end
+
+ # Clears the tracker
+ def clear
+ @mutex.synchronize { @accu.clear }
+ end
+
+ private
+
+ # Evicts expired data from the cache
+ def evict
+ @accu.delete_if { |_, details| monotonic_now - details.first > TTL }
+ end
+
+ # Should we track given partition
+ #
+ # We do not track stopped partitions or the ones we do not actively work with
+ # @param partition_id [String] partition id as a string
+ # @param p_details [Hash] partition statistics details
+ # @return [Boolean] true if we should track given partition
+ def track?(partition_id, p_details)
+ return false if partition_id == '-1'
+
+ fetch_state = p_details.fetch('fetch_state')
+
+ return false if fetch_state == 'stopped'
+ return false if fetch_state == 'none'
+
+ true
+ end
+ end
+ end
+ end
+ end
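The accumulator can be exercised directly through the delegated class methods. The hash below mimics the librdkafka layout that `#add` traverses (`'topics'` → `'partitions'`), with `'fetch_state'` and `'consumer_lag'` as assumed payload keys; `topic` is assumed to be a routing topic whose consumer group id is `'my_group'` and whose name is `'events'`.

```ruby
stats = {
  'topics' => {
    'events' => {
      'partitions' => {
        '-1' => { 'fetch_state' => 'active' },                     # dropped by #track?
        '0'  => { 'fetch_state' => 'active', 'consumer_lag' => 42 },
        '1'  => { 'fetch_state' => 'stopped' }                     # dropped by #track?
      }
    }
  }
}

Karafka::Processing::InlineInsights::Tracker.add('my_group', stats)

Karafka::Processing::InlineInsights::Tracker.find(topic, 0)
# => { 'fetch_state' => 'active', 'consumer_lag' => 42 }
Karafka::Processing::InlineInsights::Tracker.find(topic, 1)
# => {} (filtered out; stored entries also expire after the 5 minute TTL)
```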
data/lib/karafka/railtie.rb CHANGED
@@ -33,7 +33,7 @@ if Karafka.rails?
  # server + will support code reloading with each fetched loop. We do it only for karafka
  # based commands as Rails processes and console will have it enabled already
  initializer 'karafka.configure_rails_logger' do
- # Make Karafka use Rails logger
+ # Make Karafka uses Rails logger
  ::Karafka::App.config.logger = Rails.logger
 
  next unless Rails.env.development?
@@ -42,15 +42,22 @@ if Karafka.rails?
  # If added again, would print stuff twice
  next if ActiveSupport::Logger.logger_outputs_to?(Rails.logger, $stdout)
 
- logger = ActiveSupport::Logger.new($stdout)
+ stdout_logger = ActiveSupport::Logger.new($stdout)
  # Inherit the logger level from Rails, otherwise would always run with the debug level
- logger.level = Rails.logger.level
+ stdout_logger.level = Rails.logger.level
 
- Rails.logger.extend(
- ActiveSupport::Logger.broadcast(
- logger
+ rails71plus = Rails.gem_version >= Gem::Version.new('7.1.0')
+
+ # Rails 7.1 replaced the broadcast module with a broadcast logger
+ if rails71plus
+ Rails.logger.broadcast_to(stdout_logger)
+ else
+ Rails.logger.extend(
+ ActiveSupport::Logger.broadcast(
+ stdout_logger
+ )
  )
- )
+ end
  end
 
  initializer 'karafka.configure_rails_auto_load_paths' do |app|
data/lib/karafka/routing/features/base.rb CHANGED
@@ -23,11 +23,46 @@ module Karafka
 
  # Loads all the features and activates them
  def load_all
+ features.each(&:activate)
+ end
+
+ # @param config [Karafka::Core::Configurable::Node] app config that we can alter with
+ # particular routing feature specific stuff if needed
+ def pre_setup_all(config)
+ features.each { |feature| feature.pre_setup(config) }
+ end
+
+ # Runs post setup routing features configuration operations
+ #
+ # @param config [Karafka::Core::Configurable::Node]
+ def post_setup_all(config)
+ features.each { |feature| feature.post_setup(config) }
+ end
+
+ private
+
+ # @return [Array<Class>] all available routing features
+ def features
  ObjectSpace
  .each_object(Class)
  .select { |klass| klass < self }
  .sort_by(&:to_s)
- .each(&:activate)
+ end
+
+ protected
+
+ # Runs pre-setup configuration of a particular routing feature
+ #
+ # @param _config [Karafka::Core::Configurable::Node] app config node
+ def pre_setup(_config)
+ true
+ end
+
+ # Runs post-setup configuration of a particular routing feature
+ #
+ # @param _config [Karafka::Core::Configurable::Node] app config node
+ def post_setup(_config)
+ true
  end
  end
  end
data/lib/karafka/routing/features/inline_insights/config.rb CHANGED
@@ -0,0 +1,15 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Routing
+ module Features
+ class InlineInsights < Base
+ # Config of this feature
+ Config = Struct.new(
+ :active,
+ keyword_init: true
+ ) { alias_method :active?, :active }
+ end
+ end
+ end
+ end
data/lib/karafka/routing/features/inline_insights/contracts/topic.rb CHANGED
@@ -0,0 +1,27 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Routing
+ module Features
+ class InlineInsights < Base
+ # Inline Insights related contracts namespace
+ module Contracts
+ # Contract for inline insights topic setup
+ class Topic < Karafka::Contracts::Base
+ configure do |config|
+ config.error_messages = YAML.safe_load(
+ File.read(
+ File.join(Karafka.gem_root, 'config', 'locales', 'errors.yml')
+ )
+ ).fetch('en').fetch('validations').fetch('topic')
+ end
+
+ nested :inline_insights do
+ required(:active) { |val| [true, false].include?(val) }
+ end
+ end
+ end
+ end
+ end
+ end
+ end
data/lib/karafka/routing/features/inline_insights/topic.rb CHANGED
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Routing
+ module Features
+ class InlineInsights < Base
+ # Routing topic inline insights API
+ module Topic
+ # @param active [Boolean] should inline insights be activated
+ def inline_insights(active = false)
+ @inline_insights ||= Config.new(
+ active: active
+ )
+ end
+
+ # @return [Boolean] Are inline insights active
+ def inline_insights?
+ inline_insights.active?
+ end
+
+ # @return [Hash] topic setup hash
+ def to_h
+ super.merge(
+ inline_insights: inline_insights.to_h
+ ).freeze
+ end
+ end
+ end
+ end
+ end
+ end
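Behavior sketch of the OSS variant above: a single positional flag, with `#inline_insights?` driving `ExpansionsSelector#find` and `#to_h` feeding contract validation. Names are placeholders, and `topic` stands for the routing topic instance built by the block.

```ruby
KarafkaApp.routes.draw do
  topic :events do
    consumer EventsConsumer
    # Defaults to inactive when never called; the first call memoizes Config
    inline_insights(true)
  end
end

# Given such a routing topic instance:
topic.inline_insights?        # => true
topic.to_h[:inline_insights]  # => { active: true }
```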
data/lib/karafka/routing/features/inline_insights.rb CHANGED
@@ -0,0 +1,40 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ module Routing
+ module Features
+ # Feature allowing us to get visibility during the consumption into metrics of the
+ # particular partition we operate on. It can be useful when making context-aware
+ # consumers that change their behaviours based on the lag and other parameters.
+ class InlineInsights < Base
+ class << self
+ # If needed, installs the listener and initializes the tracker
+ #
+ # @param _config [Karafka::Core::Configurable::Node] app config
+ def post_setup(_config)
+ ::Karafka::App.monitor.subscribe('app.running') do
+ # Do not activate tracking of statistics if none of our active topics uses it.
+ # This prevents us from tracking metrics when the user runs just a subset of topics
+ # in a given process and none of those actually utilizes this feature
+ next unless ::Karafka::App
+ .subscription_groups
+ .values
+ .flat_map(&:itself)
+ .flat_map(&:topics)
+ .flat_map(&:to_a)
+ .any?(&:inline_insights?)
+
+ # Initialize the tracker prior to becoming multi-threaded
+ ::Karafka::Processing::InlineInsights::Tracker.instance
+
+ # Subscribe to the statistics reports and collect them
+ ::Karafka.monitor.subscribe(
+ ::Karafka::Processing::InlineInsights::Listener.new
+ )
+ end
+ end
+ end
+ end
+ end
+ end
+ end
data/lib/karafka/routing/subscription_group.rb CHANGED
@@ -34,7 +34,11 @@ module Karafka
  def initialize(position, topics)
  @name = topics.first.subscription_group_name
  @consumer_group = topics.first.consumer_group
- @id = "#{@name}_#{position}"
+ # We include the consumer group id here because we want to have unique ids of subscription
+ # groups across the system. Otherwise a user could set the same name for multiple
+ # subscription groups in many consumer groups, effectively having the same id for
+ # different entities
+ @id = "#{@consumer_group.id}_#{@name}_#{position}"
  @position = position
  @topics = topics
  @kafka = build_kafka
@@ -87,7 +91,7 @@
  # If we use static group memberships, there can be a case, where same instance id would
  # be set on many subscription groups as the group instance id from Karafka perspective is
  # set per config. Each instance even if they are subscribed to different topics needs to
- # have if fully unique. To make sure of that, we just add extra postfix at the end that
+ # have it fully unique. To make sure of that, we just add extra postfix at the end that
  # increments.
  group_instance_id = kafka.fetch(:'group.instance.id', false)
 
data/lib/karafka/setup/attributes_map.rb CHANGED
@@ -221,6 +221,7 @@ module Karafka
  request.timeout.ms
  resolve_cb
  retries
+ retry.backoff.max.ms
  retry.backoff.ms
  sasl.kerberos.keytab
  sasl.kerberos.kinit.cmd
data/lib/karafka/setup/config.rb CHANGED
@@ -208,6 +208,8 @@ module Karafka
  setting :partitioner_class, default: Processing::Partitioner
  # option strategy_selector [Object] processing strategy selector to be used
  setting :strategy_selector, default: Processing::StrategySelector.new
+ # option expansions_selector [Object] processing expansions selector to be used
+ setting :expansions_selector, default: Processing::ExpansionsSelector.new
  end
 
  # Things related to operating on messages
@@ -238,10 +240,14 @@ module Karafka
  def setup(&block)
  # Will prepare and verify license if present
  Licenser.prepare_and_verify(config.license)
+
+ # Pre-setup configure all routing features that would need this
+ Routing::Features::Base.pre_setup_all(config)
+
  # Will configure all the pro components
  # This needs to happen before end user configuration as the end user may overwrite some
  # of the pro defaults with custom components
- Pro::Loader.pre_setup(config) if Karafka.pro?
+ Pro::Loader.pre_setup_all(config) if Karafka.pro?
 
  configure(&block)
  merge_kafka_defaults!(config)
@@ -253,9 +259,12 @@ module Karafka
  # Refreshes the references that are cached that might have been changed by the config
  ::Karafka.refresh!
 
+ # Post-setup configure all routing features that would need this
+ Routing::Features::Base.post_setup_all(config)
+
  # Runs things that need to be executed after config is defined and all the components
  # are also configured
- Pro::Loader.post_setup(config) if Karafka.pro?
+ Pro::Loader.post_setup_all(config) if Karafka.pro?
 
  Karafka::App.initialized!
  end
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
  # Current Karafka version
- VERSION = '2.2.6'
+ VERSION = '2.2.7'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.2.6
+ version: 2.2.7
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
  -----END CERTIFICATE-----
- date: 2023-09-26 00:00:00.000000000 Z
+ date: 2023-10-07 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: karafka-core
@@ -244,6 +244,7 @@ files:
  - lib/karafka/pro/processing/filters/base.rb
  - lib/karafka/pro/processing/filters/delayer.rb
  - lib/karafka/pro/processing/filters/expirer.rb
+ - lib/karafka/pro/processing/filters/inline_insights_delayer.rb
  - lib/karafka/pro/processing/filters/throttler.rb
  - lib/karafka/pro/processing/filters/virtual_limiter.rb
  - lib/karafka/pro/processing/filters_applier.rb
@@ -321,6 +322,10 @@ files:
  - lib/karafka/pro/routing/features/filtering/config.rb
  - lib/karafka/pro/routing/features/filtering/contracts/topic.rb
  - lib/karafka/pro/routing/features/filtering/topic.rb
+ - lib/karafka/pro/routing/features/inline_insights.rb
+ - lib/karafka/pro/routing/features/inline_insights/config.rb
+ - lib/karafka/pro/routing/features/inline_insights/contracts/topic.rb
+ - lib/karafka/pro/routing/features/inline_insights/topic.rb
  - lib/karafka/pro/routing/features/long_running_job.rb
  - lib/karafka/pro/routing/features/long_running_job/config.rb
  - lib/karafka/pro/routing/features/long_running_job/contracts/topic.rb
@@ -353,6 +358,10 @@ files:
  - lib/karafka/processing/coordinators_buffer.rb
  - lib/karafka/processing/executor.rb
  - lib/karafka/processing/executors_buffer.rb
+ - lib/karafka/processing/expansions_selector.rb
+ - lib/karafka/processing/inline_insights/consumer.rb
+ - lib/karafka/processing/inline_insights/listener.rb
+ - lib/karafka/processing/inline_insights/tracker.rb
  - lib/karafka/processing/jobs/base.rb
  - lib/karafka/processing/jobs/consume.rb
  - lib/karafka/processing/jobs/idle.rb
@@ -394,6 +403,10 @@ files:
  - lib/karafka/routing/features/declaratives/config.rb
  - lib/karafka/routing/features/declaratives/contracts/topic.rb
  - lib/karafka/routing/features/declaratives/topic.rb
+ - lib/karafka/routing/features/inline_insights.rb
+ - lib/karafka/routing/features/inline_insights/config.rb
+ - lib/karafka/routing/features/inline_insights/contracts/topic.rb
+ - lib/karafka/routing/features/inline_insights/topic.rb
  - lib/karafka/routing/features/manual_offset_management.rb
  - lib/karafka/routing/features/manual_offset_management/config.rb
  - lib/karafka/routing/features/manual_offset_management/contracts/topic.rb
@@ -447,7 +460,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.4.19
+ rubygems_version: 3.4.10
  signing_key:
  specification_version: 4
  summary: Karafka is Ruby and Rails efficient Kafka processing framework.
metadata.gz.sig CHANGED
Binary file