karafka 2.2.13 → 2.3.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +38 -12
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +161 -125
  6. data/Gemfile.lock +12 -12
  7. data/README.md +0 -2
  8. data/SECURITY.md +23 -0
  9. data/config/locales/errors.yml +7 -1
  10. data/config/locales/pro_errors.yml +22 -0
  11. data/docker-compose.yml +3 -1
  12. data/karafka.gemspec +2 -2
  13. data/lib/karafka/admin/acl.rb +287 -0
  14. data/lib/karafka/admin.rb +118 -16
  15. data/lib/karafka/app.rb +12 -3
  16. data/lib/karafka/base_consumer.rb +32 -31
  17. data/lib/karafka/cli/base.rb +1 -1
  18. data/lib/karafka/connection/client.rb +94 -84
  19. data/lib/karafka/connection/conductor.rb +28 -0
  20. data/lib/karafka/connection/listener.rb +165 -46
  21. data/lib/karafka/connection/listeners_batch.rb +5 -11
  22. data/lib/karafka/connection/manager.rb +72 -0
  23. data/lib/karafka/connection/messages_buffer.rb +12 -0
  24. data/lib/karafka/connection/proxy.rb +17 -0
  25. data/lib/karafka/connection/status.rb +75 -0
  26. data/lib/karafka/contracts/config.rb +14 -10
  27. data/lib/karafka/contracts/consumer_group.rb +9 -1
  28. data/lib/karafka/contracts/topic.rb +3 -1
  29. data/lib/karafka/errors.rb +13 -0
  30. data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
  31. data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
  32. data/lib/karafka/instrumentation/logger_listener.rb +3 -9
  33. data/lib/karafka/instrumentation/notifications.rb +19 -9
  34. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
  35. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
  36. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
  37. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
  38. data/lib/karafka/pro/base_consumer.rb +47 -0
  39. data/lib/karafka/pro/connection/manager.rb +300 -0
  40. data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
  41. data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
  42. data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
  43. data/lib/karafka/pro/iterator.rb +1 -6
  44. data/lib/karafka/pro/loader.rb +16 -2
  45. data/lib/karafka/pro/processing/coordinator.rb +2 -1
  46. data/lib/karafka/pro/processing/executor.rb +37 -0
  47. data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
  48. data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
  49. data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
  50. data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
  51. data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
  52. data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
  53. data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
  54. data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
  55. data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
  56. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  57. data/lib/karafka/pro/processing/strategies/default.rb +136 -3
  58. data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
  59. data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
  60. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  61. data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
  62. data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
  63. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
  64. data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
  65. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
  66. data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
  67. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
  68. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
  69. data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
  70. data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
  71. data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
  72. data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
  73. data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
  74. data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
  75. data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
  76. data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
  77. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
  78. data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
  79. data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
  80. data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
  81. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
  82. data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
  83. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
  84. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  85. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
  86. data/lib/karafka/process.rb +5 -3
  87. data/lib/karafka/processing/coordinator.rb +5 -1
  88. data/lib/karafka/processing/executor.rb +43 -13
  89. data/lib/karafka/processing/executors_buffer.rb +22 -7
  90. data/lib/karafka/processing/jobs/base.rb +19 -2
  91. data/lib/karafka/processing/jobs/consume.rb +3 -3
  92. data/lib/karafka/processing/jobs/idle.rb +5 -0
  93. data/lib/karafka/processing/jobs/revoked.rb +5 -0
  94. data/lib/karafka/processing/jobs/shutdown.rb +5 -0
  95. data/lib/karafka/processing/jobs_queue.rb +19 -8
  96. data/lib/karafka/processing/schedulers/default.rb +42 -0
  97. data/lib/karafka/processing/strategies/base.rb +13 -4
  98. data/lib/karafka/processing/strategies/default.rb +23 -7
  99. data/lib/karafka/processing/strategies/dlq.rb +36 -0
  100. data/lib/karafka/processing/worker.rb +4 -1
  101. data/lib/karafka/routing/builder.rb +12 -2
  102. data/lib/karafka/routing/consumer_group.rb +5 -5
  103. data/lib/karafka/routing/features/base.rb +44 -8
  104. data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
  105. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  106. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
  107. data/lib/karafka/routing/proxy.rb +4 -3
  108. data/lib/karafka/routing/subscription_group.rb +2 -2
  109. data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
  110. data/lib/karafka/routing/topic.rb +8 -10
  111. data/lib/karafka/routing/topics.rb +1 -1
  112. data/lib/karafka/runner.rb +13 -3
  113. data/lib/karafka/server.rb +5 -9
  114. data/lib/karafka/setup/config.rb +21 -1
  115. data/lib/karafka/status.rb +23 -14
  116. data/lib/karafka/templates/karafka.rb.erb +7 -0
  117. data/lib/karafka/time_trackers/partition_usage.rb +56 -0
  118. data/lib/karafka/version.rb +1 -1
  119. data.tar.gz.sig +0 -0
  120. metadata +47 -13
  121. metadata.gz.sig +0 -0
  122. data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
  123. data/lib/karafka/pro/performance_tracker.rb +0 -84
  124. data/lib/karafka/pro/processing/scheduler.rb +0 -74
  125. data/lib/karafka/processing/scheduler.rb +0 -38
@@ -0,0 +1,300 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Connection
17
+ # Manager that can handle working with multiplexed connections.
18
+ #
19
+ # This manager takes into consideration the number of partitions assigned to the topics and
20
+ # does its best to balance. Additional connections may not always be utilized because
21
+ # alongside of them, other processes may "hijack" the assignment. In such cases those extra
22
+ # empty connections will be turned off after a while.
23
+ #
24
+ # @note Manager operations relate to consumer groups and not subscription groups. Since
25
+ # cluster operations can cause consumer group wide effects, we always apply only one
26
+ # change on a consumer group.
27
+ #
28
+ # @note Since we collect statistical data from listeners and this happens in a background
29
+ # thread, we need to make sure we lock not to have race conditions with expired data
30
+ # eviction.
31
+ class Manager < Karafka::Connection::Manager
32
+ include Core::Helpers::Time
33
+
34
+ # How long should we keep stale stats before evicting them completely
35
+ EVICTION_DELAY = 5 * 60 * 1_000
36
+
37
+ private_constant :EVICTION_DELAY
38
+
39
+ # How long should we wait after a rebalance before doing anything on a consumer group
40
+ #
41
+ # @param scale_delay [Integer] How long should we wait before making any changes. Any
42
+ # change related to this consumer group will postpone the scaling operations. This is
43
+ # done that way to prevent too much friction in the cluster. It is 1 minute by default
44
+ def initialize(scale_delay = 60 * 1_000)
45
+ super()
46
+ @scale_delay = scale_delay
47
+ @mutex = Mutex.new
48
+ @changes = Hash.new do |h, k|
49
+ h[k] = {
50
+ state: '',
51
+ join_state: '',
52
+ state_age: 0,
53
+ state_age_sync: monotonic_now,
54
+ changed_at: monotonic_now
55
+ }
56
+ end
57
+ end
58
+
59
+ # Registers listeners and starts the scaling procedures
60
+ #
61
+ # When using dynamic multiplexing, it will start the absolute minimum of connections for
62
+ # subscription group available.
63
+ #
64
+ # @param listeners [Connection::ListenersBatch]
65
+ def register(listeners)
66
+ @listeners = listeners
67
+
68
+ in_sg_families do |first_subscription_group, sg_listeners|
69
+ multiplexing = first_subscription_group.multiplexing
70
+
71
+ if multiplexing.active? && multiplexing.dynamic?
72
+ # Start as many boot listeners as user wants. If not configured, starts half of max.
73
+ sg_listeners.first(multiplexing.boot).each(&:start!)
74
+ else
75
+ sg_listeners.each(&:start!)
76
+ end
77
+ end
78
+ end
79
+
80
+ # Collects data from the statistics about given subscription group. This is used to ensure
81
+ # that we do not rescale shortly after rebalances, deployments, etc.
82
+ # @param subscription_group_id [String] id of the subscription group for which statistics
83
+ # were emitted
84
+ # @param statistics [Hash] emitted statistics
85
+ #
86
+ # @note Please note that while we collect here per subscription group, we use those metrics
87
+ # collectively on a whole consumer group. This reduces the friction.
88
+ def notice(subscription_group_id, statistics)
89
+ @mutex.synchronize do
90
+ times = []
91
+ # stateage is in microseconds
92
+ # We monitor broker changes to make sure we do not introduce extra friction
93
+ times << statistics['brokers'].values.map { |stats| stats['stateage'] }.min / 1_000
94
+ times << statistics['cgrp']['rebalance_age']
95
+ times << statistics['cgrp']['stateage']
96
+
97
+ # Keep the previous change age for changes that were triggered by us
98
+ previous_changed_at = @changes[subscription_group_id][:changed_at]
99
+
100
+ @changes[subscription_group_id] = {
101
+ state_age: times.min,
102
+ changed_at: previous_changed_at,
103
+ join_state: statistics['cgrp']['join_state'],
104
+ state: statistics['cgrp']['state'],
105
+ state_age_sync: monotonic_now
106
+ }
107
+ end
108
+ end
109
+
110
+ # Shuts down all the listeners when it is time (including moving to quiet) or rescales
111
+ # when it is needed
112
+ def control
113
+ Karafka::App.done? ? shutdown : rescale
114
+ end
115
+
116
+ private
117
+
118
+ # Handles the shutdown and quiet flows
119
+ def shutdown
120
+ active_listeners = @listeners.active
121
+
122
+ # When we are done processing immediately quiet all the listeners so they do not pick up
123
+ # new work to do
124
+ once(:quiet!) { active_listeners.each(&:quiet!) }
125
+
126
+ # If we are in the process of moving to quiet state, we need to check it.
127
+ if Karafka::App.quieting? && active_listeners.all?(&:quiet?)
128
+ once(:quieted!) { Karafka::App.quieted! }
129
+ end
130
+
131
+ return if Karafka::App.quiet?
132
+
133
+ # Since separate subscription groups are subscribed to different topics, there is no risk
134
+ # in shutting them down independently even if they operate in the same consumer group
135
+ in_sg_families do |first_subscription_group, sg_listeners|
136
+ active_sg_listeners = sg_listeners.select(&:active?)
137
+
138
+ # Do nothing until all listeners from the same consumer group are quiet. Otherwise we
139
+ # could have problems with in-flight rebalances during shutdown
140
+ next unless active_sg_listeners.all?(&:quiet?)
141
+
142
+ # Do not stop the same family twice
143
+ once(:stop!, first_subscription_group.name) { active_sg_listeners.each(&:stop!) }
144
+ end
145
+
146
+ return unless @listeners.active.all?(&:stopped?)
147
+
148
+ # All listeners including pending need to be moved at the end to stopped state for
149
+ # the whole server to stop
150
+ once(:stop!) { @listeners.each(&:stopped!) }
151
+ end
152
+
153
+ # Handles two scenarios:
154
+ # - Selects subscriptions that could benefit from having more parallel connections
155
+ # to kafka and then upscales them
156
+ # - Selects subscriptions that are idle (have nothing subscribed to them) and then shuts
157
+ # them down
158
+ #
159
+ # We always run scaling down and up because it may be applicable to different CGs
160
+ def rescale
161
+ evict
162
+
163
+ scale_down
164
+ scale_up
165
+ end
166
+
167
+ # Checks for connections without any assignments and scales them down.
168
+ # Does that only for dynamically multiplexed subscription groups
169
+ def scale_down
170
+ sgs_in_use = Karafka::App.assignments.keys.map(&:subscription_group).uniq
171
+
172
+ # Select connections for scaling down
173
+ in_sg_families do |first_subscription_group, sg_listeners|
174
+ next unless stable?(sg_listeners)
175
+
176
+ multiplexing = first_subscription_group.multiplexing
177
+
178
+ next unless multiplexing.active?
179
+ next unless multiplexing.dynamic?
180
+
181
+ # If we cannot downscale, do not
182
+ next if sg_listeners.count(&:active?) <= multiplexing.min
183
+
184
+ sg_listeners.each do |sg_listener|
185
+ # Do not stop connections with subscriptions
186
+ next if sgs_in_use.include?(sg_listener.subscription_group)
187
+ # Skip listeners that are already in standby
188
+ next unless sg_listener.active?
189
+
190
+ touch(sg_listener.subscription_group.id)
191
+
192
+ # Shut down not used connection
193
+ sg_listener.stop!
194
+
195
+ break
196
+ end
197
+ end
198
+ end
199
+
200
+ # Checks if we have space to scale and if there are any assignments with multiple topics
201
+ # partitions assigned in sgs that can be scaled. If that is the case, we scale up.
202
+ def scale_up
203
+ multi_part_sgs_families = Karafka::App
204
+ .assignments
205
+ .select { |_, partitions| partitions.size > 1 }
206
+ .keys
207
+ .map(&:subscription_group)
208
+ .map(&:name)
209
+ .uniq
210
+
211
+ # Select connections for scaling up
212
+ in_sg_families do |first_subscription_group, sg_listeners|
213
+ next unless stable?(sg_listeners)
214
+
215
+ multiplexing = first_subscription_group.multiplexing
216
+
217
+ next unless multiplexing.active?
218
+ next unless multiplexing.dynamic?
219
+ # If we cannot upscale, do not
220
+ next if sg_listeners.count(&:active?) >= multiplexing.max
221
+
222
+ sg_listeners.each do |sg_listener|
223
+ next unless multi_part_sgs_families.include?(sg_listener.subscription_group.name)
224
+ # Skip already active connections
225
+ next unless sg_listener.pending? || sg_listener.stopped?
226
+
227
+ touch(sg_listener.subscription_group.id)
228
+ sg_listener.start!
229
+
230
+ break
231
+ end
232
+ end
233
+ end
234
+
235
+ # Removes states that are no longer being reported for stopped/pending listeners
236
+ def evict
237
+ @mutex.synchronize do
238
+ @changes.delete_if do |_, details|
239
+ monotonic_now - details[:state_age_sync] >= EVICTION_DELAY
240
+ end
241
+ end
242
+ end
243
+
244
+ # Indicates that something has changed on a subscription group. We consider every single
245
+ # change we make as a change to the setup as well.
246
+ # @param subscription_group_id [String]
247
+ def touch(subscription_group_id)
248
+ @mutex.synchronize do
249
+ @changes[subscription_group_id][:changed_at] = 0
250
+ @changes[subscription_group_id][:state_age_sync] = monotonic_now
251
+ end
252
+ end
253
+
254
+ # @param sg_listeners [Array<Listener>] listeners from one multiplexed sg
255
+ # @return [Boolean] is given subscription group listeners set stable. It is considered
256
+ # stable when it had no changes happening on it recently and all relevant states in it
257
+ # are also stable. This is a strong indicator that no rebalances or other operations are
258
+ # happening at a given moment.
259
+ def stable?(sg_listeners)
260
+ # If none of listeners has changes reported it means we did not yet start collecting
261
+ # metrics about any of them and at least one must be present. We do not consider it
262
+ # stable in such case as we still are waiting for metrics.
263
+ return false if sg_listeners.none? do |sg_listener|
264
+ @changes.key?(sg_listener.subscription_group.id)
265
+ end
266
+
267
+ sg_listeners.all? do |sg_listener|
268
+ # Not all SGs may be started initially or may be stopped, we ignore them here as they
269
+ # are irrelevant from the point of view of establishing stability
270
+ next true unless @changes.key?(sg_listener.subscription_group.id)
271
+
272
+ state = @changes[sg_listener.subscription_group.id]
273
+
274
+ state[:state_age] >= @scale_delay &&
275
+ (monotonic_now - state[:changed_at]) >= @scale_delay &&
276
+ state[:state] == 'up' &&
277
+ state[:join_state] == 'steady'
278
+ end
279
+ end
280
+
281
+ # Yields listeners in groups based on their subscription groups
282
+ # @yieldparam [Karafka::Routing::SubscriptionGroup] first subscription group out of the
283
+ # family
284
+ # @yieldparam [Array<Listener>] listeners of a single subscription group
285
+ def in_sg_families
286
+ grouped = @listeners.group_by { |listener| listener.subscription_group.name }
287
+
288
+ grouped.each_value do |listeners|
289
+ listener = listeners.first
290
+
291
+ yield(
292
+ listener.subscription_group,
293
+ listeners
294
+ )
295
+ end
296
+ end
297
+ end
298
+ end
299
+ end
300
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ # Namespace for Pro connections related components
17
+ module Connection
18
+ # Namespace for Multiplexing management related components
19
+ module Multiplexing
20
+ # Listener used to connect listeners manager to the lifecycle events that are significant
21
+ # to its operations
22
+ class Listener
23
+ def initialize
24
+ @manager = App.config.internal.connection.manager
25
+ end
26
+
27
+ # Triggers connection manager subscription groups details noticing
28
+ #
29
+ # @param event [Karafka::Core::Monitoring::Event] event with statistics
30
+ def on_statistics_emitted(event)
31
+ @manager.notice(
32
+ event[:subscription_group_id],
33
+ event[:statistics]
34
+ )
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ # Namespace for Pro components instrumentation related code
17
+ module Instrumentation
18
+ # Tracker used to keep track of performance metrics
19
+ # It provides insights that can be used to optimize processing flow
20
+ # @note Even if we have some race-conditions here it is irrelevant due to the quantity of data.
21
+ # This is why we do not mutex it.
22
+ class PerformanceTracker
23
+ include Singleton
24
+
25
+ # How many samples do we collect per topic partition
26
+ SAMPLES_COUNT = 200
27
+
28
+ private_constant :SAMPLES_COUNT
29
+
30
+ # Builds up nested hash for data tracking
31
+ def initialize
32
+ @processing_times = Hash.new do |topics_hash, topic|
33
+ topics_hash[topic] = Hash.new do |partitions_hash, partition|
34
+ partitions_hash[partition] = []
35
+ end
36
+ end
37
+ end
38
+
39
+ # @param topic [String]
40
+ # @param partition [Integer]
41
+ # @return [Float] p95 processing time of a single message from a single topic partition
42
+ def processing_time_p95(topic, partition)
43
+ values = @processing_times[topic][partition]
44
+
45
+ return 0 if values.empty?
46
+ return values.first if values.size == 1
47
+
48
+ percentile(0.95, values)
49
+ end
50
+
51
+ # @private
52
+ # @param event [Karafka::Core::Monitoring::Event] event details
53
+ # Tracks time taken to process a single message of a given topic partition
54
+ def on_consumer_consumed(event)
55
+ consumer = event[:caller]
56
+ messages = consumer.messages
57
+ topic = messages.metadata.topic
58
+ partition = messages.metadata.partition
59
+
60
+ samples = @processing_times[topic][partition]
61
+ samples << event[:time] / messages.count
62
+
63
+ return unless samples.size > SAMPLES_COUNT
64
+
65
+ samples.shift
66
+ end
67
+
68
+ private
69
+
70
+ # Computes the requested percentile out of provided values
71
+ # @param percentile [Float]
72
+ # @param values [Array<Numeric>] values out of which we compute the percentile
73
+ # @return [Float] computed percentile
74
+ def percentile(percentile, values)
75
+ values_sorted = values.sort
76
+
77
+ floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
78
+ mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
79
+
80
+ values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
@@ -25,7 +25,7 @@ module Karafka
25
25
  # @param consumer [::Rdkafka::Consumer] consumer instance needed to talk with Kafka
26
26
  # @param expanded_topics [Hash] hash with expanded and normalized topics data
27
27
  def initialize(consumer, expanded_topics)
28
- @consumer = Connection::Proxy.new(consumer)
28
+ @consumer = ::Karafka::Connection::Proxy.new(consumer)
29
29
  @expanded_topics = expanded_topics
30
30
  @mapped_topics = Hash.new { |h, k| h[k] = {} }
31
31
  end
@@ -22,11 +22,6 @@ module Karafka
22
22
  #
23
23
  # It does **not** create a consumer group and does not have any offset management.
24
24
  class Iterator
25
- # Local partition reference for librdkafka
26
- Partition = Struct.new(:partition, :offset)
27
-
28
- private_constant :Partition
29
-
30
25
  # A simple API allowing to iterate over topic/partition data, without having to subscribe
31
26
  # and deal with rebalances. This API allows for multi-partition streaming and is optimized
32
27
  # for data lookups. It allows for explicit stopping iteration over any partition during
@@ -127,7 +122,7 @@ module Karafka
127
122
 
128
123
  @current_consumer.pause(
129
124
  Rdkafka::Consumer::TopicPartitionList.new(
130
- name => [Partition.new(partition, 0)]
125
+ name => [Rdkafka::Consumer::Partition.new(partition, 0)]
131
126
  )
132
127
  )
133
128
  end
@@ -53,6 +53,7 @@ module Karafka
53
53
  features.each { |feature| feature.pre_setup(config) }
54
54
 
55
55
  reconfigure(config)
56
+ expand
56
57
 
57
58
  load_topic_features
58
59
  end
@@ -82,18 +83,31 @@ module Karafka
82
83
 
83
84
  icfg.cli.contract = Contracts::ServerCliOptions.new
84
85
 
86
+ # Use manager that supports multiplexing
87
+ icfg.connection.manager = Connection::Manager.new
88
+
85
89
  icfg.processing.coordinator_class = Processing::Coordinator
86
90
  icfg.processing.partitioner_class = Processing::Partitioner
87
- icfg.processing.scheduler_class = Processing::Scheduler
91
+ icfg.processing.scheduler_class = Processing::Schedulers::Default
88
92
  icfg.processing.jobs_queue_class = Processing::JobsQueue
93
+ icfg.processing.executor_class = Processing::Executor
89
94
  icfg.processing.jobs_builder = Processing::JobsBuilder.new
90
95
  icfg.processing.strategy_selector = Processing::StrategySelector.new
96
+ icfg.processing.expansions_selector = Processing::ExpansionsSelector.new
91
97
 
92
98
  icfg.active_job.consumer_class = ActiveJob::Consumer
93
99
  icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
94
100
  icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
95
101
 
96
- config.monitor.subscribe(PerformanceTracker.instance)
102
+ config.monitor.subscribe(Instrumentation::PerformanceTracker.instance)
103
+ end
104
+
105
+ # Adds extra modules to certain classes
106
+ # This expands their functionalities with things that are needed when operating in Pro
107
+ # It is used only when given class is part of the end user API and cannot be swapped by
108
+ # a pluggable component
109
+ def expand
110
+ Karafka::BaseConsumer.include Pro::BaseConsumer
97
111
  end
98
112
 
99
113
  # Loads the Pro features of Karafka
@@ -42,7 +42,8 @@ module Karafka
42
42
 
43
43
  @virtual_offset_manager = VirtualOffsetManager.new(
44
44
  topic.name,
45
- partition
45
+ partition,
46
+ topic.virtual_partitions.offset_metadata_strategy
46
47
  )
47
48
 
48
49
  # We register our own "internal" filter to support filtering of messages that were marked
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Pro executor that supports periodic jobs
18
+ class Executor < Karafka::Processing::Executor
19
+ # Runs the code that should happen before periodic job is scheduled
20
+ #
21
+ # @note While jobs are called `Periodic`, from the consumer perspective it is "ticking".
22
+ # This name was taken for a reason: we may want to introduce periodic ticking also not
23
+ # only during polling but for example on wait and a name "poll" would not align well.
24
+ # A name "periodic" is not a verb and our other consumer actions are verbs like:
25
+ # consume or revoked. So for the sake of consistency we have ticking here.
26
+ def before_schedule_periodic
27
+ consumer.on_before_schedule_tick
28
+ end
29
+
30
+ # Triggers consumer ticking
31
+ def periodic
32
+ consumer.on_tick
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Pro selector of appropriate topic setup based features enhancements.
18
+ class ExpansionsSelector < Karafka::Processing::ExpansionsSelector
19
+ # @param topic [Karafka::Routing::Topic] topic with settings based on which we find
20
+ # expansions
21
+ # @return [Array<Module>] modules with proper expansions we're supposed to use to enhance
22
+ # the consumer
23
+ def find(topic)
24
+ # Start with the non-pro expansions
25
+ expansions = super
26
+ expansions << Pro::Processing::OffsetMetadata::Consumer if topic.offset_metadata?
27
+ expansions
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Jobs
18
+ # Job that represents a "ticking" work. Work that we run periodically for the Periodics
19
+ # enabled topics.
20
+ class Periodic < ::Karafka::Processing::Jobs::Base
21
+ # @param executor [Karafka::Pro::Processing::Executor] pro executor that is supposed to
22
+ # run a given job
23
+ def initialize(executor)
24
+ @executor = executor
25
+ super()
26
+ end
27
+
28
+ # Code executed before we schedule this job
29
+ def before_schedule
30
+ executor.before_schedule_periodic
31
+ end
32
+
33
+ # Runs the executor periodic action
34
+ def call
35
+ executor.periodic
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Jobs
18
+ # Non-Blocking version of the Periodic job
19
+ # We use this version for LRJ topics for cases where saturated resources would not allow
20
+ # to run this job for extended period of time. Under such scenarios, if we would not use
21
+ # a non-blocking one, we would reach max.poll.interval.ms.
22
+ class PeriodicNonBlocking < Periodic
23
+ # @param args [Array] any arguments accepted by `::Karafka::Pro::Processing::Jobs::Periodic`
24
+ def initialize(*args)
25
+ super
26
+ @non_blocking = true
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end