karafka 2.2.13 → 2.3.0.alpha1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (125)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +38 -12
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +161 -125
  6. data/Gemfile.lock +12 -12
  7. data/README.md +0 -2
  8. data/SECURITY.md +23 -0
  9. data/config/locales/errors.yml +7 -1
  10. data/config/locales/pro_errors.yml +22 -0
  11. data/docker-compose.yml +3 -1
  12. data/karafka.gemspec +2 -2
  13. data/lib/karafka/admin/acl.rb +287 -0
  14. data/lib/karafka/admin.rb +118 -16
  15. data/lib/karafka/app.rb +12 -3
  16. data/lib/karafka/base_consumer.rb +32 -31
  17. data/lib/karafka/cli/base.rb +1 -1
  18. data/lib/karafka/connection/client.rb +94 -84
  19. data/lib/karafka/connection/conductor.rb +28 -0
  20. data/lib/karafka/connection/listener.rb +165 -46
  21. data/lib/karafka/connection/listeners_batch.rb +5 -11
  22. data/lib/karafka/connection/manager.rb +72 -0
  23. data/lib/karafka/connection/messages_buffer.rb +12 -0
  24. data/lib/karafka/connection/proxy.rb +17 -0
  25. data/lib/karafka/connection/status.rb +75 -0
  26. data/lib/karafka/contracts/config.rb +14 -10
  27. data/lib/karafka/contracts/consumer_group.rb +9 -1
  28. data/lib/karafka/contracts/topic.rb +3 -1
  29. data/lib/karafka/errors.rb +13 -0
  30. data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
  31. data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
  32. data/lib/karafka/instrumentation/logger_listener.rb +3 -9
  33. data/lib/karafka/instrumentation/notifications.rb +19 -9
  34. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
  35. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
  36. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
  37. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
  38. data/lib/karafka/pro/base_consumer.rb +47 -0
  39. data/lib/karafka/pro/connection/manager.rb +300 -0
  40. data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
  41. data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
  42. data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
  43. data/lib/karafka/pro/iterator.rb +1 -6
  44. data/lib/karafka/pro/loader.rb +16 -2
  45. data/lib/karafka/pro/processing/coordinator.rb +2 -1
  46. data/lib/karafka/pro/processing/executor.rb +37 -0
  47. data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
  48. data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
  49. data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
  50. data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
  51. data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
  52. data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
  53. data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
  54. data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
  55. data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
  56. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  57. data/lib/karafka/pro/processing/strategies/default.rb +136 -3
  58. data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
  59. data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
  60. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  61. data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
  62. data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
  63. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
  64. data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
  65. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
  66. data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
  67. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
  68. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
  69. data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
  70. data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
  71. data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
  72. data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
  73. data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
  74. data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
  75. data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
  76. data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
  77. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
  78. data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
  79. data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
  80. data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
  81. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
  82. data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
  83. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
  84. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  85. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
  86. data/lib/karafka/process.rb +5 -3
  87. data/lib/karafka/processing/coordinator.rb +5 -1
  88. data/lib/karafka/processing/executor.rb +43 -13
  89. data/lib/karafka/processing/executors_buffer.rb +22 -7
  90. data/lib/karafka/processing/jobs/base.rb +19 -2
  91. data/lib/karafka/processing/jobs/consume.rb +3 -3
  92. data/lib/karafka/processing/jobs/idle.rb +5 -0
  93. data/lib/karafka/processing/jobs/revoked.rb +5 -0
  94. data/lib/karafka/processing/jobs/shutdown.rb +5 -0
  95. data/lib/karafka/processing/jobs_queue.rb +19 -8
  96. data/lib/karafka/processing/schedulers/default.rb +42 -0
  97. data/lib/karafka/processing/strategies/base.rb +13 -4
  98. data/lib/karafka/processing/strategies/default.rb +23 -7
  99. data/lib/karafka/processing/strategies/dlq.rb +36 -0
  100. data/lib/karafka/processing/worker.rb +4 -1
  101. data/lib/karafka/routing/builder.rb +12 -2
  102. data/lib/karafka/routing/consumer_group.rb +5 -5
  103. data/lib/karafka/routing/features/base.rb +44 -8
  104. data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
  105. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  106. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
  107. data/lib/karafka/routing/proxy.rb +4 -3
  108. data/lib/karafka/routing/subscription_group.rb +2 -2
  109. data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
  110. data/lib/karafka/routing/topic.rb +8 -10
  111. data/lib/karafka/routing/topics.rb +1 -1
  112. data/lib/karafka/runner.rb +13 -3
  113. data/lib/karafka/server.rb +5 -9
  114. data/lib/karafka/setup/config.rb +21 -1
  115. data/lib/karafka/status.rb +23 -14
  116. data/lib/karafka/templates/karafka.rb.erb +7 -0
  117. data/lib/karafka/time_trackers/partition_usage.rb +56 -0
  118. data/lib/karafka/version.rb +1 -1
  119. data.tar.gz.sig +0 -0
  120. metadata +47 -13
  121. metadata.gz.sig +0 -0
  122. data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
  123. data/lib/karafka/pro/performance_tracker.rb +0 -84
  124. data/lib/karafka/pro/processing/scheduler.rb +0 -74
  125. data/lib/karafka/processing/scheduler.rb +0 -38
@@ -0,0 +1,300 @@
# frozen_string_literal: true

# This Karafka component is a Pro component under a commercial license.
# This Karafka component is NOT licensed under LGPL.
#
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module Connection
      # Manager that can handle working with multiplexed connections.
      #
      # This manager takes into consideration the number of partitions assigned to the topics and
      # does its best to balance. Additional connections may not always be utilized because
      # alongside of them, other processes may "hijack" the assignment. In such cases those extra
      # empty connections will be turned off after a while.
      #
      # @note Manager operations relate to consumer groups and not subscription groups. Since
      #   cluster operations can cause consumer group wide effects, we always apply only one
      #   change on a consumer group.
      #
      # @note Since we collect statistical data from listeners and this happens in a background
      #   thread, we need to make sure we lock not to have race conditions with expired data
      #   eviction.
      class Manager < Karafka::Connection::Manager
        include Core::Helpers::Time

        # How long should we keep stale stats before evicting them completely (in ms)
        EVICTION_DELAY = 5 * 60 * 1_000

        private_constant :EVICTION_DELAY

        # How long should we wait after a rebalance before doing anything on a consumer group
        #
        # @param scale_delay [Integer] How long should we wait before making any changes. Any
        #   change related to this consumer group will postpone the scaling operations. This is
        #   done that way to prevent too much friction in the cluster. It is 1 minute by default
        def initialize(scale_delay = 60 * 1_000)
          super()
          @scale_delay = scale_delay
          # Guards `@changes` against races between the statistics callback thread (`#notice`)
          # and the control thread (`#evict` / `#touch`)
          @mutex = Mutex.new
          # Per subscription group stability tracking; defaults represent "no data yet"
          @changes = Hash.new do |h, k|
            h[k] = {
              state: '',
              join_state: '',
              state_age: 0,
              state_age_sync: monotonic_now,
              changed_at: monotonic_now
            }
          end
        end

        # Registers listeners and starts the scaling procedures
        #
        # When using dynamic multiplexing, it will start the absolute minimum of connections for
        # subscription group available.
        #
        # @param listeners [Connection::ListenersBatch]
        def register(listeners)
          @listeners = listeners

          in_sg_families do |first_subscription_group, sg_listeners|
            multiplexing = first_subscription_group.multiplexing

            if multiplexing.active? && multiplexing.dynamic?
              # Start as many boot listeners as user wants. If not configured, starts half of max.
              sg_listeners.first(multiplexing.boot).each(&:start!)
            else
              sg_listeners.each(&:start!)
            end
          end
        end

        # Collects data from the statistics about given subscription group. This is used to ensure
        # that we do not rescale short after rebalances, deployments, etc.
        # @param subscription_group_id [String] id of the subscription group for which statistics
        #   were emitted
        # @param statistics [Hash] emitted statistics
        #
        # @note Please note that while we collect here per subscription group, we use those metrics
        #   collectively on a whole consumer group. This reduces the friction.
        def notice(subscription_group_id, statistics)
          @mutex.synchronize do
            times = []
            # stateage is in microseconds
            # We monitor broker changes to make sure we do not introduce extra friction
            times << statistics['brokers'].values.map { |stats| stats['stateage'] }.min / 1_000
            times << statistics['cgrp']['rebalance_age']
            times << statistics['cgrp']['stateage']

            # Keep the previous change age for changes that were triggered by us
            previous_changed_at = @changes[subscription_group_id][:changed_at]

            @changes[subscription_group_id] = {
              state_age: times.min,
              changed_at: previous_changed_at,
              join_state: statistics['cgrp']['join_state'],
              state: statistics['cgrp']['state'],
              state_age_sync: monotonic_now
            }
          end
        end

        # Shuts down all the listeners when it is time (including moving to quiet) or rescales
        # when it is needed
        def control
          Karafka::App.done? ? shutdown : rescale
        end

        private

        # Handles the shutdown and quiet flows
        def shutdown
          active_listeners = @listeners.active

          # When we are done processing immediately quiet all the listeners so they do not pick up
          # new work to do
          once(:quiet!) { active_listeners.each(&:quiet!) }

          # If we are in the process of moving to quiet state, we need to check it.
          if Karafka::App.quieting? && active_listeners.all?(&:quiet?)
            once(:quieted!) { Karafka::App.quieted! }
          end

          return if Karafka::App.quiet?

          # Since separate subscription groups are subscribed to different topics, there is no risk
          # in shutting them down independently even if they operate in the same subscription group
          in_sg_families do |first_subscription_group, sg_listeners|
            active_sg_listeners = sg_listeners.select(&:active?)

            # Do nothing until all listeners from the same consumer group are quiet. Otherwise we
            # could have problems with in-flight rebalances during shutdown
            next unless active_sg_listeners.all?(&:quiet?)

            # Do not stop the same family twice
            once(:stop!, first_subscription_group.name) { active_sg_listeners.each(&:stop!) }
          end

          return unless @listeners.active.all?(&:stopped?)

          # All listeners including pending need to be moved at the end to stopped state for
          # the whole server to stop
          once(:stop!) { @listeners.each(&:stopped!) }
        end

        # Handles two scenarios:
        #   - Selects subscriptions that could benefit from having more parallel connections
        #     to kafka and then upscales them
        #   - Selects subscriptions that are idle (have nothing subscribed to them) and then shuts
        #     them down
        #
        # We always run scaling down and up because it may be applicable to different CGs
        def rescale
          evict

          scale_down
          scale_up
        end

        # Checks for connections without any assignments and scales them down.
        # Does that only for dynamically multiplexed subscription groups
        def scale_down
          sgs_in_use = Karafka::App.assignments.keys.map(&:subscription_group).uniq

          # Select connections for scaling down
          in_sg_families do |first_subscription_group, sg_listeners|
            next unless stable?(sg_listeners)

            multiplexing = first_subscription_group.multiplexing

            next unless multiplexing.active?
            next unless multiplexing.dynamic?

            # If we cannot downscale, do not
            next if sg_listeners.count(&:active?) <= multiplexing.min

            sg_listeners.each do |sg_listener|
              # Do not stop connections with subscriptions
              next if sgs_in_use.include?(sg_listener.subscription_group)
              # Skip listeners that are already in standby
              next unless sg_listener.active?

              touch(sg_listener.subscription_group.id)

              # Shut down not used connection
              sg_listener.stop!

              # Only one change per consumer group per control round (see class @note)
              break
            end
          end
        end

        # Checks if we have space to scale and if there are any assignments with multiple topics
        # partitions assigned in sgs that can be scaled. If that is the case, we scale up.
        def scale_up
          multi_part_sgs_families = Karafka::App
                                    .assignments
                                    .select { |_, partitions| partitions.size > 1 }
                                    .keys
                                    .map(&:subscription_group)
                                    .map(&:name)
                                    .uniq

          # Select connections for scaling up
          in_sg_families do |first_subscription_group, sg_listeners|
            next unless stable?(sg_listeners)

            multiplexing = first_subscription_group.multiplexing

            next unless multiplexing.active?
            next unless multiplexing.dynamic?
            # If we cannot upscale, do not
            next if sg_listeners.count(&:active?) >= multiplexing.max

            sg_listeners.each do |sg_listener|
              next unless multi_part_sgs_families.include?(sg_listener.subscription_group.name)
              # Skip already active connections
              next unless sg_listener.pending? || sg_listener.stopped?

              touch(sg_listener.subscription_group.id)
              sg_listener.start!

              # Only one change per consumer group per control round (see class @note)
              break
            end
          end
        end

        # Removes states that are no longer being reported for stopped/pending listeners
        def evict
          @mutex.synchronize do
            @changes.delete_if do |_, details|
              monotonic_now - details[:state_age_sync] >= EVICTION_DELAY
            end
          end
        end

        # Indicates, that something has changed on a subscription group. We consider every single
        # change we make as a change to the setup as well.
        # @param subscription_group_id [String]
        def touch(subscription_group_id)
          @mutex.synchronize do
            # `changed_at: 0` makes `stable?` treat this group as "just changed" for at least
            # `@scale_delay` ms, postponing any further scaling on it
            @changes[subscription_group_id][:changed_at] = 0
            @changes[subscription_group_id][:state_age_sync] = monotonic_now
          end
        end

        # @param sg_listeners [Array<Listener>] listeners from one multiplexed sg
        # @return [Boolean] is given subscription group listeners set stable. It is considered
        #   stable when it had no changes happening on it recently and all relevant states in it
        #   are also stable. This is a strong indicator that no rebalances or other operations are
        #   happening at a given moment.
        def stable?(sg_listeners)
          # If none of listeners has changes reported it means we did not yet start collecting
          # metrics about any of them and at least one must be present. We do not consider it
          # stable in such case as we still are waiting for metrics.
          return false if sg_listeners.none? do |sg_listener|
            @changes.key?(sg_listener.subscription_group.id)
          end

          sg_listeners.all? do |sg_listener|
            # Not all SGs may be started initially or may be stopped, we ignore them here as they
            # are irrelevant from the point of view of establishing stability
            next true unless @changes.key?(sg_listener.subscription_group.id)

            state = @changes[sg_listener.subscription_group.id]

            # 'up' / 'steady' are the librdkafka consumer group states indicating a settled,
            # post-rebalance assignment
            state[:state_age] >= @scale_delay &&
              (monotonic_now - state[:changed_at]) >= @scale_delay &&
              state[:state] == 'up' &&
              state[:join_state] == 'steady'
          end
        end

        # Yields listeners in groups based on their subscription groups
        # @yieldparam [Karafka::Routing::SubscriptionGroup] first subscription group out of the
        #   family
        # @yieldparam [Array<Listener>] listeners of a single subscription group
        def in_sg_families
          grouped = @listeners.group_by { |listener| listener.subscription_group.name }

          grouped.each_value do |listeners|
            listener = listeners.first

            yield(
              listener.subscription_group,
              listeners
            )
          end
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
# frozen_string_literal: true

# This Karafka component is a Pro component under a commercial license.
# This Karafka component is NOT licensed under LGPL.
#
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    # Namespace for Pro connections related components
    module Connection
      # Namespace for Multiplexing management related components
      module Multiplexing
        # Listener used to connect listeners manager to the lifecycle events that are significant
        # to its operations
        class Listener
          # Caches the connection manager reference so every statistics event can be routed
          # without repeated config lookups
          def initialize
            @manager = App.config.internal.connection.manager
          end

          # Forwards emitted statistics to the connection manager so it can track subscription
          # group details
          #
          # @param event [Karafka::Core::Monitoring::Event] event with statistics
          def on_statistics_emitted(event)
            @manager.notice(event[:subscription_group_id], event[:statistics])
          end
        end
      end
    end
  end
end
@@ -0,0 +1,85 @@
# frozen_string_literal: true

# This Karafka component is a Pro component under a commercial license.
# This Karafka component is NOT licensed under LGPL.
#
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    # Namespace for Pro components instrumentation related code
    module Instrumentation
      # Tracker used to keep track of performance metrics
      # It provides insights that can be used to optimize processing flow
      # @note Even if we have some race-conditions here it is relevant due to the quantity of data.
      #   This is why we do not mutex it.
      class PerformanceTracker
        include Singleton

        # How many samples do we collect per topic partition
        SAMPLES_COUNT = 200

        private_constant :SAMPLES_COUNT

        # Builds up a nested hash where samples auto-vivify per topic and per partition
        def initialize
          @processing_times = Hash.new do |topics, topic_name|
            topics[topic_name] = Hash.new do |partitions, partition_id|
              partitions[partition_id] = []
            end
          end
        end

        # @param topic [String]
        # @param partition [Integer]
        # @return [Float] p95 processing time of a single message from a single topic partition
        def processing_time_p95(topic, partition)
          samples = @processing_times[topic][partition]

          case samples.size
          when 0 then 0
          when 1 then samples.first
          else percentile(0.95, samples)
          end
        end

        # @private
        # @param event [Karafka::Core::Monitoring::Event] event details
        # Tracks time taken to process a single message of a given topic partition
        def on_consumer_consumed(event)
          messages = event[:caller].messages
          metadata = messages.metadata

          samples = @processing_times[metadata.topic][metadata.partition]
          # Average per-message time for this batch
          samples << event[:time] / messages.count

          # Keep the window bounded by dropping the oldest sample once full
          samples.shift if samples.size > SAMPLES_COUNT
        end

        private

        # Computes the requested percentile out of provided values using linear interpolation
        # @param percentile [Float]
        # @param values [Array<Numeric>] all the values based on which we compute
        # @return [Float] computed percentile
        def percentile(percentile, values)
          sorted = values.sort

          rank = percentile * (sorted.length - 1) + 1
          lower = rank.floor - 1
          fraction = rank.modulo(1)

          sorted[lower] + (fraction * (sorted[lower + 1] - sorted[lower]))
        end
      end
    end
  end
end
@@ -25,7 +25,7 @@ module Karafka
25
25
  # @param consumer [::Rdkafka::Consumer] consumer instance needed to talk with Kafka
26
26
  # @param expanded_topics [Hash] hash with expanded and normalized topics data
27
27
  def initialize(consumer, expanded_topics)
28
- @consumer = Connection::Proxy.new(consumer)
28
+ @consumer = ::Karafka::Connection::Proxy.new(consumer)
29
29
  @expanded_topics = expanded_topics
30
30
  @mapped_topics = Hash.new { |h, k| h[k] = {} }
31
31
  end
@@ -22,11 +22,6 @@ module Karafka
22
22
  #
23
23
  # It does **not** create a consumer group and does not have any offset management.
24
24
  class Iterator
25
- # Local partition reference for librdkafka
26
- Partition = Struct.new(:partition, :offset)
27
-
28
- private_constant :Partition
29
-
30
25
  # A simple API allowing to iterate over topic/partition data, without having to subscribe
31
26
  # and deal with rebalances. This API allows for multi-partition streaming and is optimized
32
27
  # for data lookups. It allows for explicit stopping iteration over any partition during
@@ -127,7 +122,7 @@ module Karafka
127
122
 
128
123
  @current_consumer.pause(
129
124
  Rdkafka::Consumer::TopicPartitionList.new(
130
- name => [Partition.new(partition, 0)]
125
+ name => [Rdkafka::Consumer::Partition.new(partition, 0)]
131
126
  )
132
127
  )
133
128
  end
@@ -53,6 +53,7 @@ module Karafka
53
53
  features.each { |feature| feature.pre_setup(config) }
54
54
 
55
55
  reconfigure(config)
56
+ expand
56
57
 
57
58
  load_topic_features
58
59
  end
@@ -82,18 +83,31 @@ module Karafka
82
83
 
83
84
  icfg.cli.contract = Contracts::ServerCliOptions.new
84
85
 
86
+ # Use manager that supports multiplexing
87
+ icfg.connection.manager = Connection::Manager.new
88
+
85
89
  icfg.processing.coordinator_class = Processing::Coordinator
86
90
  icfg.processing.partitioner_class = Processing::Partitioner
87
- icfg.processing.scheduler_class = Processing::Scheduler
91
+ icfg.processing.scheduler_class = Processing::Schedulers::Default
88
92
  icfg.processing.jobs_queue_class = Processing::JobsQueue
93
+ icfg.processing.executor_class = Processing::Executor
89
94
  icfg.processing.jobs_builder = Processing::JobsBuilder.new
90
95
  icfg.processing.strategy_selector = Processing::StrategySelector.new
96
+ icfg.processing.expansions_selector = Processing::ExpansionsSelector.new
91
97
 
92
98
  icfg.active_job.consumer_class = ActiveJob::Consumer
93
99
  icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
94
100
  icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
95
101
 
96
- config.monitor.subscribe(PerformanceTracker.instance)
102
+ config.monitor.subscribe(Instrumentation::PerformanceTracker.instance)
103
+ end
104
+
105
+ # Adds extra modules to certain classes
106
+ # This expands their functionalities with things that are needed when operating in Pro
107
+ # It is used only when given class is part of the end user API and cannot be swapped by
108
+ # a pluggable component
109
+ def expand
110
+ Karafka::BaseConsumer.include Pro::BaseConsumer
97
111
  end
98
112
 
99
113
  # Loads the Pro features of Karafka
@@ -42,7 +42,8 @@ module Karafka
42
42
 
43
43
  @virtual_offset_manager = VirtualOffsetManager.new(
44
44
  topic.name,
45
- partition
45
+ partition,
46
+ topic.virtual_partitions.offset_metadata_strategy
46
47
  )
47
48
 
48
49
  # We register our own "internal" filter to support filtering of messages that were marked
@@ -0,0 +1,37 @@
# frozen_string_literal: true

# This Karafka component is a Pro component under a commercial license.
# This Karafka component is NOT licensed under LGPL.
#
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module Processing
      # Pro executor that supports periodic jobs
      #
      # NOTE(review): the `consumer` reader used below is presumably provided by the parent
      # `Karafka::Processing::Executor` — confirm against the base class.
      class Executor < Karafka::Processing::Executor
        # Runs the code that should happen before periodic job is scheduled
        #
        # @note While jobs are called `Periodic`, from the consumer perspective it is "ticking".
        #   This name was taken for a reason: we may want to introduce periodic ticking also not
        #   only during polling but for example on wait and a name "poll" would not align well.
        #   A name "periodic" is not a verb and our other consumer actions are verbs like:
        #   consume or revoked. So for the sake of consistency we have ticking here.
        def before_schedule_periodic
          consumer.on_before_schedule_tick
        end

        # Triggers consumer ticking
        def periodic
          consumer.on_tick
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
# frozen_string_literal: true

# This Karafka component is a Pro component under a commercial license.
# This Karafka component is NOT licensed under LGPL.
#
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module Processing
      # Pro selector of appropriate topic setup based features enhancements.
      class ExpansionsSelector < Karafka::Processing::ExpansionsSelector
        # @param topic [Karafka::Routing::Topic] topic with settings based on which we find
        #   expansions
        # @return [Array<Module>] modules with proper expansions we're supposed to use to enhance
        #   the consumer
        def find(topic)
          # Start with the non-pro expansions
          expansions = super
          # Offset metadata support is an opt-in topic feature, hence the conditional expansion
          expansions << Pro::Processing::OffsetMetadata::Consumer if topic.offset_metadata?
          expansions
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
# frozen_string_literal: true

# This Karafka component is a Pro component under a commercial license.
# This Karafka component is NOT licensed under LGPL.
#
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module Processing
      module Jobs
        # Job that represents a "ticking" work. Work that we run periodically for the Periodics
        # enabled topics.
        #
        # NOTE(review): the bare `executor` reader used below is presumably defined on
        # `::Karafka::Processing::Jobs::Base` — confirm against the base class.
        class Periodic < ::Karafka::Processing::Jobs::Base
          # @param executor [Karafka::Pro::Processing::Executor] pro executor that is supposed to
          #   run a given job
          def initialize(executor)
            @executor = executor
            super()
          end

          # Code executed before we schedule this job
          def before_schedule
            executor.before_schedule_periodic
          end

          # Runs the executor periodic action
          def call
            executor.periodic
          end
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
# frozen_string_literal: true

# This Karafka component is a Pro component under a commercial license.
# This Karafka component is NOT licensed under LGPL.
#
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module Processing
      module Jobs
        # Non-Blocking version of the Periodic job
        # We use this version for LRJ topics for cases where saturated resources would not allow
        # to run this job for extended period of time. Under such scenarios, if we would not use
        # a non-blocking one, we would reach max.poll.interval.ms.
        class PeriodicNonBlocking < Periodic
          # @param args [Array] any arguments accepted by `::Karafka::Processing::Jobs::Periodic`
          def initialize(*args)
            super
            # NOTE(review): `@non_blocking` is presumably read by the jobs queue/scheduler to
            # decide whether this job may yield — confirm against the base job class.
            @non_blocking = true
          end
        end
      end
    end
  end
end