karafka 2.2.14 → 2.3.0.alpha2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +38 -12
- data/.ruby-version +1 -1
- data/CHANGELOG.md +24 -0
- data/Gemfile.lock +16 -16
- data/README.md +0 -2
- data/SECURITY.md +23 -0
- data/bin/integrations +1 -1
- data/config/locales/errors.yml +7 -1
- data/config/locales/pro_errors.yml +22 -0
- data/docker-compose.yml +1 -1
- data/karafka.gemspec +2 -2
- data/lib/karafka/admin/acl.rb +287 -0
- data/lib/karafka/admin.rb +9 -13
- data/lib/karafka/app.rb +5 -3
- data/lib/karafka/base_consumer.rb +9 -1
- data/lib/karafka/cli/base.rb +1 -1
- data/lib/karafka/connection/client.rb +83 -76
- data/lib/karafka/connection/conductor.rb +28 -0
- data/lib/karafka/connection/listener.rb +159 -42
- data/lib/karafka/connection/listeners_batch.rb +5 -11
- data/lib/karafka/connection/manager.rb +72 -0
- data/lib/karafka/connection/messages_buffer.rb +12 -0
- data/lib/karafka/connection/proxy.rb +17 -0
- data/lib/karafka/connection/status.rb +75 -0
- data/lib/karafka/contracts/config.rb +14 -10
- data/lib/karafka/contracts/consumer_group.rb +9 -1
- data/lib/karafka/contracts/topic.rb +3 -1
- data/lib/karafka/errors.rb +17 -0
- data/lib/karafka/instrumentation/logger_listener.rb +3 -0
- data/lib/karafka/instrumentation/notifications.rb +13 -5
- data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +20 -1
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
- data/lib/karafka/pro/base_consumer.rb +47 -0
- data/lib/karafka/pro/connection/manager.rb +269 -0
- data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
- data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
- data/lib/karafka/pro/iterator.rb +1 -6
- data/lib/karafka/pro/loader.rb +14 -0
- data/lib/karafka/pro/processing/coordinator.rb +2 -1
- data/lib/karafka/pro/processing/executor.rb +37 -0
- data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
- data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
- data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
- data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
- data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
- data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
- data/lib/karafka/pro/processing/schedulers/base.rb +39 -23
- data/lib/karafka/pro/processing/schedulers/default.rb +12 -14
- data/lib/karafka/pro/processing/strategies/default.rb +154 -1
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +39 -0
- data/lib/karafka/pro/processing/strategies/vp/default.rb +65 -25
- data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
- data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
- data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
- data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
- data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
- data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
- data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
- data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
- data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
- data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
- data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
- data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
- data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
- data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
- data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
- data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
- data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
- data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
- data/lib/karafka/process.rb +5 -3
- data/lib/karafka/processing/coordinator.rb +5 -1
- data/lib/karafka/processing/executor.rb +16 -10
- data/lib/karafka/processing/executors_buffer.rb +19 -4
- data/lib/karafka/processing/schedulers/default.rb +3 -2
- data/lib/karafka/processing/strategies/default.rb +6 -0
- data/lib/karafka/processing/strategies/dlq.rb +36 -0
- data/lib/karafka/routing/builder.rb +12 -2
- data/lib/karafka/routing/consumer_group.rb +5 -5
- data/lib/karafka/routing/features/base.rb +44 -8
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
- data/lib/karafka/routing/subscription_group.rb +2 -2
- data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
- data/lib/karafka/routing/topic.rb +8 -10
- data/lib/karafka/runner.rb +13 -3
- data/lib/karafka/server.rb +5 -9
- data/lib/karafka/setup/config.rb +17 -0
- data/lib/karafka/status.rb +23 -14
- data/lib/karafka/templates/karafka.rb.erb +7 -0
- data/lib/karafka/time_trackers/partition_usage.rb +56 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +42 -10
- metadata.gz.sig +0 -0
- data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
|
@@ -247,6 +247,9 @@ module Karafka
|
|
|
247
247
|
when 'consumer.shutdown.error'
|
|
248
248
|
error "Consumer on shutdown failed due to an error: #{error}"
|
|
249
249
|
error details
|
|
250
|
+
when 'consumer.tick.error'
|
|
251
|
+
error "Consumer tick failed due to an error: #{error}"
|
|
252
|
+
error details
|
|
250
253
|
when 'worker.process.error'
|
|
251
254
|
fatal "Worker processing failed due to an error: #{error}"
|
|
252
255
|
fatal details
|
|
@@ -36,22 +36,25 @@ module Karafka
|
|
|
36
36
|
connection.listener.before_fetch_loop
|
|
37
37
|
connection.listener.fetch_loop
|
|
38
38
|
connection.listener.fetch_loop.received
|
|
39
|
-
|
|
40
|
-
rebalance.partitions_assign
|
|
41
|
-
rebalance.partitions_assigned
|
|
42
|
-
rebalance.partitions_revoke
|
|
43
|
-
rebalance.partitions_revoked
|
|
39
|
+
connection.listener.after_fetch_loop
|
|
44
40
|
|
|
45
41
|
consumer.before_schedule_consume
|
|
46
42
|
consumer.consume
|
|
47
43
|
consumer.consumed
|
|
48
44
|
consumer.consuming.pause
|
|
49
45
|
consumer.consuming.retry
|
|
46
|
+
|
|
50
47
|
consumer.before_schedule_idle
|
|
51
48
|
consumer.idle
|
|
49
|
+
|
|
52
50
|
consumer.before_schedule_revoked
|
|
53
51
|
consumer.revoke
|
|
54
52
|
consumer.revoked
|
|
53
|
+
|
|
54
|
+
consumer.before_schedule_tick
|
|
55
|
+
consumer.tick
|
|
56
|
+
consumer.ticked
|
|
57
|
+
|
|
55
58
|
consumer.before_schedule_shutdown
|
|
56
59
|
consumer.shutting_down
|
|
57
60
|
consumer.shutdown
|
|
@@ -63,6 +66,11 @@ module Karafka
|
|
|
63
66
|
|
|
64
67
|
process.notice_signal
|
|
65
68
|
|
|
69
|
+
rebalance.partitions_assign
|
|
70
|
+
rebalance.partitions_assigned
|
|
71
|
+
rebalance.partitions_revoke
|
|
72
|
+
rebalance.partitions_revoked
|
|
73
|
+
|
|
66
74
|
statistics.emitted
|
|
67
75
|
|
|
68
76
|
worker.process
|
|
@@ -42,6 +42,16 @@ module Karafka
|
|
|
42
42
|
|
|
43
43
|
configure
|
|
44
44
|
|
|
45
|
+
# Types of errors originating from user code in the consumer flow
|
|
46
|
+
USER_CONSUMER_ERROR_TYPES = %w[
|
|
47
|
+
consumer.consume.error
|
|
48
|
+
consumer.revoked.error
|
|
49
|
+
consumer.shutdown.error
|
|
50
|
+
consumer.tick.error
|
|
51
|
+
].freeze
|
|
52
|
+
|
|
53
|
+
private_constant :USER_CONSUMER_ERROR_TYPES
|
|
54
|
+
|
|
45
55
|
# Before each consumption process, lets start a transaction associated with it
|
|
46
56
|
# We also set some basic metadata about the given consumption that can be useful for
|
|
47
57
|
# debugging
|
|
@@ -94,34 +104,27 @@ module Karafka
|
|
|
94
104
|
client.register_probe(:karafka, -> { minute_probe })
|
|
95
105
|
end
|
|
96
106
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
end
|
|
111
|
-
|
|
112
|
-
# Keeps track of revocation user code execution
|
|
113
|
-
#
|
|
114
|
-
# @param event [Karafka::Core::Monitoring::Event]
|
|
115
|
-
def on_consumer_shutting_down(event)
|
|
116
|
-
consumer = event.payload[:caller]
|
|
117
|
-
start_transaction(consumer, 'shutdown')
|
|
118
|
-
end
|
|
107
|
+
[
|
|
108
|
+
%i[revoke revoked revoked],
|
|
109
|
+
%i[shutting_down shutdown shutdown],
|
|
110
|
+
%i[tick ticked tick]
|
|
111
|
+
].each do |before, after, name|
|
|
112
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
|
113
|
+
# Keeps track of user code execution
|
|
114
|
+
#
|
|
115
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
|
116
|
+
def on_consumer_#{before}(event)
|
|
117
|
+
consumer = event.payload[:caller]
|
|
118
|
+
start_transaction(consumer, '#{name}')
|
|
119
|
+
end
|
|
119
120
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
121
|
+
# Finishes the transaction
|
|
122
|
+
#
|
|
123
|
+
# @param _event [Karafka::Core::Monitoring::Event]
|
|
124
|
+
def on_consumer_#{after}(_event)
|
|
125
|
+
stop_transaction
|
|
126
|
+
end
|
|
127
|
+
RUBY
|
|
125
128
|
end
|
|
126
129
|
|
|
127
130
|
# Counts DLQ dispatches
|
|
@@ -141,7 +144,7 @@ module Karafka
|
|
|
141
144
|
# @param event [Karafka::Core::Monitoring::Event] error event details
|
|
142
145
|
def on_error_occurred(event)
|
|
143
146
|
# If this is a user consumption related error, we bump the counters for metrics
|
|
144
|
-
if event[:type]
|
|
147
|
+
if USER_CONSUMER_ERROR_TYPES.include?(event[:type])
|
|
145
148
|
consumer = event.payload[:caller]
|
|
146
149
|
|
|
147
150
|
with_multiple_resolutions(consumer) do |tags|
|
|
@@ -55,7 +55,24 @@ module Karafka
|
|
|
55
55
|
consumer = job.executor.topic.consumer
|
|
56
56
|
topic = job.executor.topic.name
|
|
57
57
|
|
|
58
|
-
|
|
58
|
+
action = case job_type
|
|
59
|
+
when 'Periodic'
|
|
60
|
+
'tick'
|
|
61
|
+
when 'PeriodicNonBlocking'
|
|
62
|
+
'tick'
|
|
63
|
+
when 'Shutdown'
|
|
64
|
+
'shutdown'
|
|
65
|
+
when 'Revoked'
|
|
66
|
+
'revoked'
|
|
67
|
+
when 'RevokedNonBlocking'
|
|
68
|
+
'revoked'
|
|
69
|
+
when 'Idle'
|
|
70
|
+
'idle'
|
|
71
|
+
else
|
|
72
|
+
'consume'
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
current_span.resource = "#{consumer}##{action}"
|
|
59
76
|
info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} started"
|
|
60
77
|
|
|
61
78
|
pop_tags
|
|
@@ -102,6 +119,8 @@ module Karafka
|
|
|
102
119
|
error "Consumer after consume failed due to an error: #{error}"
|
|
103
120
|
when 'consumer.shutdown.error'
|
|
104
121
|
error "Consumer on shutdown failed due to an error: #{error}"
|
|
122
|
+
when 'consumer.tick.error'
|
|
123
|
+
error "Consumer tick failed due to an error: #{error}"
|
|
105
124
|
when 'worker.process.error'
|
|
106
125
|
fatal "Worker processing failed due to an error: #{error}"
|
|
107
126
|
when 'connection.listener.fetch_loop.error'
|
|
@@ -128,18 +128,21 @@ module Karafka
|
|
|
128
128
|
histogram('consumer.consumption_lag', metadata.consumption_lag, tags: tags)
|
|
129
129
|
end
|
|
130
130
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
131
|
+
{
|
|
132
|
+
revoked: :revoked,
|
|
133
|
+
shutdown: :shutdown,
|
|
134
|
+
ticked: :tick
|
|
135
|
+
}.each do |after, name|
|
|
136
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
|
137
|
+
# Keeps track of user code execution
|
|
138
|
+
#
|
|
139
|
+
# @param event [Karafka::Core::Monitoring::Event]
|
|
140
|
+
def on_consumer_#{after}(event)
|
|
141
|
+
tags = default_tags + consumer_tags(event.payload[:caller])
|
|
142
|
+
|
|
143
|
+
count('consumer.#{name}', 1, tags: tags)
|
|
144
|
+
end
|
|
145
|
+
RUBY
|
|
143
146
|
end
|
|
144
147
|
|
|
145
148
|
# Worker related metrics
|
|
@@ -15,6 +15,14 @@ module Karafka
|
|
|
15
15
|
# data would be processed, but process itself would still be active. This listener allows
|
|
16
16
|
# for defining of a ttl that gets bumped on each poll loop and before and after processing
|
|
17
17
|
# of a given messages batch.
|
|
18
|
+
#
|
|
19
|
+
# @note This listener will bind itself only when Karafka will actually attempt to start
|
|
20
|
+
# and moves from initializing to running. Before that, the TCP server will NOT be active.
|
|
21
|
+
# This is done on purpose to mitigate a case where users would subscribe this listener
|
|
22
|
+
# in `karafka.rb` without checking the recommendations of conditional assignment.
|
|
23
|
+
#
|
|
24
|
+
# @note In case of usage within an embedding with Puma, you need to select different port
|
|
25
|
+
# than the one used by Puma itself.
|
|
18
26
|
class LivenessListener
|
|
19
27
|
include ::Karafka::Core::Helpers::Time
|
|
20
28
|
|
|
@@ -40,12 +48,18 @@ module Karafka
|
|
|
40
48
|
consuming_ttl: 5 * 60 * 1_000,
|
|
41
49
|
polling_ttl: 5 * 60 * 1_000
|
|
42
50
|
)
|
|
43
|
-
@
|
|
51
|
+
@hostname = hostname
|
|
52
|
+
@port = port
|
|
44
53
|
@polling_ttl = polling_ttl
|
|
45
54
|
@consuming_ttl = consuming_ttl
|
|
46
55
|
@mutex = Mutex.new
|
|
47
56
|
@pollings = {}
|
|
48
57
|
@consumptions = {}
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# @param _event [Karafka::Core::Monitoring::Event]
|
|
61
|
+
def on_app_running(_event)
|
|
62
|
+
@server = TCPServer.new(*[@hostname, @port].compact)
|
|
49
63
|
|
|
50
64
|
Thread.new do
|
|
51
65
|
loop do
|
|
@@ -54,42 +68,37 @@ module Karafka
|
|
|
54
68
|
end
|
|
55
69
|
end
|
|
56
70
|
|
|
57
|
-
#
|
|
58
|
-
# @param _event [Karafka::Core::Monitoring::Event]
|
|
59
|
-
def on_connection_listener_fetch_loop(_event)
|
|
60
|
-
mark_polling_tick
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
# Tick on starting work
|
|
64
|
-
# @param _event [Karafka::Core::Monitoring::Event]
|
|
65
|
-
def on_consumer_consume(_event)
|
|
66
|
-
mark_consumption_tick
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
# Tick on finished work
|
|
70
|
-
# @param _event [Karafka::Core::Monitoring::Event]
|
|
71
|
-
def on_consumer_consumed(_event)
|
|
72
|
-
clear_consumption_tick
|
|
73
|
-
end
|
|
74
|
-
|
|
71
|
+
# Stop the http server when we stop the process
|
|
75
72
|
# @param _event [Karafka::Core::Monitoring::Event]
|
|
76
|
-
def
|
|
77
|
-
|
|
73
|
+
def on_app_stopped(_event)
|
|
74
|
+
@server.close
|
|
78
75
|
end
|
|
79
76
|
|
|
77
|
+
# Tick on each fetch
|
|
80
78
|
# @param _event [Karafka::Core::Monitoring::Event]
|
|
81
|
-
def
|
|
82
|
-
|
|
79
|
+
def on_connection_listener_fetch_loop(_event)
|
|
80
|
+
mark_polling_tick
|
|
83
81
|
end
|
|
84
82
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
83
|
+
{
|
|
84
|
+
consume: :consumed,
|
|
85
|
+
revoke: :revoked,
|
|
86
|
+
shutting_down: :shutdown,
|
|
87
|
+
tick: :ticked
|
|
88
|
+
}.each do |before, after|
|
|
89
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
|
90
|
+
# Tick on starting work
|
|
91
|
+
# @param _event [Karafka::Core::Monitoring::Event]
|
|
92
|
+
def on_consumer_#{before}(_event)
|
|
93
|
+
mark_consumption_tick
|
|
94
|
+
end
|
|
89
95
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
96
|
+
# Tick on finished work
|
|
97
|
+
# @param _event [Karafka::Core::Monitoring::Event]
|
|
98
|
+
def on_consumer_#{after}(_event)
|
|
99
|
+
clear_consumption_tick
|
|
100
|
+
end
|
|
101
|
+
RUBY
|
|
93
102
|
end
|
|
94
103
|
|
|
95
104
|
# @param _event [Karafka::Core::Monitoring::Event]
|
|
@@ -98,12 +107,6 @@ module Karafka
|
|
|
98
107
|
clear_polling_tick
|
|
99
108
|
end
|
|
100
109
|
|
|
101
|
-
# Stop the http server when we stop the process
|
|
102
|
-
# @param _event [Karafka::Core::Monitoring::Event]
|
|
103
|
-
def on_app_stopped(_event)
|
|
104
|
-
@server.close
|
|
105
|
-
end
|
|
106
|
-
|
|
107
110
|
private
|
|
108
111
|
|
|
109
112
|
# Wraps the logic with a mutex
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
# Extra methods always used in the base consumer in the pro mode
|
|
17
|
+
#
|
|
18
|
+
# We do not define those methods as part of the strategies flows, because they are injected
|
|
19
|
+
# (strategies) on singletons and often used only in one of the strategy variants
|
|
20
|
+
#
|
|
21
|
+
# Methods here are supposed to be always available or are expected to be redefined
|
|
22
|
+
module BaseConsumer
|
|
23
|
+
# Runs the on-schedule tick periodic operations
|
|
24
|
+
# This method is an alias but is part of the naming convention used for other flows, this
|
|
25
|
+
# is why we do not reference the `handle_before_schedule_tick` directly
|
|
26
|
+
def on_before_schedule_tick
|
|
27
|
+
handle_before_schedule_tick
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Used by the executor to trigger consumer tick
|
|
31
|
+
# @private
|
|
32
|
+
def on_tick
|
|
33
|
+
handle_tick
|
|
34
|
+
rescue StandardError => e
|
|
35
|
+
Karafka.monitor.instrument(
|
|
36
|
+
'error.occurred',
|
|
37
|
+
error: e,
|
|
38
|
+
caller: self,
|
|
39
|
+
type: 'consumer.tick.error'
|
|
40
|
+
)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# By default we do nothing when ticking
|
|
44
|
+
def tick; end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
module Connection
|
|
17
|
+
# Manager that can handle working with multiplexed connections.
|
|
18
|
+
#
|
|
19
|
+
# This manager takes into consideration the number of partitions assigned to the topics and
|
|
20
|
+
# does its best to balance. Additional connections may not always be utilized because
|
|
21
|
+
# alongside of them, other processes may "hijack" the assignment. In such cases those extra
|
|
22
|
+
# empty connections will be turned off after a while.
|
|
23
|
+
#
|
|
24
|
+
# @note Manager operations relate to consumer groups and not subscription groups. Since
|
|
25
|
+
# cluster operations can cause consumer group wide effects, we always apply only one
|
|
26
|
+
# change on a consumer group.
|
|
27
|
+
class Manager < Karafka::Connection::Manager
|
|
28
|
+
include Core::Helpers::Time
|
|
29
|
+
|
|
30
|
+
# How long should we wait after a rebalance before doing anything on a consumer group
|
|
31
|
+
#
|
|
32
|
+
# @param scale_delay [Integer] How long should we wait before making any changes. Any
|
|
33
|
+
# change related to this consumer group will postpone the scaling operations. This is
|
|
34
|
+
# done that way to prevent too much friction in the cluster. It is 1 minute by default
|
|
35
|
+
def initialize(scale_delay = 60 * 1_000)
|
|
36
|
+
super()
|
|
37
|
+
@scale_delay = scale_delay
|
|
38
|
+
@mutex = Mutex.new
|
|
39
|
+
@changes = Hash.new do |h, k|
|
|
40
|
+
h[k] = {
|
|
41
|
+
state: '',
|
|
42
|
+
join_state: '',
|
|
43
|
+
state_age: 0,
|
|
44
|
+
changed_at: monotonic_now
|
|
45
|
+
}
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Registers listeners and starts the scaling procedures
|
|
50
|
+
#
|
|
51
|
+
# When using dynamic multiplexing, it will start the absolute minimum of connections for
|
|
52
|
+
# subscription group available.
|
|
53
|
+
#
|
|
54
|
+
# @param listeners [Connection::ListenersBatch]
|
|
55
|
+
def register(listeners)
|
|
56
|
+
@listeners = listeners
|
|
57
|
+
|
|
58
|
+
# Preload all the keys into the hash so we never add keys to changes but just change them
|
|
59
|
+
listeners.each { |listener| @changes[listener.subscription_group.id] }
|
|
60
|
+
|
|
61
|
+
in_sg_families do |first_subscription_group, sg_listeners|
|
|
62
|
+
multiplexing = first_subscription_group.multiplexing
|
|
63
|
+
|
|
64
|
+
if multiplexing.active? && multiplexing.dynamic?
|
|
65
|
+
# Start as many boot listeners as user wants. If not configured, starts half of max.
|
|
66
|
+
sg_listeners.first(multiplexing.boot).each(&:start!)
|
|
67
|
+
else
|
|
68
|
+
sg_listeners.each(&:start!)
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# Collects data from the statistics about given subscription group. This is used to ensure
|
|
74
|
+
# that we do not rescale shortly after rebalances, deployments, etc.
|
|
75
|
+
# @param subscription_group_id [String] id of the subscription group for which statistics
|
|
76
|
+
# were emitted
|
|
77
|
+
# @param statistics [Hash] emitted statistics
|
|
78
|
+
#
|
|
79
|
+
# @note Please note that while we collect here per subscription group, we use those metrics
|
|
80
|
+
# collectively on a whole consumer group. This reduces the friction.
|
|
81
|
+
def notice(subscription_group_id, statistics)
|
|
82
|
+
times = []
|
|
83
|
+
# stateage is in microseconds
|
|
84
|
+
# We monitor broker changes to make sure we do not introduce extra friction
|
|
85
|
+
times << statistics['brokers'].values.map { |stats| stats['stateage'] }.min / 1_000
|
|
86
|
+
times << statistics['cgrp']['rebalance_age']
|
|
87
|
+
times << statistics['cgrp']['stateage']
|
|
88
|
+
|
|
89
|
+
# Keep the previous change age for changes that were triggered by us
|
|
90
|
+
previous_changed_at = @changes[subscription_group_id][:changed_at]
|
|
91
|
+
|
|
92
|
+
@changes[subscription_group_id].merge!(
|
|
93
|
+
state_age: times.min,
|
|
94
|
+
changed_at: previous_changed_at,
|
|
95
|
+
join_state: statistics['cgrp']['join_state'],
|
|
96
|
+
state: statistics['cgrp']['state']
|
|
97
|
+
)
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
# Shuts down all the listeners when it is time (including moving to quiet) or rescales
|
|
101
|
+
# when it is needed
|
|
102
|
+
def control
|
|
103
|
+
Karafka::App.done? ? shutdown : rescale
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
private
|
|
107
|
+
|
|
108
|
+
# Handles the shutdown and quiet flows
|
|
109
|
+
def shutdown
|
|
110
|
+
active_listeners = @listeners.active
|
|
111
|
+
|
|
112
|
+
# When we are done processing immediately quiet all the listeners so they do not pick up
|
|
113
|
+
# new work to do
|
|
114
|
+
once(:quiet!) { active_listeners.each(&:quiet!) }
|
|
115
|
+
|
|
116
|
+
# If we are in the process of moving to quiet state, we need to check it.
|
|
117
|
+
if Karafka::App.quieting? && active_listeners.all?(&:quiet?)
|
|
118
|
+
once(:quieted!) { Karafka::App.quieted! }
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
return if Karafka::App.quiet?
|
|
122
|
+
|
|
123
|
+
# Since separate subscription groups are subscribed to different topics, there is no risk
|
|
124
|
+
# in shutting them down independently even if they operate in the same consumer group
|
|
125
|
+
in_sg_families do |first_subscription_group, sg_listeners|
|
|
126
|
+
active_sg_listeners = sg_listeners.select(&:active?)
|
|
127
|
+
|
|
128
|
+
# Do nothing until all listeners from the same consumer group are quiet. Otherwise we
|
|
129
|
+
# could have problems with in-flight rebalances during shutdown
|
|
130
|
+
next unless active_sg_listeners.all?(&:quiet?)
|
|
131
|
+
|
|
132
|
+
# Do not stop the same family twice
|
|
133
|
+
once(:stop!, first_subscription_group.name) { active_sg_listeners.each(&:stop!) }
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
return unless @listeners.active.all?(&:stopped?)
|
|
137
|
+
|
|
138
|
+
# All listeners including pending need to be moved at the end to stopped state for
|
|
139
|
+
# the whole server to stop
|
|
140
|
+
once(:stop!) { @listeners.each(&:stopped!) }
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# Handles two scenarios:
|
|
144
|
+
# - Selects subscriptions that could benefit from having more parallel connections
|
|
145
|
+
# to kafka and then upscales them
|
|
146
|
+
# - Selects subscriptions that are idle (have nothing subscribed to them) and then shuts
|
|
147
|
+
# them down
|
|
148
|
+
#
|
|
149
|
+
# We always run scaling down and up because it may be applicable to different CGs
|
|
150
|
+
def rescale
|
|
151
|
+
scale_down
|
|
152
|
+
scale_up
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
# Checks for connections without any assignments and scales them down.
|
|
156
|
+
# Does that only for dynamically multiplexed subscription groups
|
|
157
|
+
def scale_down
|
|
158
|
+
sgs_in_use = Karafka::App.assignments.keys.map(&:subscription_group).uniq
|
|
159
|
+
|
|
160
|
+
# Select connections for scaling down
|
|
161
|
+
in_sg_families do |first_subscription_group, sg_listeners|
|
|
162
|
+
next unless stable?(sg_listeners)
|
|
163
|
+
|
|
164
|
+
multiplexing = first_subscription_group.multiplexing
|
|
165
|
+
|
|
166
|
+
next unless multiplexing.active?
|
|
167
|
+
next unless multiplexing.dynamic?
|
|
168
|
+
|
|
169
|
+
# If we cannot downscale, do not
|
|
170
|
+
next if sg_listeners.count(&:active?) <= multiplexing.min
|
|
171
|
+
|
|
172
|
+
sg_listeners.each do |sg_listener|
|
|
173
|
+
# Do not stop connections with subscriptions
|
|
174
|
+
next if sgs_in_use.include?(sg_listener.subscription_group)
|
|
175
|
+
# Skip listeners that are already in standby
|
|
176
|
+
next unless sg_listener.active?
|
|
177
|
+
|
|
178
|
+
touch(sg_listener.subscription_group.id)
|
|
179
|
+
|
|
180
|
+
# Shut down not used connection
|
|
181
|
+
sg_listener.stop!
|
|
182
|
+
|
|
183
|
+
break
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
# Checks if we have space to scale and if there are any assignments with multiple topics
|
|
189
|
+
# partitions assigned in sgs that can be scaled. If that is the case, we scale up.
|
|
190
|
+
def scale_up
|
|
191
|
+
multi_part_sgs_families = Karafka::App
|
|
192
|
+
.assignments
|
|
193
|
+
.select { |_, partitions| partitions.size > 1 }
|
|
194
|
+
.keys
|
|
195
|
+
.map(&:subscription_group)
|
|
196
|
+
.map(&:name)
|
|
197
|
+
.uniq
|
|
198
|
+
|
|
199
|
+
# Select connections for scaling up
|
|
200
|
+
in_sg_families do |first_subscription_group, sg_listeners|
|
|
201
|
+
next unless stable?(sg_listeners)
|
|
202
|
+
|
|
203
|
+
multiplexing = first_subscription_group.multiplexing
|
|
204
|
+
|
|
205
|
+
next unless multiplexing.active?
|
|
206
|
+
next unless multiplexing.dynamic?
|
|
207
|
+
# If we cannot upscale, do not
|
|
208
|
+
next if sg_listeners.count(&:active?) >= multiplexing.max
|
|
209
|
+
|
|
210
|
+
sg_listeners.each do |sg_listener|
|
|
211
|
+
next unless multi_part_sgs_families.include?(sg_listener.subscription_group.name)
|
|
212
|
+
# Skip already active connections
|
|
213
|
+
next unless sg_listener.pending? || sg_listener.stopped?
|
|
214
|
+
|
|
215
|
+
touch(sg_listener.subscription_group.id)
|
|
216
|
+
sg_listener.start!
|
|
217
|
+
|
|
218
|
+
break
|
|
219
|
+
end
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
# Indicates, that something has changed on a subscription group. We consider every single
|
|
224
|
+
# change we make as a change to the setup as well.
|
|
225
|
+
# @param subscription_group_id [String]
|
|
226
|
+
def touch(subscription_group_id)
|
|
227
|
+
@changes[subscription_group_id][:changed_at] = 0
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
# @param sg_listeners [Array<Listener>] listeners from one multiplexed sg
|
|
231
|
+
# @return [Boolean] is given subscription group listeners set stable. It is considered
|
|
232
|
+
# stable when it had no changes happening on it recently and all relevant states in it
|
|
233
|
+
# are also stable. This is a strong indicator that no rebalances or other operations are
|
|
234
|
+
# happening at a given moment.
|
|
235
|
+
def stable?(sg_listeners)
|
|
236
|
+
sg_listeners.all? do |sg_listener|
|
|
237
|
+
# If a listener is not active, we do not take it into consideration when looking at
|
|
238
|
+
# the stability data
|
|
239
|
+
next true unless sg_listener.active?
|
|
240
|
+
|
|
241
|
+
state = @changes[sg_listener.subscription_group.id]
|
|
242
|
+
|
|
243
|
+
state[:state_age] >= @scale_delay &&
|
|
244
|
+
(monotonic_now - state[:changed_at]) >= @scale_delay &&
|
|
245
|
+
state[:state] == 'up' &&
|
|
246
|
+
state[:join_state] == 'steady'
|
|
247
|
+
end
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
# Yields listeners in groups based on their subscription groups
|
|
251
|
+
# @yieldparam [Karafka::Routing::SubscriptionGroup] first subscription group out of the
|
|
252
|
+
# family
|
|
253
|
+
# @yieldparam [Array<Listener>] listeners of a single subscription group
|
|
254
|
+
def in_sg_families
|
|
255
|
+
grouped = @listeners.group_by { |listener| listener.subscription_group.name }
|
|
256
|
+
|
|
257
|
+
grouped.each_value do |listeners|
|
|
258
|
+
listener = listeners.first
|
|
259
|
+
|
|
260
|
+
yield(
|
|
261
|
+
listener.subscription_group,
|
|
262
|
+
listeners
|
|
263
|
+
)
|
|
264
|
+
end
|
|
265
|
+
end
|
|
266
|
+
end
|
|
267
|
+
end
|
|
268
|
+
end
|
|
269
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
6
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
7
|
+
# repository and their usage requires commercial license agreement.
|
|
8
|
+
#
|
|
9
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
10
|
+
#
|
|
11
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
12
|
+
# your code to Maciej Mensfeld.
|
|
13
|
+
|
|
14
|
+
module Karafka
|
|
15
|
+
module Pro
|
|
16
|
+
# Namespace for Pro connections related components
|
|
17
|
+
module Connection
|
|
18
|
+
# Namespace for Multiplexing management related components
|
|
19
|
+
module Multiplexing
|
|
20
|
+
# Listener used to connect listeners manager to the lifecycle events that are significant
|
|
21
|
+
# to its operations
|
|
22
|
+
class Listener
|
|
23
|
+
def initialize
|
|
24
|
+
@manager = App.config.internal.connection.manager
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Triggers connection manager subscription groups details noticing
|
|
28
|
+
#
|
|
29
|
+
# @param event [Karafka::Core::Monitoring::Event] event with statistics
|
|
30
|
+
def on_statistics_emitted(event)
|
|
31
|
+
@manager.notice(
|
|
32
|
+
event[:subscription_group_id],
|
|
33
|
+
event[:statistics]
|
|
34
|
+
)
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|