karafka 2.2.6 → 2.2.8.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +3 -0
- data/CHANGELOG.md +18 -0
- data/Gemfile.lock +5 -7
- data/bin/karafka +2 -3
- data/config/locales/errors.yml +7 -4
- data/config/locales/pro_errors.yml +3 -0
- data/docker-compose.yml +3 -1
- data/karafka.gemspec +1 -2
- data/lib/karafka/base_consumer.rb +8 -1
- data/lib/karafka/cli/base.rb +45 -34
- data/lib/karafka/cli/console.rb +5 -4
- data/lib/karafka/cli/help.rb +24 -0
- data/lib/karafka/cli/info.rb +2 -2
- data/lib/karafka/cli/install.rb +4 -4
- data/lib/karafka/cli/server.rb +68 -33
- data/lib/karafka/cli/topics.rb +1 -1
- data/lib/karafka/cli.rb +23 -19
- data/lib/karafka/connection/client.rb +9 -4
- data/lib/karafka/connection/listener.rb +3 -3
- data/lib/karafka/connection/rebalance_manager.rb +36 -21
- data/lib/karafka/contracts/config.rb +2 -0
- data/lib/karafka/errors.rb +3 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +64 -0
- data/lib/karafka/instrumentation/notifications.rb +5 -1
- data/lib/karafka/instrumentation/vendors/appsignal/base.rb +30 -0
- data/lib/karafka/instrumentation/vendors/appsignal/client.rb +122 -0
- data/lib/karafka/instrumentation/vendors/appsignal/dashboard.json +222 -0
- data/lib/karafka/instrumentation/vendors/appsignal/errors_listener.rb +30 -0
- data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +331 -0
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
- data/lib/karafka/patches/rdkafka/bindings.rb +22 -39
- data/lib/karafka/patches/rdkafka/opaque.rb +36 -0
- data/lib/karafka/pro/loader.rb +2 -2
- data/lib/karafka/pro/processing/coordinator.rb +6 -7
- data/lib/karafka/pro/processing/filters/delayer.rb +1 -1
- data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +78 -0
- data/lib/karafka/pro/processing/strategies/vp/default.rb +20 -0
- data/lib/karafka/pro/routing/features/inline_insights/config.rb +32 -0
- data/lib/karafka/pro/routing/features/inline_insights/contracts/topic.rb +41 -0
- data/lib/karafka/pro/routing/features/inline_insights/topic.rb +52 -0
- data/lib/karafka/pro/routing/features/inline_insights.rb +26 -0
- data/lib/karafka/processing/executor.rb +7 -0
- data/lib/karafka/processing/expansions_selector.rb +22 -0
- data/lib/karafka/processing/inline_insights/consumer.rb +41 -0
- data/lib/karafka/processing/inline_insights/listener.rb +19 -0
- data/lib/karafka/processing/inline_insights/tracker.rb +128 -0
- data/lib/karafka/railtie.rb +14 -7
- data/lib/karafka/routing/features/base.rb +36 -1
- data/lib/karafka/routing/features/inline_insights/config.rb +15 -0
- data/lib/karafka/routing/features/inline_insights/contracts/topic.rb +27 -0
- data/lib/karafka/routing/features/inline_insights/topic.rb +31 -0
- data/lib/karafka/routing/features/inline_insights.rb +40 -0
- data/lib/karafka/routing/subscription_group.rb +6 -2
- data/lib/karafka/setup/attributes_map.rb +1 -0
- data/lib/karafka/setup/config.rb +11 -2
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +26 -19
- metadata.gz.sig +0 -0
data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb
ADDED
@@ -0,0 +1,331 @@
+# frozen_string_literal: true
+
+require_relative 'base'
+
+module Karafka
+  module Instrumentation
+    module Vendors
+      # Namespace for Appsignal instrumentation
+      module Appsignal
+        # Listener that ships metrics to Appsignal
+        class MetricsListener < Base
+          def_delegators :config, :client, :rd_kafka_metrics, :namespace
+
+          # Value object for storing a single rdkafka metric publishing details
+          RdKafkaMetric = Struct.new(:type, :scope, :name, :key_location)
+
+          setting :namespace, default: 'karafka'
+
+          setting :client, default: Client.new
+
+          setting :rd_kafka_metrics, default: [
+            # Broker metrics
+            RdKafkaMetric.new(:count, :brokers, 'requests_retries', 'txretries_d'),
+            RdKafkaMetric.new(:count, :brokers, 'transmission_errors', 'txerrs_d'),
+            RdKafkaMetric.new(:count, :brokers, 'receive_errors', 'rxerrs_d'),
+            RdKafkaMetric.new(:count, :brokers, 'connection_connects', 'connects_d'),
+            RdKafkaMetric.new(:count, :brokers, 'connection_disconnects', 'disconnects_d'),
+            RdKafkaMetric.new(:gauge, :brokers, 'network_latency_avg', %w[rtt avg]),
+            RdKafkaMetric.new(:gauge, :brokers, 'network_latency_p95', %w[rtt p95]),
+            RdKafkaMetric.new(:gauge, :brokers, 'network_latency_p99', %w[rtt p99]),
+
+            # Topic partition metrics
+            RdKafkaMetric.new(:gauge, :topics, 'consumer_lag', 'consumer_lag_stored'),
+            RdKafkaMetric.new(:gauge, :topics, 'consumer_lag_delta', 'consumer_lag_stored_d')
+          ].freeze
+
+          # Metrics that sum values on the topic level and not on the partition level
+          setting :aggregated_rd_kafka_metrics, default: [
+            # Topic aggregated metrics
+            RdKafkaMetric.new(:gauge, :topics, 'consumer_aggregated_lag', 'consumer_lag_stored')
+          ].freeze
+
+          configure
+
+          # Before each consumption process, let's start a transaction associated with it.
+          # We also set some basic metadata about the given consumption that can be useful for
+          # debugging
+          #
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_consumer_consume(event)
+            consumer = event.payload[:caller]
+
+            start_transaction(consumer, 'consume')
+
+            client.metadata = {
+              batch_size: consumer.messages.size,
+              first_offset: consumer.messages.metadata.first_offset,
+              last_offset: consumer.messages.metadata.last_offset,
+              consumer_group: consumer.topic.consumer_group.id,
+              topic: consumer.topic.name,
+              partition: consumer.partition,
+              attempt: consumer.coordinator.pause_tracker.attempt
+            }
+          end
+
+          # Once we're done with consumption, we bump counters about that
+          #
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_consumer_consumed(event)
+            consumer = event.payload[:caller]
+            messages = consumer.messages
+            metadata = messages.metadata
+
+            with_multiple_resolutions(consumer) do |tags|
+              count('consumer_messages', messages.size, tags)
+              count('consumer_batches', 1, tags)
+              gauge('consumer_offsets', metadata.last_offset, tags)
+            end
+
+            stop_transaction
+          end
+
+          # Registers the minute-based probe only once the app is running. If we always
+          # registered the probe, it would also report from processes that use Karafka but do
+          # not run the consumption process
+          #
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_app_running(_event)
+            return if @probe_registered
+
+            @probe_registered = true
+
+            # Registers the minutely probe for one-every-minute metrics
+            client.register_probe(:karafka, -> { minute_probe })
+          end
+
+          # Keeps track of revocation user code execution
+          #
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_consumer_revoke(event)
+            consumer = event.payload[:caller]
+            start_transaction(consumer, 'revoked')
+          end
+
+          # Finishes the revocation transaction
+          #
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_revoked(_event)
+            stop_transaction
+          end
+
+          # Keeps track of shutdown user code execution
+          #
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_consumer_shutting_down(event)
+            consumer = event.payload[:caller]
+            start_transaction(consumer, 'shutdown')
+          end
+
+          # Finishes the shutdown transaction
+          #
+          # @param _event [Karafka::Core::Monitoring::Event]
+          def on_consumer_shutdown(_event)
+            stop_transaction
+          end
+
+          # Counts DLQ dispatches
+          #
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_dead_letter_queue_dispatched(event)
+            consumer = event.payload[:caller]
+
+            with_multiple_resolutions(consumer) do |tags|
+              count('consumer_dead', 1, tags)
+            end
+          end
+
+          # Reports on **any** error that occurs. This also includes non-user related errors
+          # originating from the framework.
+          #
+          # @param event [Karafka::Core::Monitoring::Event] error event details
+          def on_error_occurred(event)
+            # If this is a user consumption related error, we bump the counters for metrics
+            if event[:type] == 'consumer.consume.error'
+              consumer = event.payload[:caller]
+
+              with_multiple_resolutions(consumer) do |tags|
+                count('consumer_errors', 1, tags)
+              end
+            end
+
+            stop_transaction
+          end
+
+          # Hooks up to Karafka instrumentation for emitted statistics
+          #
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_statistics_emitted(event)
+            statistics = event[:statistics]
+            consumer_group_id = event[:consumer_group_id]
+
+            rd_kafka_metrics.each do |metric|
+              report_metric(metric, statistics, consumer_group_id)
+            end
+
+            report_aggregated_topics_metrics(statistics, consumer_group_id)
+          end
+
+          # Reports a given metric statistics to Appsignal
+          # @param metric [RdKafkaMetric] metric value object
+          # @param statistics [Hash] hash with all the statistics emitted
+          # @param consumer_group_id [String] consumer group in the context of which we operate
+          def report_metric(metric, statistics, consumer_group_id)
+            case metric.scope
+            when :root
+              # Do nothing on the root metrics as the same metrics are reported in a granular
+              # way from other places
+              nil
+            when :brokers
+              statistics.fetch('brokers').each_value do |broker_statistics|
+                # Skip bootstrap nodes
+                # Bootstrap nodes have nodeid -1, other nodes have positive
+                # node ids
+                next if broker_statistics['nodeid'] == -1
+
+                public_send(
+                  metric.type,
+                  metric.name,
+                  broker_statistics.dig(*metric.key_location),
+                  {
+                    broker: broker_statistics['nodename']
+                  }
+                )
+              end
+            when :topics
+              statistics.fetch('topics').each do |topic_name, topic_values|
+                topic_values['partitions'].each do |partition_name, partition_statistics|
+                  next if partition_name == '-1'
+                  # Skip until lag info is available
+                  next if partition_statistics['consumer_lag'] == -1
+                  next if partition_statistics['consumer_lag_stored'] == -1
+
+                  # Skip if we do not own the fetch assignment
+                  next if partition_statistics['fetch_state'] == 'stopped'
+                  next if partition_statistics['fetch_state'] == 'none'
+
+                  public_send(
+                    metric.type,
+                    metric.name,
+                    partition_statistics.dig(*metric.key_location),
+                    {
+                      consumer_group: consumer_group_id,
+                      topic: topic_name,
+                      partition: partition_name
+                    }
+                  )
+                end
+              end
+            else
+              raise ArgumentError, metric.scope
+            end
+          end
+
+          # Publishes aggregated topic-level metrics that are the sum of per-partition metrics
+          #
+          # @param statistics [Hash] hash with all the statistics emitted
+          # @param consumer_group_id [String] consumer group in the context of which we operate
+          def report_aggregated_topics_metrics(statistics, consumer_group_id)
+            config.aggregated_rd_kafka_metrics.each do |metric|
+              statistics.fetch('topics').each do |topic_name, topic_values|
+                sum = 0
+
+                topic_values['partitions'].each do |partition_name, partition_statistics|
+                  next if partition_name == '-1'
+                  # Skip until lag info is available
+                  next if partition_statistics['consumer_lag'] == -1
+                  next if partition_statistics['consumer_lag_stored'] == -1
+
+                  sum += partition_statistics.dig(*metric.key_location)
+                end
+
+                public_send(
+                  metric.type,
+                  metric.name,
+                  sum,
+                  {
+                    consumer_group: consumer_group_id,
+                    topic: topic_name
+                  }
+                )
+              end
+            end
+          end
+
+          # Increments a counter with a namespaced key, value and tags
+          #
+          # @param key [String] key we want to use (without the namespace)
+          # @param value [Integer] count value
+          # @param tags [Hash] additional extra tags
+          def count(key, value, tags)
+            client.count(
+              namespaced_metric(key),
+              value,
+              tags
+            )
+          end
+
+          # Sets the gauge value
+          #
+          # @param key [String] key we want to use (without the namespace)
+          # @param value [Integer] gauge value
+          # @param tags [Hash] additional extra tags
+          def gauge(key, value, tags)
+            client.gauge(
+              namespaced_metric(key),
+              value,
+              tags
+            )
+          end
+
+          private
+
+          # Wraps metric name in listener's namespace
+          # @param metric_name [String] RdKafkaMetric name
+          # @return [String]
+          def namespaced_metric(metric_name)
+            "#{namespace}_#{metric_name}"
+          end
+
+          # Starts the transaction for monitoring user code
+          #
+          # @param consumer [Karafka::BaseConsumer] karafka consumer instance
+          # @param action_name [String] lifecycle user method name
+          def start_transaction(consumer, action_name)
+            client.start_transaction(
+              "#{consumer.class}##{action_name}"
+            )
+          end
+
+          # Stops the transaction wrapping user code
+          def stop_transaction
+            client.stop_transaction
+          end
+
+          # @param consumer [Karafka::BaseConsumer] Karafka consumer instance
+          def with_multiple_resolutions(consumer)
+            topic_name = consumer.topic.name
+            consumer_group_id = consumer.topic.consumer_group.id
+            partition = consumer.partition
+
+            tags = {
+              consumer_group: consumer_group_id,
+              topic: topic_name
+            }
+
+            yield(tags)
+            yield(tags.merge(partition: partition))
+          end
+
+          # Sends minute-based probing metrics
+          def minute_probe
+            concurrency = Karafka::App.config.concurrency
+
+            count('processes_count', 1, {})
+            count('threads_count', concurrency, {})
+          end
+        end
+      end
+    end
+  end
+end
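For context, this listener reports only once it is subscribed to Karafka's monitor; the wiring itself is not part of the diff. Below is a minimal, illustrative sketch: the require path matches the file added above, and the block-based configuration assumes the same `setting`/`configure` mechanism the other vendor listeners built on the shared Base use.

# karafka.rb (illustrative)
require 'karafka/instrumentation/vendors/appsignal/metrics_listener'

# Keep the defaults; the block only demonstrates that namespace, client and
# rd_kafka_metrics are configurable settings of the listener
listener = ::Karafka::Instrumentation::Vendors::Appsignal::MetricsListener.new do |config|
  config.namespace = 'karafka'
end

Karafka.monitor.subscribe(listener)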
data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb
CHANGED
@@ -67,7 +67,7 @@ module Karafka
         configure(&block)
       end

-      # Hooks up to
+      # Hooks up to Karafka instrumentation for emitted statistics
       #
       # @param event [Karafka::Core::Monitoring::Event]
       def on_statistics_emitted(event)
@@ -101,7 +101,7 @@ module Karafka
        time_taken = event[:time]
        messages_count = event[:messages_buffer].size

-        consumer_group_id = event[:subscription_group].
+        consumer_group_id = event[:subscription_group].consumer_group.id

        extra_tags = ["consumer_group:#{consumer_group_id}"]

data/lib/karafka/patches/rdkafka/bindings.rb
CHANGED
@@ -18,15 +18,23 @@ module Karafka
        # @param client_ptr [FFI::Pointer]
        # @param code [Integer]
        # @param partitions_ptr [FFI::Pointer]
-        def on_cooperative_rebalance(client_ptr, code, partitions_ptr)
+        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
+        # @param opaque [Rdkafka::Opaque]
+        def on_cooperative_rebalance(client_ptr, code, partitions_ptr, tpl, opaque)
          case code
          when RB::RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS
+            opaque&.call_on_partitions_assign(tpl)
            RB.rd_kafka_incremental_assign(client_ptr, partitions_ptr)
+            opaque&.call_on_partitions_assigned(tpl)
          when RB::RD_KAFKA_RESP_ERR__REVOKE_PARTITIONS
+            opaque&.call_on_partitions_revoke(tpl)
            RB.rd_kafka_commit(client_ptr, nil, false)
            RB.rd_kafka_incremental_unassign(client_ptr, partitions_ptr)
+            opaque&.call_on_partitions_revoked(tpl)
          else
+            opaque&.call_on_partitions_assign(tpl)
            RB.rd_kafka_assign(client_ptr, FFI::Pointer::NULL)
+            opaque&.call_on_partitions_assigned(tpl)
          end
        end

@@ -35,45 +43,25 @@ module Karafka
        # @param client_ptr [FFI::Pointer]
        # @param code [Integer]
        # @param partitions_ptr [FFI::Pointer]
-        def on_eager_rebalance(client_ptr, code, partitions_ptr)
+        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
+        # @param opaque [Rdkafka::Opaque]
+        def on_eager_rebalance(client_ptr, code, partitions_ptr, tpl, opaque)
          case code
          when RB::RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS
+            opaque&.call_on_partitions_assign(tpl)
            RB.rd_kafka_assign(client_ptr, partitions_ptr)
+            opaque&.call_on_partitions_assigned(tpl)
          when RB::RD_KAFKA_RESP_ERR__REVOKE_PARTITIONS
+            opaque&.call_on_partitions_revoke(tpl)
            RB.rd_kafka_commit(client_ptr, nil, false)
            RB.rd_kafka_assign(client_ptr, FFI::Pointer::NULL)
+            opaque&.call_on_partitions_revoked(tpl)
          else
+            opaque&.call_on_partitions_assign(tpl)
            RB.rd_kafka_assign(client_ptr, FFI::Pointer::NULL)
+            opaque&.call_on_partitions_assigned(tpl)
          end
        end
-
-        # Trigger Karafka callbacks
-        #
-        # @param code [Integer]
-        # @param opaque [Rdkafka::Opaque]
-        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
-        def trigger_callbacks(code, opaque, tpl)
-          Karafka.monitor.instrument(
-            'connection.client.rebalance_callback',
-            caller: self,
-            code: code,
-            tpl: tpl
-          ) do
-            case code
-            when RB::RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS
-              opaque.call_on_partitions_assigned(tpl)
-            when RB::RD_KAFKA_RESP_ERR__REVOKE_PARTITIONS
-              opaque.call_on_partitions_revoked(tpl)
-            end
-          end
-        rescue StandardError => e
-          Karafka.monitor.instrument(
-            'error.occurred',
-            caller: self,
-            error: e,
-            type: 'connection.client.rebalance_callback.error'
-          )
-        end
      end

      # This patch changes a few things:
@@ -89,19 +77,14 @@ module Karafka
        ) do |client_ptr, code, partitions_ptr, opaque_ptr|
          # Patch reference
          pr = ::Karafka::Patches::Rdkafka::Bindings
+          tpl = ::Rdkafka::Consumer::TopicPartitionList.from_native_tpl(partitions_ptr).freeze
+          opaque = ::Rdkafka::Config.opaques[opaque_ptr.to_i]

          if RB.rd_kafka_rebalance_protocol(client_ptr) == 'COOPERATIVE'
-            pr.on_cooperative_rebalance(client_ptr, code, partitions_ptr)
+            pr.on_cooperative_rebalance(client_ptr, code, partitions_ptr, tpl, opaque)
          else
-            pr.on_eager_rebalance(client_ptr, code, partitions_ptr)
+            pr.on_eager_rebalance(client_ptr, code, partitions_ptr, tpl, opaque)
          end
-
-          opaque = ::Rdkafka::Config.opaques[opaque_ptr.to_i]
-          return unless opaque
-
-          tpl = ::Rdkafka::Consumer::TopicPartitionList.from_native_tpl(partitions_ptr).freeze
-
-          pr.trigger_callbacks(code, opaque, tpl)
        end
      end
    end
data/lib/karafka/patches/rdkafka/opaque.rb
ADDED
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Patches
+    module Rdkafka
+      # Patches allowing us to run events on both pre and post rebalance events.
+      # Thanks to that, we can easily connect to the whole flow regardless of the moment when
+      # things are happening
+      module Opaque
+        # Handles the pre-assign phase of a rebalance
+        #
+        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
+        def call_on_partitions_assign(tpl)
+          return unless consumer_rebalance_listener
+          return unless consumer_rebalance_listener.respond_to?(:on_partitions_assign)
+
+          consumer_rebalance_listener.on_partitions_assign(tpl)
+        end
+
+        # Handles the pre-revoke phase of a rebalance
+        #
+        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
+        def call_on_partitions_revoke(tpl)
+          return unless consumer_rebalance_listener
+          return unless consumer_rebalance_listener.respond_to?(:on_partitions_revoke)
+
+          consumer_rebalance_listener.on_partitions_revoke(tpl)
+        end
+      end
+    end
+  end
+end
+
+::Rdkafka::Opaque.include(
+  Karafka::Patches::Rdkafka::Opaque
+)
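Since both `call_on_partitions_assign` and `call_on_partitions_revoke` are guarded with `respond_to?` checks, a rebalance listener only needs to implement the hooks it cares about. A hypothetical listener covering all four phases could look as follows; the class name and the logging are illustrative, and in rdkafka-ruby such an object is the one attached as the `consumer_rebalance_listener`.

# Illustrative listener: the pre-phase hooks come from the patch above,
# the post-phase hooks are rdkafka's pre-existing callbacks
class RebalanceLogger
  # Runs before partitions get assigned
  def on_partitions_assign(tpl)
    puts "about to assign: #{tpl.to_h.keys}"
  end

  # Runs after partitions were assigned
  def on_partitions_assigned(tpl)
    puts "assigned: #{tpl.to_h.keys}"
  end

  # Runs before partitions get revoked
  def on_partitions_revoke(tpl)
    puts "about to revoke: #{tpl.to_h.keys}"
  end

  # Runs after partitions were revoked
  def on_partitions_revoked(tpl)
    puts "revoked: #{tpl.to_h.keys}"
  end
end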
data/lib/karafka/pro/loader.rb
CHANGED
@@ -49,7 +49,7 @@ module Karafka
      # Loads all the pro components and configures them wherever it is expected
      # @param config [Karafka::Core::Configurable::Node] app config that we can alter with pro
      #   components
-      def
+      def pre_setup_all(config)
        features.each { |feature| feature.pre_setup(config) }

        reconfigure(config)
@@ -60,7 +60,7 @@ module Karafka
      # Runs post setup features configuration operations
      #
      # @param config [Karafka::Core::Configurable::Node]
-      def
+      def post_setup_all(config)
        features.each { |feature| feature.post_setup(config) }
      end

data/lib/karafka/pro/processing/coordinator.rb
CHANGED
@@ -17,6 +17,10 @@ module Karafka
      # Pro coordinator that provides extra orchestration methods useful for parallel processing
      # within the same partition
      class Coordinator < ::Karafka::Processing::Coordinator
+        extend Forwardable
+
+        def_delegators :@collapser, :collapsed?, :collapse_until!
+
        attr_reader :filter, :virtual_offset_manager

        # @param args [Object] anything the base coordinator accepts
@@ -57,7 +61,7 @@ module Karafka

          # We keep the old processed offsets until the collapsing is done and regular processing
          # with virtualization is restored
-          @virtual_offset_manager.clear if topic.virtual_partitions? &&
+          @virtual_offset_manager.clear if topic.virtual_partitions? && !collapsed?

          @last_message = messages.last
        end
@@ -68,12 +72,7 @@ module Karafka
        # @param error [StandardError] error from the failure
        def failure!(consumer, error)
          super
-
-        end
-
-        # @return [Boolean] are we in a collapsed state at the moment
-        def collapsed?
-          @collapser.collapsed?
+          collapse_until!(@last_message.offset + 1)
        end

        # @return [Boolean] did any of the filters apply any logic that would cause us to run
data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb
ADDED
@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      module Filters
+        # Delayer that checks if we have appropriate insights available. If not, pauses for
+        # 5 seconds so the insights can be loaded from the broker.
+        #
+        # If loading the insights takes more than five seconds, it will just pause again
+        #
+        # This filter ensures that we always have inline insights that a consumer can use
+        #
+        # It is mostly relevant during process start, when the first poll may already return
+        # messages but no statistics have been emitted yet.
+        class InlineInsightsDelayer < Base
+          # Minimum time we should pause for when there are no metrics
+          PAUSE_TIMEOUT = 5_000
+
+          private_constant :PAUSE_TIMEOUT
+
+          # @param topic [Karafka::Routing::Topic]
+          # @param partition [Integer] partition
+          def initialize(topic, partition)
+            super()
+            @topic = topic
+            @partition = partition
+          end
+
+          # Pauses if inline insights are not available. Does nothing otherwise
+          #
+          # @param messages [Array<Karafka::Messages::Message>]
+          def apply!(messages)
+            @applied = false
+            @cursor = messages.first
+
+            # Nothing to do if there were no messages
+            # This can happen when we chain filters
+            return unless @cursor
+
+            insights = ::Karafka::Processing::InlineInsights::Tracker.find(
+              @topic,
+              @partition
+            )
+
+            # If insights are available, also nothing to do here and we can just process
+            return unless insights.empty?
+
+            messages.clear
+
+            @applied = true
+          end
+
+          # @return [Integer] ms timeout in case of pause
+          def timeout
+            @cursor && applied? ? PAUSE_TIMEOUT : 0
+          end
+
+          # Pause when we had to back off, or skip if no delay is needed
+          def action
+            applied? ? :pause : :skip
+          end
+        end
+      end
+    end
+  end
+end
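To illustrate where this delayer fits: per the Pro routing files added in this release, inline insights can be marked as required on a topic, which engages the delayer so `consume` never runs without statistics. A hedged sketch of the intended usage follows; the consumer names are illustrative, and the `inline_insights` routing API plus the consumer-level `insights` reader are assumptions based on the inline insights files in this diff.

class KarafkaApp < Karafka::App
  routes.draw do
    topic :events do
      consumer EventsConsumer
      # Pro flavor: required insights engage the InlineInsightsDelayer above
      inline_insights(required: true)
    end
  end
end

class EventsConsumer < Karafka::BaseConsumer
  def consume
    # Expected to be present because insights are marked as required
    lag = insights['consumer_lag']

    messages.each { |message| puts "#{message.offset} (lag: #{lag})" }
  end
end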
data/lib/karafka/pro/processing/strategies/vp/default.rb
CHANGED
@@ -72,6 +72,15 @@ module Karafka
          coordinator.collapsed?
        end

+        # @param offset [Integer] first offset from which we should not operate in a collapsed
+        #   mode.
+        # @note Keep in mind that if a batch contains this offset but also earlier messages
+        #   that should be collapsed, all of them will continue to operate in a collapsed mode
+        #   until the first full batch with only messages that should not be collapsed.
+        def collapse_until!(offset)
+          coordinator.collapse_until!(offset)
+        end
+
        # @return [Boolean] true if any of the virtual partitions we're operating on in the
        #   entangled mode has already failed and we know we are failing collectively.
        #   Useful for early stop to minimize the number of things processed twice.
@@ -84,6 +93,17 @@ module Karafka
          coordinator.failure?
        end

+        # Allows for cross-virtual-partition consumer locks
+        #
+        # This is not needed in the non-VP flows because there is always only one consumer
+        # per partition at the same time, so no coordination is needed directly for the
+        # end users
+        #
+        # @param block [Proc] block we want to run in a mutex to prevent race conditions
+        def synchronize(&block)
+          coordinator.synchronize(&block)
+        end
+
        private

        # Prior to adding work to the queue, registers all the messages offsets into the
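The `synchronize` API above is what lets consumers of different virtual partitions of the same underlying partition coordinate access to shared state. A hypothetical aggregation sketch (the class and the shared accumulator are illustrative, not part of this release):

class AggregatingConsumer < Karafka::BaseConsumer
  def consume
    batch_total = messages.count

    # Only one virtual partition consumer of this partition runs this block
    # at a time, preventing race conditions on the shared hash
    synchronize do
      self.class.totals[partition] ||= 0
      self.class.totals[partition] += batch_total
    end
  end

  # Process-wide accumulator shared across virtual partition consumers
  def self.totals
    @totals ||= {}
  end
end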
|