karafka 2.3.3 → 2.4.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +12 -38
- data/CHANGELOG.md +59 -0
- data/Gemfile +6 -3
- data/Gemfile.lock +29 -27
- data/bin/integrations +1 -1
- data/config/locales/errors.yml +21 -2
- data/config/locales/pro_errors.yml +16 -1
- data/karafka.gemspec +4 -2
- data/lib/active_job/queue_adapters/karafka_adapter.rb +2 -0
- data/lib/karafka/admin/configs/config.rb +81 -0
- data/lib/karafka/admin/configs/resource.rb +88 -0
- data/lib/karafka/admin/configs.rb +103 -0
- data/lib/karafka/admin.rb +211 -90
- data/lib/karafka/base_consumer.rb +2 -2
- data/lib/karafka/cli/info.rb +9 -7
- data/lib/karafka/cli/server.rb +7 -7
- data/lib/karafka/cli/topics/align.rb +109 -0
- data/lib/karafka/cli/topics/base.rb +66 -0
- data/lib/karafka/cli/topics/create.rb +35 -0
- data/lib/karafka/cli/topics/delete.rb +30 -0
- data/lib/karafka/cli/topics/migrate.rb +31 -0
- data/lib/karafka/cli/topics/plan.rb +169 -0
- data/lib/karafka/cli/topics/repartition.rb +41 -0
- data/lib/karafka/cli/topics/reset.rb +18 -0
- data/lib/karafka/cli/topics.rb +13 -123
- data/lib/karafka/connection/client.rb +55 -37
- data/lib/karafka/connection/listener.rb +22 -17
- data/lib/karafka/connection/proxy.rb +93 -4
- data/lib/karafka/connection/status.rb +14 -2
- data/lib/karafka/constraints.rb +3 -3
- data/lib/karafka/contracts/config.rb +14 -1
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/deserializers/headers.rb +15 -0
- data/lib/karafka/deserializers/key.rb +15 -0
- data/lib/karafka/deserializers/payload.rb +16 -0
- data/lib/karafka/embedded.rb +2 -0
- data/lib/karafka/helpers/async.rb +5 -2
- data/lib/karafka/helpers/colorize.rb +6 -0
- data/lib/karafka/instrumentation/callbacks/oauthbearer_token_refresh.rb +29 -0
- data/lib/karafka/instrumentation/logger_listener.rb +23 -3
- data/lib/karafka/instrumentation/notifications.rb +10 -0
- data/lib/karafka/instrumentation/vendors/appsignal/client.rb +16 -2
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +20 -0
- data/lib/karafka/messages/batch_metadata.rb +1 -1
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/messages/builders/message.rb +10 -6
- data/lib/karafka/messages/message.rb +2 -1
- data/lib/karafka/messages/metadata.rb +20 -4
- data/lib/karafka/messages/parser.rb +1 -1
- data/lib/karafka/pro/base_consumer.rb +12 -23
- data/lib/karafka/pro/encryption/cipher.rb +7 -3
- data/lib/karafka/pro/encryption/contracts/config.rb +1 -0
- data/lib/karafka/pro/encryption/errors.rb +4 -1
- data/lib/karafka/pro/encryption/messages/middleware.rb +13 -11
- data/lib/karafka/pro/encryption/messages/parser.rb +22 -20
- data/lib/karafka/pro/encryption/setup/config.rb +5 -0
- data/lib/karafka/pro/iterator/expander.rb +2 -1
- data/lib/karafka/pro/iterator/tpl_builder.rb +38 -0
- data/lib/karafka/pro/iterator.rb +28 -2
- data/lib/karafka/pro/loader.rb +3 -0
- data/lib/karafka/pro/processing/coordinator.rb +15 -2
- data/lib/karafka/pro/processing/expansions_selector.rb +2 -0
- data/lib/karafka/pro/processing/jobs_queue.rb +122 -5
- data/lib/karafka/pro/processing/periodic_job/consumer.rb +67 -0
- data/lib/karafka/pro/processing/piping/consumer.rb +126 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
- data/lib/karafka/pro/processing/strategies/default.rb +5 -1
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +21 -5
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
- data/lib/karafka/pro/processing/subscription_groups_coordinator.rb +52 -0
- data/lib/karafka/pro/routing/features/direct_assignments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/direct_assignments/contracts/consumer_group.rb +53 -0
- data/lib/karafka/pro/routing/features/direct_assignments/contracts/topic.rb +108 -0
- data/lib/karafka/pro/routing/features/direct_assignments/subscription_group.rb +77 -0
- data/lib/karafka/pro/routing/features/direct_assignments/topic.rb +69 -0
- data/lib/karafka/pro/routing/features/direct_assignments.rb +25 -0
- data/lib/karafka/pro/routing/features/patterns/builder.rb +1 -1
- data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +76 -0
- data/lib/karafka/pro/routing/features/swarm/contracts/topic.rb +16 -5
- data/lib/karafka/pro/routing/features/swarm/topic.rb +25 -2
- data/lib/karafka/pro/routing/features/swarm.rb +11 -0
- data/lib/karafka/pro/swarm/liveness_listener.rb +20 -0
- data/lib/karafka/processing/coordinator.rb +17 -8
- data/lib/karafka/processing/coordinators_buffer.rb +5 -2
- data/lib/karafka/processing/executor.rb +6 -2
- data/lib/karafka/processing/executors_buffer.rb +5 -2
- data/lib/karafka/processing/jobs_queue.rb +9 -4
- data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
- data/lib/karafka/processing/strategies/default.rb +7 -1
- data/lib/karafka/processing/strategies/dlq.rb +17 -2
- data/lib/karafka/processing/workers_batch.rb +4 -1
- data/lib/karafka/routing/builder.rb +6 -2
- data/lib/karafka/routing/consumer_group.rb +2 -1
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +5 -0
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +8 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +10 -2
- data/lib/karafka/routing/features/deserializers/config.rb +18 -0
- data/lib/karafka/routing/features/deserializers/contracts/topic.rb +31 -0
- data/lib/karafka/routing/features/deserializers/topic.rb +51 -0
- data/lib/karafka/routing/features/deserializers.rb +11 -0
- data/lib/karafka/routing/proxy.rb +9 -14
- data/lib/karafka/routing/router.rb +11 -2
- data/lib/karafka/routing/subscription_group.rb +9 -1
- data/lib/karafka/routing/topic.rb +0 -1
- data/lib/karafka/runner.rb +1 -1
- data/lib/karafka/setup/config.rb +50 -9
- data/lib/karafka/status.rb +7 -8
- data/lib/karafka/swarm/supervisor.rb +16 -2
- data/lib/karafka/templates/karafka.rb.erb +28 -1
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +38 -12
- metadata.gz.sig +0 -0
- data/lib/karafka/routing/consumer_mapper.rb +0 -23
- data/lib/karafka/serialization/json/deserializer.rb +0 -19
- data/lib/karafka/time_trackers/partition_usage.rb +0 -56
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Instrumentation
|
|
5
|
+
module Callbacks
|
|
6
|
+
# Callback that is triggered when oauth token needs to be refreshed.
|
|
7
|
+
class OauthbearerTokenRefresh
|
|
8
|
+
# @param bearer [Rdkafka::Consumer, Rdkafka::Admin] given rdkafka instance. It is needed as
|
|
9
|
+
# we need to have a reference to call `#oauthbearer_set_token` or
|
|
10
|
+
# `#oauthbearer_set_token_failure` upon the event.
|
|
11
|
+
def initialize(bearer)
|
|
12
|
+
@bearer = bearer
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# @param _rd_config [Rdkafka::Config]
|
|
16
|
+
# @param bearer_name [String] name of the bearer for which we refresh
|
|
17
|
+
def call(_rd_config, bearer_name)
|
|
18
|
+
return unless @bearer.name == bearer_name
|
|
19
|
+
|
|
20
|
+
::Karafka.monitor.instrument(
|
|
21
|
+
'oauthbearer.token_refresh',
|
|
22
|
+
bearer: @bearer,
|
|
23
|
+
caller: self
|
|
24
|
+
)
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -16,10 +16,20 @@ module Karafka
|
|
|
16
16
|
|
|
17
17
|
private_constant :USED_LOG_LEVELS
|
|
18
18
|
|
|
19
|
+
# @param log_polling [Boolean] should we log the fact that messages are being polled. This is
|
|
20
|
+
# usually noisy and not useful in production but can be useful in dev. While users can
|
|
21
|
+
# do this themselves this has been requested and asked for often, thus similar to how
|
|
22
|
+
# extensive logging can be disabled in WaterDrop, we do it here as well.
|
|
23
|
+
def initialize(log_polling: true)
|
|
24
|
+
@log_polling = log_polling
|
|
25
|
+
end
|
|
26
|
+
|
|
19
27
|
# Logs each message fetching attempt
|
|
20
28
|
#
|
|
21
29
|
# @param event [Karafka::Core::Monitoring::Event] event details including payload
|
|
22
30
|
def on_connection_listener_fetch_loop(event)
|
|
31
|
+
return unless log_polling?
|
|
32
|
+
|
|
23
33
|
listener = event[:caller]
|
|
24
34
|
debug "[#{listener.id}] Polling messages..."
|
|
25
35
|
end
|
|
@@ -28,6 +38,8 @@ module Karafka
|
|
|
28
38
|
#
|
|
29
39
|
# @param event [Karafka::Core::Monitoring::Event] event details including payload
|
|
30
40
|
def on_connection_listener_fetch_loop_received(event)
|
|
41
|
+
return unless log_polling?
|
|
42
|
+
|
|
31
43
|
listener = event[:caller]
|
|
32
44
|
time = event[:time]
|
|
33
45
|
messages_count = event[:messages_buffer].size
|
|
@@ -129,7 +141,7 @@ module Karafka
|
|
|
129
141
|
Thread.list.each do |thread|
|
|
130
142
|
tid = (thread.object_id ^ ::Process.pid).to_s(36)
|
|
131
143
|
|
|
132
|
-
warn "Thread TID-#{tid} #{thread
|
|
144
|
+
warn "Thread TID-#{tid} #{thread.name}"
|
|
133
145
|
|
|
134
146
|
if thread.backtrace
|
|
135
147
|
warn thread.backtrace.join("\n")
|
|
@@ -315,9 +327,10 @@ module Karafka
|
|
|
315
327
|
when 'connection.client.unsubscribe.error'
|
|
316
328
|
error "Client unsubscribe error occurred: #{error}"
|
|
317
329
|
error details
|
|
330
|
+
# This handles any custom errors coming from places like Web-UI, etc
|
|
318
331
|
else
|
|
319
|
-
#
|
|
320
|
-
|
|
332
|
+
error "#{type} error occurred: #{error}"
|
|
333
|
+
error details
|
|
321
334
|
end
|
|
322
335
|
end
|
|
323
336
|
|
|
@@ -326,6 +339,13 @@ module Karafka
|
|
|
326
339
|
Karafka.logger.send(log_level, *args)
|
|
327
340
|
end
|
|
328
341
|
end
|
|
342
|
+
|
|
343
|
+
private
|
|
344
|
+
|
|
345
|
+
# @return [Boolean] should we log polling
|
|
346
|
+
def log_polling?
|
|
347
|
+
@log_polling
|
|
348
|
+
end
|
|
329
349
|
end
|
|
330
350
|
end
|
|
331
351
|
end
|
|
@@ -40,6 +40,14 @@ module Karafka
|
|
|
40
40
|
connection.listener.fetch_loop.received
|
|
41
41
|
connection.listener.after_fetch_loop
|
|
42
42
|
|
|
43
|
+
connection.listener.pending
|
|
44
|
+
connection.listener.starting
|
|
45
|
+
connection.listener.running
|
|
46
|
+
connection.listener.quieting
|
|
47
|
+
connection.listener.quiet
|
|
48
|
+
connection.listener.stopping
|
|
49
|
+
connection.listener.stopped
|
|
50
|
+
|
|
43
51
|
consumer.before_schedule_consume
|
|
44
52
|
consumer.consume
|
|
45
53
|
consumer.consumed
|
|
@@ -66,6 +74,8 @@ module Karafka
|
|
|
66
74
|
filtering.throttled
|
|
67
75
|
filtering.seek
|
|
68
76
|
|
|
77
|
+
oauthbearer.token_refresh
|
|
78
|
+
|
|
69
79
|
process.notice_signal
|
|
70
80
|
|
|
71
81
|
rebalance.partitions_assign
|
|
@@ -11,6 +11,13 @@ module Karafka
|
|
|
11
11
|
#
|
|
12
12
|
# @note This client is abstract, it has no notion of Karafka whatsoever
|
|
13
13
|
class Client
|
|
14
|
+
# @param namespace_name [String, nil] Name of the AppSignal namespace we want to use or
|
|
15
|
+
# nil if it is to remain default.
|
|
16
|
+
# Defaults to `Appsignal::Transaction::BACKGROUND_JOB` in the execution flow.
|
|
17
|
+
def initialize(namespace_name: nil)
|
|
18
|
+
@namespace_name = namespace_name
|
|
19
|
+
end
|
|
20
|
+
|
|
14
21
|
# Starts an appsignal transaction with a given action name
|
|
15
22
|
#
|
|
16
23
|
# @param action_name [String] action name. For processing this should be equal to
|
|
@@ -18,7 +25,7 @@ module Karafka
|
|
|
18
25
|
def start_transaction(action_name)
|
|
19
26
|
transaction = ::Appsignal::Transaction.create(
|
|
20
27
|
SecureRandom.uuid,
|
|
21
|
-
|
|
28
|
+
namespace_name,
|
|
22
29
|
::Appsignal::Transaction::GenericRequest.new({})
|
|
23
30
|
)
|
|
24
31
|
|
|
@@ -83,7 +90,7 @@ module Karafka
|
|
|
83
90
|
transaction.set_error(error)
|
|
84
91
|
else
|
|
85
92
|
::Appsignal.send_error(error) do |transaction|
|
|
86
|
-
transaction.set_namespace(
|
|
93
|
+
transaction.set_namespace(namespace_name)
|
|
87
94
|
end
|
|
88
95
|
end
|
|
89
96
|
end
|
|
@@ -115,6 +122,13 @@ module Karafka
|
|
|
115
122
|
.transform_values(&:to_s)
|
|
116
123
|
.transform_keys!(&:to_s)
|
|
117
124
|
end
|
|
125
|
+
|
|
126
|
+
# @return [String] transaction namespace. We lazy evaluate it and resolve if needed to
|
|
127
|
+
# the default `BACKGROUND_JOB` during the execution, to ensure we can initialize the
|
|
128
|
+
# instrumentation even before appsignal gem is loaded.
|
|
129
|
+
def namespace_name
|
|
130
|
+
@namespace_name ||= ::Appsignal::Transaction::BACKGROUND_JOB
|
|
131
|
+
end
|
|
118
132
|
end
|
|
119
133
|
end
|
|
120
134
|
end
|
|
@@ -92,6 +92,26 @@ module Karafka
|
|
|
92
92
|
clear_polling_tick
|
|
93
93
|
end
|
|
94
94
|
|
|
95
|
+
# Deregister the polling tracker for given listener
|
|
96
|
+
# @param _event [Karafka::Core::Monitoring::Event]
|
|
97
|
+
def on_connection_listener_stopping(_event)
|
|
98
|
+
# We are interested in disabling tracking for given listener only if it was requested
|
|
99
|
+
# when karafka was running. If we would always clear, it would not catch the shutdown
|
|
100
|
+
# polling requirements. The "running" listener shutdown operations happen only when
|
|
101
|
+
# the manager requests it for downscaling.
|
|
102
|
+
return if Karafka::App.done?
|
|
103
|
+
|
|
104
|
+
clear_polling_tick
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
# Deregister the polling tracker for given listener
|
|
108
|
+
# @param _event [Karafka::Core::Monitoring::Event]
|
|
109
|
+
def on_connection_listener_stopped(_event)
|
|
110
|
+
return if Karafka::App.done?
|
|
111
|
+
|
|
112
|
+
clear_polling_tick
|
|
113
|
+
end
|
|
114
|
+
|
|
95
115
|
private
|
|
96
116
|
|
|
97
117
|
# Wraps the logic with a mutex
|
|
@@ -21,7 +21,7 @@ module Karafka
|
|
|
21
21
|
size: messages.count,
|
|
22
22
|
first_offset: messages.first&.offset || -1001,
|
|
23
23
|
last_offset: messages.last&.offset || -1001,
|
|
24
|
-
|
|
24
|
+
deserializers: topic.deserializers,
|
|
25
25
|
partition: partition,
|
|
26
26
|
topic: topic.name,
|
|
27
27
|
# We go with the assumption that the creation of the whole batch is the last message
|
|
@@ -14,14 +14,14 @@ module Karafka
|
|
|
14
14
|
def call(kafka_message, topic, received_at)
|
|
15
15
|
metadata = Karafka::Messages::Metadata.new(
|
|
16
16
|
timestamp: kafka_message.timestamp,
|
|
17
|
-
headers: kafka_message.headers,
|
|
18
|
-
key: kafka_message.key,
|
|
19
17
|
offset: kafka_message.offset,
|
|
20
|
-
|
|
18
|
+
deserializers: topic.deserializers,
|
|
21
19
|
partition: kafka_message.partition,
|
|
22
20
|
topic: topic.name,
|
|
23
|
-
received_at: received_at
|
|
24
|
-
|
|
21
|
+
received_at: received_at,
|
|
22
|
+
raw_headers: kafka_message.headers,
|
|
23
|
+
raw_key: kafka_message.key
|
|
24
|
+
)
|
|
25
25
|
|
|
26
26
|
# Get the raw payload
|
|
27
27
|
payload = kafka_message.payload
|
|
@@ -31,7 +31,11 @@ module Karafka
|
|
|
31
31
|
kafka_message.instance_variable_set('@payload', nil)
|
|
32
32
|
|
|
33
33
|
# Karafka messages cannot be frozen because of the lazy deserialization feature
|
|
34
|
-
Karafka::Messages::Message.new(payload, metadata)
|
|
34
|
+
message = Karafka::Messages::Message.new(payload, metadata)
|
|
35
|
+
# Assign message to metadata so we can reverse its relationship if needed
|
|
36
|
+
metadata[:message] = message
|
|
37
|
+
|
|
38
|
+
message
|
|
35
39
|
end
|
|
36
40
|
end
|
|
37
41
|
end
|
|
@@ -23,7 +23,8 @@ module Karafka
|
|
|
23
23
|
# prior to the final deserialization
|
|
24
24
|
attr_accessor :raw_payload
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
# We remove message as we do not want to do self-reference via `message.message`
|
|
27
|
+
def_delegators :metadata, *((Metadata.members + %i[key headers]) - %i[message])
|
|
27
28
|
|
|
28
29
|
# @param raw_payload [Object] incoming payload before deserialization
|
|
29
30
|
# @param metadata [Karafka::Messages::Metadata] message metadata object
|
|
@@ -4,15 +4,31 @@ module Karafka
|
|
|
4
4
|
module Messages
|
|
5
5
|
# Single message metadata details that can be accessed without the need of deserialization.
|
|
6
6
|
Metadata = Struct.new(
|
|
7
|
+
:message,
|
|
7
8
|
:timestamp,
|
|
8
|
-
:headers,
|
|
9
|
-
:key,
|
|
10
9
|
:offset,
|
|
11
|
-
:
|
|
10
|
+
:deserializers,
|
|
12
11
|
:partition,
|
|
13
12
|
:received_at,
|
|
14
13
|
:topic,
|
|
14
|
+
:raw_headers,
|
|
15
|
+
:raw_key,
|
|
15
16
|
keyword_init: true
|
|
16
|
-
)
|
|
17
|
+
) do
|
|
18
|
+
# @return [Object] deserialized key. By default in the raw string format.
|
|
19
|
+
def key
|
|
20
|
+
return @key if @key
|
|
21
|
+
|
|
22
|
+
@key = deserializers.key.call(self)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# @return [Object] deserialized headers. By default it is a hash with keys and payload being
|
|
26
|
+
# strings
|
|
27
|
+
def headers
|
|
28
|
+
return @headers if @headers
|
|
29
|
+
|
|
30
|
+
@headers = deserializers.headers.call(self)
|
|
31
|
+
end
|
|
32
|
+
end
|
|
17
33
|
end
|
|
18
34
|
end
|
|
@@ -20,29 +20,6 @@ module Karafka
|
|
|
20
20
|
#
|
|
21
21
|
# Methods here are supposed to be always available or are expected to be redefined
|
|
22
22
|
module BaseConsumer
|
|
23
|
-
# Runs the on-schedule tick periodic operations
|
|
24
|
-
# This method is an alias but is part of the naming convention used for other flows, this
|
|
25
|
-
# is why we do not reference the `handle_before_schedule_tick` directly
|
|
26
|
-
def on_before_schedule_tick
|
|
27
|
-
handle_before_schedule_tick
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
# Used by the executor to trigger consumer tick
|
|
31
|
-
# @private
|
|
32
|
-
def on_tick
|
|
33
|
-
handle_tick
|
|
34
|
-
rescue StandardError => e
|
|
35
|
-
Karafka.monitor.instrument(
|
|
36
|
-
'error.occurred',
|
|
37
|
-
error: e,
|
|
38
|
-
caller: self,
|
|
39
|
-
type: 'consumer.tick.error'
|
|
40
|
-
)
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
# By default we do nothing when ticking
|
|
44
|
-
def tick; end
|
|
45
|
-
|
|
46
23
|
# @return [Karafka::Pro::Processing::Coordinators::ErrorsTracker] tracker for errors that
|
|
47
24
|
# occurred during processing until another successful processing
|
|
48
25
|
#
|
|
@@ -58,6 +35,18 @@ module Karafka
|
|
|
58
35
|
def errors_tracker
|
|
59
36
|
coordinator.errors_tracker
|
|
60
37
|
end
|
|
38
|
+
|
|
39
|
+
# @return [Karafka::Pro::Processing::SubscriptionGroupsCoordinator] Coordinator allowing to
|
|
40
|
+
# pause and resume polling of the given subscription group jobs queue for postponing
|
|
41
|
+
# further work.
|
|
42
|
+
#
|
|
43
|
+
# @note Since this stops polling, it can cause reaching `max.poll.interval.ms` limitations.
|
|
44
|
+
#
|
|
45
|
+
# @note This is a low-level API used for cross-topic coordination and some advanced features.
|
|
46
|
+
# Use it at your own risk.
|
|
47
|
+
def subscription_groups_coordinator
|
|
48
|
+
Processing::SubscriptionGroupsCoordinator.instance
|
|
49
|
+
end
|
|
61
50
|
end
|
|
62
51
|
end
|
|
63
52
|
end
|
|
@@ -16,6 +16,10 @@ module Karafka
|
|
|
16
16
|
module Encryption
|
|
17
17
|
# Cipher for encrypting and decrypting data
|
|
18
18
|
class Cipher
|
|
19
|
+
include Helpers::ConfigImporter.new(
|
|
20
|
+
encryption: %i[encryption]
|
|
21
|
+
)
|
|
22
|
+
|
|
19
23
|
def initialize
|
|
20
24
|
@private_pems = {}
|
|
21
25
|
end
|
|
@@ -39,7 +43,7 @@ module Karafka
|
|
|
39
43
|
|
|
40
44
|
# @return [::OpenSSL::PKey::RSA] rsa public key
|
|
41
45
|
def public_pem
|
|
42
|
-
@public_pem ||= ::OpenSSL::PKey::RSA.new(
|
|
46
|
+
@public_pem ||= ::OpenSSL::PKey::RSA.new(encryption.public_key)
|
|
43
47
|
end
|
|
44
48
|
|
|
45
49
|
# @param version [String] version for which we want to get the rsa key
|
|
@@ -47,8 +51,8 @@ module Karafka
|
|
|
47
51
|
def private_pem(version)
|
|
48
52
|
return @private_pems[version] if @private_pems.key?(version)
|
|
49
53
|
|
|
50
|
-
key_string =
|
|
51
|
-
key_string || raise(Errors::
|
|
54
|
+
key_string = encryption.private_keys[version]
|
|
55
|
+
key_string || raise(Errors::PrivateKeyNotFoundError, version)
|
|
52
56
|
|
|
53
57
|
@private_pems[version] = ::OpenSSL::PKey::RSA.new(key_string)
|
|
54
58
|
end
|
|
@@ -30,6 +30,7 @@ module Karafka
|
|
|
30
30
|
required(:active) { |val| [true, false].include?(val) }
|
|
31
31
|
required(:version) { |val| val.is_a?(String) && !val.empty? }
|
|
32
32
|
required(:public_key) { |val| val.is_a?(String) }
|
|
33
|
+
required(:fingerprinter) { |val| val == false || val.respond_to?(:hexdigest) }
|
|
33
34
|
|
|
34
35
|
required(:private_keys) do |val|
|
|
35
36
|
val.is_a?(Hash) &&
|
|
@@ -20,7 +20,10 @@ module Karafka
|
|
|
20
20
|
BaseError = Class.new(::Karafka::Errors::BaseError)
|
|
21
21
|
|
|
22
22
|
# Raised when we have encountered encryption key with version we do not have
|
|
23
|
-
|
|
23
|
+
PrivateKeyNotFoundError = Class.new(BaseError)
|
|
24
|
+
|
|
25
|
+
# Raised when fingerprinting was enabled and payload after decryption did not match it
|
|
26
|
+
FingerprintVerificationError = Class.new(BaseError)
|
|
24
27
|
end
|
|
25
28
|
end
|
|
26
29
|
end
|
|
@@ -18,26 +18,28 @@ module Karafka
|
|
|
18
18
|
module Messages
|
|
19
19
|
# Middleware for WaterDrop. It automatically encrypts messages payload.
|
|
20
20
|
# It is injected only if encryption is enabled.
|
|
21
|
+
# It also fingerprints the payload for verification if fingerprinting was enabled
|
|
21
22
|
class Middleware
|
|
23
|
+
include Helpers::ConfigImporter.new(
|
|
24
|
+
cipher: %i[encryption cipher],
|
|
25
|
+
version: %i[encryption version],
|
|
26
|
+
fingerprinter: %i[encryption fingerprinter]
|
|
27
|
+
)
|
|
28
|
+
|
|
22
29
|
# @param message [Hash] WaterDrop message hash
|
|
23
30
|
# @return [Hash] hash with encrypted payload and encryption version indicator
|
|
24
31
|
def call(message)
|
|
32
|
+
payload = message[:payload]
|
|
33
|
+
|
|
25
34
|
message[:headers] ||= {}
|
|
26
35
|
message[:headers]['encryption'] = version
|
|
27
|
-
message[:payload] = cipher.encrypt(
|
|
28
|
-
message
|
|
29
|
-
end
|
|
36
|
+
message[:payload] = cipher.encrypt(payload)
|
|
30
37
|
|
|
31
|
-
|
|
38
|
+
return message unless fingerprinter
|
|
32
39
|
|
|
33
|
-
|
|
34
|
-
def cipher
|
|
35
|
-
@cipher ||= ::Karafka::App.config.encryption.cipher
|
|
36
|
-
end
|
|
40
|
+
message[:headers]['encryption_fingerprint'] = fingerprinter.hexdigest(payload)
|
|
37
41
|
|
|
38
|
-
|
|
39
|
-
def version
|
|
40
|
-
@version ||= ::Karafka::App.config.encryption.version
|
|
42
|
+
message
|
|
41
43
|
end
|
|
42
44
|
end
|
|
43
45
|
end
|
|
@@ -20,34 +20,36 @@ module Karafka
|
|
|
20
20
|
# unencrypted payloads. That is why we always rely on message headers for encryption
|
|
21
21
|
# indication.
|
|
22
22
|
class Parser < ::Karafka::Messages::Parser
|
|
23
|
+
include Helpers::ConfigImporter.new(
|
|
24
|
+
cipher: %i[encryption cipher],
|
|
25
|
+
active: %i[encryption active],
|
|
26
|
+
fingerprinter: %i[encryption fingerprinter]
|
|
27
|
+
)
|
|
28
|
+
|
|
23
29
|
# @param message [::Karafka::Messages::Message]
|
|
24
30
|
# @return [Object] deserialized payload
|
|
25
31
|
def call(message)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
message.headers['encryption'],
|
|
30
|
-
message.raw_payload
|
|
31
|
-
)
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
super(message)
|
|
35
|
-
end
|
|
32
|
+
headers = message.headers
|
|
33
|
+
encryption = headers['encryption']
|
|
34
|
+
fingerprint = headers['encryption_fingerprint']
|
|
36
35
|
|
|
37
|
-
|
|
36
|
+
return super(message) unless active && encryption
|
|
38
37
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
38
|
+
# Decrypt raw payload so it can be handled by the default parser logic
|
|
39
|
+
decrypted_payload = cipher.decrypt(
|
|
40
|
+
encryption,
|
|
41
|
+
message.raw_payload
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
message.raw_payload = decrypted_payload
|
|
45
|
+
|
|
46
|
+
return super(message) unless fingerprint && fingerprinter
|
|
43
47
|
|
|
44
|
-
|
|
45
|
-
def active?
|
|
46
|
-
return @active unless @active.nil?
|
|
48
|
+
message_fingerprint = fingerprinter.hexdigest(decrypted_payload)
|
|
47
49
|
|
|
48
|
-
|
|
50
|
+
return super(message) if message_fingerprint == fingerprint
|
|
49
51
|
|
|
50
|
-
|
|
52
|
+
raise(Errors::FingerprintVerificationError, message.to_s)
|
|
51
53
|
end
|
|
52
54
|
end
|
|
53
55
|
end
|
|
@@ -40,6 +40,11 @@ module Karafka
|
|
|
40
40
|
# Cipher used to encrypt and decrypt data
|
|
41
41
|
setting(:cipher, default: Encryption::Cipher.new)
|
|
42
42
|
|
|
43
|
+
# When set to any digest that responds to `#hexdigest` will compute checksum of the
|
|
44
|
+
# message payload for post-decryption integrity verification. It will include a
|
|
45
|
+
# fingerprint in headers
|
|
46
|
+
setting(:fingerprinter, default: false)
|
|
47
|
+
|
|
43
48
|
configure
|
|
44
49
|
end
|
|
45
50
|
end
|
|
@@ -29,7 +29,8 @@ module Karafka
|
|
|
29
29
|
# - { 'topic1' => 100 } - means we run all partitions from the offset 100
|
|
30
30
|
# - { 'topic1' => Time.now - 60 } - we run all partitions from the message from 60s ago
|
|
31
31
|
# - { 'topic1' => { 1 => Time.now - 60 } } - partition1 from message 60s ago
|
|
32
|
-
#
|
|
32
|
+
# - { 'topic1' => { 1 => true } } - will pick first offset not consumed on this CG for p 1
|
|
33
|
+
# - { 'topic1' => true } - will pick first offset not consumed on this CG for all p
|
|
33
34
|
class Expander
|
|
34
35
|
# Expands topics to which we want to subscribe with partitions information in case this
|
|
35
36
|
# info is not provided.
|
|
@@ -36,6 +36,7 @@ module Karafka
|
|
|
36
36
|
resolve_partitions_with_exact_offsets
|
|
37
37
|
resolve_partitions_with_negative_offsets
|
|
38
38
|
resolve_partitions_with_time_offsets
|
|
39
|
+
resolve_partitions_with_cg_expectations
|
|
39
40
|
|
|
40
41
|
# Final tpl with all the data
|
|
41
42
|
tpl = Rdkafka::Consumer::TopicPartitionList.new
|
|
@@ -149,6 +150,43 @@ module Karafka
|
|
|
149
150
|
end
|
|
150
151
|
end
|
|
151
152
|
end
|
|
153
|
+
|
|
154
|
+
# Fetches last used offsets for those partitions for which we want to consume from last
|
|
155
|
+
# moment where given consumer group has finished
|
|
156
|
+
# This is indicated by given partition value being set to `true`.
|
|
157
|
+
def resolve_partitions_with_cg_expectations
|
|
158
|
+
tpl = Rdkafka::Consumer::TopicPartitionList.new
|
|
159
|
+
|
|
160
|
+
# First iterate over all topics that we want to expand
|
|
161
|
+
@expanded_topics.each do |name, partitions|
|
|
162
|
+
partitions_base = {}
|
|
163
|
+
|
|
164
|
+
partitions.each do |partition, offset|
|
|
165
|
+
# Pick only partitions where offset is set to true to indicate that we are interested
|
|
166
|
+
# in committed offset resolution
|
|
167
|
+
next unless offset == true
|
|
168
|
+
|
|
169
|
+
# This can be set to nil because we do not use this offset value when querying
|
|
170
|
+
partitions_base[partition] = nil
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
# If there is nothing to work with, just skip
|
|
174
|
+
next if partitions_base.empty?
|
|
175
|
+
|
|
176
|
+
tpl.add_topic_and_partitions_with_offsets(name, partitions_base)
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
# If nothing to resolve, do not resolve
|
|
180
|
+
return if tpl.empty?
|
|
181
|
+
|
|
182
|
+
# Fetch all committed offsets for all the topics partitions of our interest and use
|
|
183
|
+
# those offsets for the mapped topics data
|
|
184
|
+
@consumer.committed(tpl).to_h.each do |name, partitions|
|
|
185
|
+
partitions.each do |partition|
|
|
186
|
+
@mapped_topics[name][partition.partition] = partition.offset
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
end
|
|
152
190
|
end
|
|
153
191
|
end
|
|
154
192
|
end
|
data/lib/karafka/pro/iterator.rb
CHANGED
|
@@ -20,7 +20,9 @@ module Karafka
|
|
|
20
20
|
# the end. It also allows for signaling, when a given message should be last out of certain
|
|
21
21
|
# partition, but we still want to continue iterating in other messages.
|
|
22
22
|
#
|
|
23
|
-
# It does **not** create a consumer group and does not have any offset management
|
|
23
|
+
# It does **not** create a consumer group and does not have any offset management until first
|
|
24
|
+
# consumer offset marking happens. So it can be used for quick seeks as well as iterative,
|
|
25
|
+
# repetitive data fetching from rake, etc.
|
|
24
26
|
class Iterator
|
|
25
27
|
# A simple API allowing to iterate over topic/partition data, without having to subscribe
|
|
26
28
|
# and deal with rebalances. This API allows for multi-partition streaming and is optimized
|
|
@@ -92,6 +94,7 @@ module Karafka
|
|
|
92
94
|
end
|
|
93
95
|
end
|
|
94
96
|
|
|
97
|
+
@current_consumer.commit_offsets(async: false) if @stored_offsets
|
|
95
98
|
@current_message = nil
|
|
96
99
|
@current_consumer = nil
|
|
97
100
|
end
|
|
@@ -127,6 +130,29 @@ module Karafka
|
|
|
127
130
|
)
|
|
128
131
|
end
|
|
129
132
|
|
|
133
|
+
# Stops all the iterating
|
|
134
|
+
# @note `break` can also be used but in such cases commits stored async will not be flushed
|
|
135
|
+
# to Kafka. This is why `#stop` is the recommended method.
|
|
136
|
+
def stop
|
|
137
|
+
@stopped = true
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
# Marks given message as consumed.
|
|
141
|
+
#
|
|
142
|
+
# @param message [Karafka::Messages::Message] message that we want to mark as processed
|
|
143
|
+
def mark_as_consumed(message)
|
|
144
|
+
@current_consumer.store_offset(message, nil)
|
|
145
|
+
@stored_offsets = true
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
# Marks given message as consumed and commits offsets
|
|
149
|
+
#
|
|
150
|
+
# @param message [Karafka::Messages::Message] message that we want to mark as processed
|
|
151
|
+
def mark_as_consumed!(message)
|
|
152
|
+
mark_as_consumed(message)
|
|
153
|
+
@current_consumer.commit_offsets(async: false)
|
|
154
|
+
end
|
|
155
|
+
|
|
130
156
|
private
|
|
131
157
|
|
|
132
158
|
# @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
|
|
@@ -158,7 +184,7 @@ module Karafka
|
|
|
158
184
|
# Do we have all the data we wanted or has every topic partition reached eof?
|
|
159
185
|
# @return [Boolean]
|
|
160
186
|
def done?
|
|
161
|
-
@stopped_partitions >= @total_partitions
|
|
187
|
+
(@stopped_partitions >= @total_partitions) || @stopped
|
|
162
188
|
end
|
|
163
189
|
end
|
|
164
190
|
end
|
data/lib/karafka/pro/loader.rb
CHANGED
|
@@ -63,6 +63,9 @@ module Karafka
|
|
|
63
63
|
# @param config [Karafka::Core::Configurable::Node]
|
|
64
64
|
def post_setup_all(config)
|
|
65
65
|
features.each { |feature| feature.post_setup(config) }
|
|
66
|
+
|
|
67
|
+
# We initialize it here so we don't initialize it during multi-threading work
|
|
68
|
+
Processing::SubscriptionGroupsCoordinator.instance
|
|
66
69
|
end
|
|
67
70
|
|
|
68
71
|
private
|