karafka 2.3.4 → 2.4.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +12 -38
- data/CHANGELOG.md +56 -2
- data/Gemfile +6 -3
- data/Gemfile.lock +25 -23
- data/bin/integrations +1 -1
- data/config/locales/errors.yml +21 -2
- data/config/locales/pro_errors.yml +16 -1
- data/karafka.gemspec +4 -2
- data/lib/active_job/queue_adapters/karafka_adapter.rb +2 -0
- data/lib/karafka/admin/configs/config.rb +81 -0
- data/lib/karafka/admin/configs/resource.rb +88 -0
- data/lib/karafka/admin/configs.rb +103 -0
- data/lib/karafka/admin.rb +201 -100
- data/lib/karafka/base_consumer.rb +2 -2
- data/lib/karafka/cli/info.rb +9 -7
- data/lib/karafka/cli/server.rb +7 -7
- data/lib/karafka/cli/topics/align.rb +109 -0
- data/lib/karafka/cli/topics/base.rb +66 -0
- data/lib/karafka/cli/topics/create.rb +35 -0
- data/lib/karafka/cli/topics/delete.rb +30 -0
- data/lib/karafka/cli/topics/migrate.rb +31 -0
- data/lib/karafka/cli/topics/plan.rb +169 -0
- data/lib/karafka/cli/topics/repartition.rb +41 -0
- data/lib/karafka/cli/topics/reset.rb +18 -0
- data/lib/karafka/cli/topics.rb +13 -123
- data/lib/karafka/connection/client.rb +55 -37
- data/lib/karafka/connection/listener.rb +22 -17
- data/lib/karafka/connection/proxy.rb +93 -4
- data/lib/karafka/connection/status.rb +14 -2
- data/lib/karafka/contracts/config.rb +14 -1
- data/lib/karafka/contracts/topic.rb +1 -1
- data/lib/karafka/deserializers/headers.rb +15 -0
- data/lib/karafka/deserializers/key.rb +15 -0
- data/lib/karafka/deserializers/payload.rb +16 -0
- data/lib/karafka/embedded.rb +2 -0
- data/lib/karafka/helpers/async.rb +5 -2
- data/lib/karafka/helpers/colorize.rb +6 -0
- data/lib/karafka/instrumentation/callbacks/oauthbearer_token_refresh.rb +29 -0
- data/lib/karafka/instrumentation/logger_listener.rb +23 -3
- data/lib/karafka/instrumentation/notifications.rb +10 -0
- data/lib/karafka/instrumentation/vendors/appsignal/client.rb +16 -2
- data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +20 -0
- data/lib/karafka/messages/batch_metadata.rb +1 -1
- data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
- data/lib/karafka/messages/builders/message.rb +10 -6
- data/lib/karafka/messages/message.rb +2 -1
- data/lib/karafka/messages/metadata.rb +20 -4
- data/lib/karafka/messages/parser.rb +1 -1
- data/lib/karafka/pro/base_consumer.rb +12 -23
- data/lib/karafka/pro/encryption/cipher.rb +7 -3
- data/lib/karafka/pro/encryption/contracts/config.rb +1 -0
- data/lib/karafka/pro/encryption/errors.rb +4 -1
- data/lib/karafka/pro/encryption/messages/middleware.rb +13 -11
- data/lib/karafka/pro/encryption/messages/parser.rb +22 -20
- data/lib/karafka/pro/encryption/setup/config.rb +5 -0
- data/lib/karafka/pro/iterator/expander.rb +2 -1
- data/lib/karafka/pro/iterator/tpl_builder.rb +38 -0
- data/lib/karafka/pro/iterator.rb +28 -2
- data/lib/karafka/pro/loader.rb +3 -0
- data/lib/karafka/pro/processing/coordinator.rb +15 -2
- data/lib/karafka/pro/processing/expansions_selector.rb +2 -0
- data/lib/karafka/pro/processing/jobs_queue.rb +122 -5
- data/lib/karafka/pro/processing/periodic_job/consumer.rb +67 -0
- data/lib/karafka/pro/processing/piping/consumer.rb +126 -0
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +1 -1
- data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
- data/lib/karafka/pro/processing/strategies/default.rb +5 -1
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +21 -5
- data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
- data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
- data/lib/karafka/pro/processing/subscription_groups_coordinator.rb +52 -0
- data/lib/karafka/pro/routing/features/direct_assignments/config.rb +27 -0
- data/lib/karafka/pro/routing/features/direct_assignments/contracts/consumer_group.rb +53 -0
- data/lib/karafka/pro/routing/features/direct_assignments/contracts/topic.rb +108 -0
- data/lib/karafka/pro/routing/features/direct_assignments/subscription_group.rb +77 -0
- data/lib/karafka/pro/routing/features/direct_assignments/topic.rb +69 -0
- data/lib/karafka/pro/routing/features/direct_assignments.rb +25 -0
- data/lib/karafka/pro/routing/features/patterns/builder.rb +1 -1
- data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +76 -0
- data/lib/karafka/pro/routing/features/swarm/contracts/topic.rb +16 -5
- data/lib/karafka/pro/routing/features/swarm/topic.rb +25 -2
- data/lib/karafka/pro/routing/features/swarm.rb +11 -0
- data/lib/karafka/pro/swarm/liveness_listener.rb +20 -0
- data/lib/karafka/processing/coordinator.rb +17 -8
- data/lib/karafka/processing/coordinators_buffer.rb +5 -2
- data/lib/karafka/processing/executor.rb +6 -2
- data/lib/karafka/processing/executors_buffer.rb +5 -2
- data/lib/karafka/processing/jobs_queue.rb +9 -4
- data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
- data/lib/karafka/processing/strategies/default.rb +7 -1
- data/lib/karafka/processing/strategies/dlq.rb +17 -2
- data/lib/karafka/processing/workers_batch.rb +4 -1
- data/lib/karafka/routing/builder.rb +6 -2
- data/lib/karafka/routing/consumer_group.rb +2 -1
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +5 -0
- data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +8 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +10 -2
- data/lib/karafka/routing/features/deserializers/config.rb +18 -0
- data/lib/karafka/routing/features/deserializers/contracts/topic.rb +31 -0
- data/lib/karafka/routing/features/deserializers/topic.rb +51 -0
- data/lib/karafka/routing/features/deserializers.rb +11 -0
- data/lib/karafka/routing/proxy.rb +9 -14
- data/lib/karafka/routing/router.rb +11 -2
- data/lib/karafka/routing/subscription_group.rb +9 -1
- data/lib/karafka/routing/topic.rb +0 -1
- data/lib/karafka/runner.rb +1 -1
- data/lib/karafka/setup/config.rb +50 -9
- data/lib/karafka/status.rb +7 -8
- data/lib/karafka/swarm/supervisor.rb +16 -2
- data/lib/karafka/templates/karafka.rb.erb +28 -1
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +38 -12
- metadata.gz.sig +0 -0
- data/lib/karafka/routing/consumer_mapper.rb +0 -23
- data/lib/karafka/serialization/json/deserializer.rb +0 -19
- data/lib/karafka/time_trackers/partition_usage.rb +0 -56
@@ -16,10 +16,20 @@ module Karafka
|
|
16
16
|
|
17
17
|
private_constant :USED_LOG_LEVELS
|
18
18
|
|
19
|
+
# @param log_polling [Boolean] should we log the fact that messages are being polled. This is
|
20
|
+
# usually noisy and not useful in production but can be useful in dev. While users can
|
21
|
+
# do this themselves this has been requested and asked for often, thus similar to how
|
22
|
+
# extensive logging can be disabled in WaterDrop, we do it here as well.
|
23
|
+
def initialize(log_polling: true)
|
24
|
+
@log_polling = log_polling
|
25
|
+
end
|
26
|
+
|
19
27
|
# Logs each message fetching attempt
|
20
28
|
#
|
21
29
|
# @param event [Karafka::Core::Monitoring::Event] event details including payload
|
22
30
|
def on_connection_listener_fetch_loop(event)
|
31
|
+
return unless log_polling?
|
32
|
+
|
23
33
|
listener = event[:caller]
|
24
34
|
debug "[#{listener.id}] Polling messages..."
|
25
35
|
end
|
@@ -28,6 +38,8 @@ module Karafka
|
|
28
38
|
#
|
29
39
|
# @param event [Karafka::Core::Monitoring::Event] event details including payload
|
30
40
|
def on_connection_listener_fetch_loop_received(event)
|
41
|
+
return unless log_polling?
|
42
|
+
|
31
43
|
listener = event[:caller]
|
32
44
|
time = event[:time]
|
33
45
|
messages_count = event[:messages_buffer].size
|
@@ -129,7 +141,7 @@ module Karafka
|
|
129
141
|
Thread.list.each do |thread|
|
130
142
|
tid = (thread.object_id ^ ::Process.pid).to_s(36)
|
131
143
|
|
132
|
-
warn "Thread TID-#{tid} #{thread
|
144
|
+
warn "Thread TID-#{tid} #{thread.name}"
|
133
145
|
|
134
146
|
if thread.backtrace
|
135
147
|
warn thread.backtrace.join("\n")
|
@@ -315,9 +327,10 @@ module Karafka
|
|
315
327
|
when 'connection.client.unsubscribe.error'
|
316
328
|
error "Client unsubscribe error occurred: #{error}"
|
317
329
|
error details
|
330
|
+
# This handles any custom errors coming from places like Web-UI, etc
|
318
331
|
else
|
319
|
-
#
|
320
|
-
|
332
|
+
error "#{type} error occurred: #{error}"
|
333
|
+
error details
|
321
334
|
end
|
322
335
|
end
|
323
336
|
|
@@ -326,6 +339,13 @@ module Karafka
|
|
326
339
|
Karafka.logger.send(log_level, *args)
|
327
340
|
end
|
328
341
|
end
|
342
|
+
|
343
|
+
private
|
344
|
+
|
345
|
+
# @return [Boolean] should we log polling
|
346
|
+
def log_polling?
|
347
|
+
@log_polling
|
348
|
+
end
|
329
349
|
end
|
330
350
|
end
|
331
351
|
end
|
@@ -40,6 +40,14 @@ module Karafka
|
|
40
40
|
connection.listener.fetch_loop.received
|
41
41
|
connection.listener.after_fetch_loop
|
42
42
|
|
43
|
+
connection.listener.pending
|
44
|
+
connection.listener.starting
|
45
|
+
connection.listener.running
|
46
|
+
connection.listener.quieting
|
47
|
+
connection.listener.quiet
|
48
|
+
connection.listener.stopping
|
49
|
+
connection.listener.stopped
|
50
|
+
|
43
51
|
consumer.before_schedule_consume
|
44
52
|
consumer.consume
|
45
53
|
consumer.consumed
|
@@ -66,6 +74,8 @@ module Karafka
|
|
66
74
|
filtering.throttled
|
67
75
|
filtering.seek
|
68
76
|
|
77
|
+
oauthbearer.token_refresh
|
78
|
+
|
69
79
|
process.notice_signal
|
70
80
|
|
71
81
|
rebalance.partitions_assign
|
@@ -11,6 +11,13 @@ module Karafka
|
|
11
11
|
#
|
12
12
|
# @note This client is abstract, it has no notion of Karafka whatsoever
|
13
13
|
class Client
|
14
|
+
# @param namespace_name [String, nil] Name of the AppSignal namespace we want to use or
|
15
|
+
# nil if it is to remain default.
|
16
|
+
# Defaults to `Appsignal::Transaction::BACKGROUND_JOB` in the execution flow.
|
17
|
+
def initialize(namespace_name: nil)
|
18
|
+
@namespace_name = namespace_name
|
19
|
+
end
|
20
|
+
|
14
21
|
# Starts an appsignal transaction with a given action name
|
15
22
|
#
|
16
23
|
# @param action_name [String] action name. For processing this should be equal to
|
@@ -18,7 +25,7 @@ module Karafka
|
|
18
25
|
def start_transaction(action_name)
|
19
26
|
transaction = ::Appsignal::Transaction.create(
|
20
27
|
SecureRandom.uuid,
|
21
|
-
|
28
|
+
namespace_name,
|
22
29
|
::Appsignal::Transaction::GenericRequest.new({})
|
23
30
|
)
|
24
31
|
|
@@ -83,7 +90,7 @@ module Karafka
|
|
83
90
|
transaction.set_error(error)
|
84
91
|
else
|
85
92
|
::Appsignal.send_error(error) do |transaction|
|
86
|
-
transaction.set_namespace(
|
93
|
+
transaction.set_namespace(namespace_name)
|
87
94
|
end
|
88
95
|
end
|
89
96
|
end
|
@@ -115,6 +122,13 @@ module Karafka
|
|
115
122
|
.transform_values(&:to_s)
|
116
123
|
.transform_keys!(&:to_s)
|
117
124
|
end
|
125
|
+
|
126
|
+
# @return [String] transaction namespace. We lazy evaluate it and resolve if needed to
|
127
|
+
# the default `BACKGROUND_JOB` during the execution, to ensure we can initialize the
|
128
|
+
# instrumentation even before appsignal gem is loaded.
|
129
|
+
def namespace_name
|
130
|
+
@namespace_name ||= ::Appsignal::Transaction::BACKGROUND_JOB
|
131
|
+
end
|
118
132
|
end
|
119
133
|
end
|
120
134
|
end
|
@@ -92,6 +92,26 @@ module Karafka
|
|
92
92
|
clear_polling_tick
|
93
93
|
end
|
94
94
|
|
95
|
+
# Deregister the polling tracker for given listener
|
96
|
+
# @param _event [Karafka::Core::Monitoring::Event]
|
97
|
+
def on_connection_listener_stopping(_event)
|
98
|
+
# We are interested in disabling tracking for given listener only if it was requested
|
99
|
+
# when karafka was running. If we would always clear, it would not catch the shutdown
|
100
|
+
# polling requirements. The "running" listener shutdown operations happen only when
|
101
|
+
# the manager requests it for downscaling.
|
102
|
+
return if Karafka::App.done?
|
103
|
+
|
104
|
+
clear_polling_tick
|
105
|
+
end
|
106
|
+
|
107
|
+
# Deregister the polling tracker for given listener
|
108
|
+
# @param _event [Karafka::Core::Monitoring::Event]
|
109
|
+
def on_connection_listener_stopped(_event)
|
110
|
+
return if Karafka::App.done?
|
111
|
+
|
112
|
+
clear_polling_tick
|
113
|
+
end
|
114
|
+
|
95
115
|
private
|
96
116
|
|
97
117
|
# Wraps the logic with a mutex
|
@@ -21,7 +21,7 @@ module Karafka
|
|
21
21
|
size: messages.count,
|
22
22
|
first_offset: messages.first&.offset || -1001,
|
23
23
|
last_offset: messages.last&.offset || -1001,
|
24
|
-
|
24
|
+
deserializers: topic.deserializers,
|
25
25
|
partition: partition,
|
26
26
|
topic: topic.name,
|
27
27
|
# We go with the assumption that the creation of the whole batch is the last message
|
@@ -14,14 +14,14 @@ module Karafka
|
|
14
14
|
def call(kafka_message, topic, received_at)
|
15
15
|
metadata = Karafka::Messages::Metadata.new(
|
16
16
|
timestamp: kafka_message.timestamp,
|
17
|
-
headers: kafka_message.headers,
|
18
|
-
key: kafka_message.key,
|
19
17
|
offset: kafka_message.offset,
|
20
|
-
|
18
|
+
deserializers: topic.deserializers,
|
21
19
|
partition: kafka_message.partition,
|
22
20
|
topic: topic.name,
|
23
|
-
received_at: received_at
|
24
|
-
|
21
|
+
received_at: received_at,
|
22
|
+
raw_headers: kafka_message.headers,
|
23
|
+
raw_key: kafka_message.key
|
24
|
+
)
|
25
25
|
|
26
26
|
# Get the raw payload
|
27
27
|
payload = kafka_message.payload
|
@@ -31,7 +31,11 @@ module Karafka
|
|
31
31
|
kafka_message.instance_variable_set('@payload', nil)
|
32
32
|
|
33
33
|
# Karafka messages cannot be frozen because of the lazy deserialization feature
|
34
|
-
Karafka::Messages::Message.new(payload, metadata)
|
34
|
+
message = Karafka::Messages::Message.new(payload, metadata)
|
35
|
+
# Assign message to metadata so we can reverse its relationship if needed
|
36
|
+
metadata[:message] = message
|
37
|
+
|
38
|
+
message
|
35
39
|
end
|
36
40
|
end
|
37
41
|
end
|
@@ -23,7 +23,8 @@ module Karafka
|
|
23
23
|
# prior to the final deserialization
|
24
24
|
attr_accessor :raw_payload
|
25
25
|
|
26
|
-
|
26
|
+
# We remove message as we do not want to do self-reference via `message.message`
|
27
|
+
def_delegators :metadata, *((Metadata.members + %i[key headers]) - %i[message])
|
27
28
|
|
28
29
|
# @param raw_payload [Object] incoming payload before deserialization
|
29
30
|
# @param metadata [Karafka::Messages::Metadata] message metadata object
|
@@ -4,15 +4,31 @@ module Karafka
|
|
4
4
|
module Messages
|
5
5
|
# Single message metadata details that can be accessed without the need of deserialization.
|
6
6
|
Metadata = Struct.new(
|
7
|
+
:message,
|
7
8
|
:timestamp,
|
8
|
-
:headers,
|
9
|
-
:key,
|
10
9
|
:offset,
|
11
|
-
:
|
10
|
+
:deserializers,
|
12
11
|
:partition,
|
13
12
|
:received_at,
|
14
13
|
:topic,
|
14
|
+
:raw_headers,
|
15
|
+
:raw_key,
|
15
16
|
keyword_init: true
|
16
|
-
)
|
17
|
+
) do
|
18
|
+
# @return [Object] deserialized key. By default in the raw string format.
|
19
|
+
def key
|
20
|
+
return @key if @key
|
21
|
+
|
22
|
+
@key = deserializers.key.call(self)
|
23
|
+
end
|
24
|
+
|
25
|
+
# @return [Object] deserialized headers. By default it's a hash with keys and payload being
|
26
|
+
# strings
|
27
|
+
def headers
|
28
|
+
return @headers if @headers
|
29
|
+
|
30
|
+
@headers = deserializers.headers.call(self)
|
31
|
+
end
|
32
|
+
end
|
17
33
|
end
|
18
34
|
end
|
@@ -20,29 +20,6 @@ module Karafka
|
|
20
20
|
#
|
21
21
|
# Methods here are supposed to be always available or are expected to be redefined
|
22
22
|
module BaseConsumer
|
23
|
-
# Runs the on-schedule tick periodic operations
|
24
|
-
# This method is an alias but is part of the naming convention used for other flows, this
|
25
|
-
# is why we do not reference the `handle_before_schedule_tick` directly
|
26
|
-
def on_before_schedule_tick
|
27
|
-
handle_before_schedule_tick
|
28
|
-
end
|
29
|
-
|
30
|
-
# Used by the executor to trigger consumer tick
|
31
|
-
# @private
|
32
|
-
def on_tick
|
33
|
-
handle_tick
|
34
|
-
rescue StandardError => e
|
35
|
-
Karafka.monitor.instrument(
|
36
|
-
'error.occurred',
|
37
|
-
error: e,
|
38
|
-
caller: self,
|
39
|
-
type: 'consumer.tick.error'
|
40
|
-
)
|
41
|
-
end
|
42
|
-
|
43
|
-
# By default we do nothing when ticking
|
44
|
-
def tick; end
|
45
|
-
|
46
23
|
# @return [Karafka::Pro::Processing::Coordinators::ErrorsTracker] tracker for errors that
|
47
24
|
# occurred during processing until another successful processing
|
48
25
|
#
|
@@ -58,6 +35,18 @@ module Karafka
|
|
58
35
|
def errors_tracker
|
59
36
|
coordinator.errors_tracker
|
60
37
|
end
|
38
|
+
|
39
|
+
# @return [Karafka::Pro::Processing::SubscriptionGroupsCoordinator] Coordinator allowing to
|
40
|
+
# pause and resume polling of the given subscription group jobs queue for postponing
|
41
|
+
# further work.
|
42
|
+
#
|
43
|
+
# @note Since this stops polling, it can cause reaching `max.poll.interval.ms` limitations.
|
44
|
+
#
|
45
|
+
# @note This is a low-level API used for cross-topic coordination and some advanced features.
|
46
|
+
# Use it at own risk.
|
47
|
+
def subscription_groups_coordinator
|
48
|
+
Processing::SubscriptionGroupsCoordinator.instance
|
49
|
+
end
|
61
50
|
end
|
62
51
|
end
|
63
52
|
end
|
@@ -16,6 +16,10 @@ module Karafka
|
|
16
16
|
module Encryption
|
17
17
|
# Cipher for encrypting and decrypting data
|
18
18
|
class Cipher
|
19
|
+
include Helpers::ConfigImporter.new(
|
20
|
+
encryption: %i[encryption]
|
21
|
+
)
|
22
|
+
|
19
23
|
def initialize
|
20
24
|
@private_pems = {}
|
21
25
|
end
|
@@ -39,7 +43,7 @@ module Karafka
|
|
39
43
|
|
40
44
|
# @return [::OpenSSL::PKey::RSA] rsa public key
|
41
45
|
def public_pem
|
42
|
-
@public_pem ||= ::OpenSSL::PKey::RSA.new(
|
46
|
+
@public_pem ||= ::OpenSSL::PKey::RSA.new(encryption.public_key)
|
43
47
|
end
|
44
48
|
|
45
49
|
# @param version [String] version for which we want to get the rsa key
|
@@ -47,8 +51,8 @@ module Karafka
|
|
47
51
|
def private_pem(version)
|
48
52
|
return @private_pems[version] if @private_pems.key?(version)
|
49
53
|
|
50
|
-
key_string =
|
51
|
-
key_string || raise(Errors::
|
54
|
+
key_string = encryption.private_keys[version]
|
55
|
+
key_string || raise(Errors::PrivateKeyNotFoundError, version)
|
52
56
|
|
53
57
|
@private_pems[version] = ::OpenSSL::PKey::RSA.new(key_string)
|
54
58
|
end
|
@@ -30,6 +30,7 @@ module Karafka
|
|
30
30
|
required(:active) { |val| [true, false].include?(val) }
|
31
31
|
required(:version) { |val| val.is_a?(String) && !val.empty? }
|
32
32
|
required(:public_key) { |val| val.is_a?(String) }
|
33
|
+
required(:fingerprinter) { |val| val == false || val.respond_to?(:hexdigest) }
|
33
34
|
|
34
35
|
required(:private_keys) do |val|
|
35
36
|
val.is_a?(Hash) &&
|
@@ -20,7 +20,10 @@ module Karafka
|
|
20
20
|
BaseError = Class.new(::Karafka::Errors::BaseError)
|
21
21
|
|
22
22
|
# Raised when we have encountered encryption key with version we do not have
|
23
|
-
|
23
|
+
PrivateKeyNotFoundError = Class.new(BaseError)
|
24
|
+
|
25
|
+
# Raised when fingerprinting was enabled and payload after decryption did not match it
|
26
|
+
FingerprintVerificationError = Class.new(BaseError)
|
24
27
|
end
|
25
28
|
end
|
26
29
|
end
|
@@ -18,26 +18,28 @@ module Karafka
|
|
18
18
|
module Messages
|
19
19
|
# Middleware for WaterDrop. It automatically encrypts messages payload.
|
20
20
|
# It is injected only if encryption is enabled.
|
21
|
+
# It also fingerprints the payload for verification if fingerprinting was enabled
|
21
22
|
class Middleware
|
23
|
+
include Helpers::ConfigImporter.new(
|
24
|
+
cipher: %i[encryption cipher],
|
25
|
+
version: %i[encryption version],
|
26
|
+
fingerprinter: %i[encryption fingerprinter]
|
27
|
+
)
|
28
|
+
|
22
29
|
# @param message [Hash] WaterDrop message hash
|
23
30
|
# @return [Hash] hash with encrypted payload and encryption version indicator
|
24
31
|
def call(message)
|
32
|
+
payload = message[:payload]
|
33
|
+
|
25
34
|
message[:headers] ||= {}
|
26
35
|
message[:headers]['encryption'] = version
|
27
|
-
message[:payload] = cipher.encrypt(
|
28
|
-
message
|
29
|
-
end
|
36
|
+
message[:payload] = cipher.encrypt(payload)
|
30
37
|
|
31
|
-
|
38
|
+
return message unless fingerprinter
|
32
39
|
|
33
|
-
|
34
|
-
def cipher
|
35
|
-
@cipher ||= ::Karafka::App.config.encryption.cipher
|
36
|
-
end
|
40
|
+
message[:headers]['encryption_fingerprint'] = fingerprinter.hexdigest(payload)
|
37
41
|
|
38
|
-
|
39
|
-
def version
|
40
|
-
@version ||= ::Karafka::App.config.encryption.version
|
42
|
+
message
|
41
43
|
end
|
42
44
|
end
|
43
45
|
end
|
@@ -20,34 +20,36 @@ module Karafka
|
|
20
20
|
# unencrypted payloads. That is why we always rely on message headers for encryption
|
21
21
|
# indication.
|
22
22
|
class Parser < ::Karafka::Messages::Parser
|
23
|
+
include Helpers::ConfigImporter.new(
|
24
|
+
cipher: %i[encryption cipher],
|
25
|
+
active: %i[encryption active],
|
26
|
+
fingerprinter: %i[encryption fingerprinter]
|
27
|
+
)
|
28
|
+
|
23
29
|
# @param message [::Karafka::Messages::Message]
|
24
30
|
# @return [Object] deserialized payload
|
25
31
|
def call(message)
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
message.headers['encryption'],
|
30
|
-
message.raw_payload
|
31
|
-
)
|
32
|
-
end
|
33
|
-
|
34
|
-
super(message)
|
35
|
-
end
|
32
|
+
headers = message.headers
|
33
|
+
encryption = headers['encryption']
|
34
|
+
fingerprint = headers['encryption_fingerprint']
|
36
35
|
|
37
|
-
|
36
|
+
return super(message) unless active && encryption
|
38
37
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
38
|
+
# Decrypt raw payload so it can be handled by the default parser logic
|
39
|
+
decrypted_payload = cipher.decrypt(
|
40
|
+
encryption,
|
41
|
+
message.raw_payload
|
42
|
+
)
|
43
|
+
|
44
|
+
message.raw_payload = decrypted_payload
|
45
|
+
|
46
|
+
return super(message) unless fingerprint && fingerprinter
|
43
47
|
|
44
|
-
|
45
|
-
def active?
|
46
|
-
return @active unless @active.nil?
|
48
|
+
message_fingerprint = fingerprinter.hexdigest(decrypted_payload)
|
47
49
|
|
48
|
-
|
50
|
+
return super(message) if message_fingerprint == fingerprint
|
49
51
|
|
50
|
-
|
52
|
+
raise(Errors::FingerprintVerificationError, message.to_s)
|
51
53
|
end
|
52
54
|
end
|
53
55
|
end
|
@@ -40,6 +40,11 @@ module Karafka
|
|
40
40
|
# Cipher used to encrypt and decrypt data
|
41
41
|
setting(:cipher, default: Encryption::Cipher.new)
|
42
42
|
|
43
|
+
# When set to any digest that responds to `#hexdigest` will compute checksum of the
|
44
|
+
# message payload for post-decryption integrity verification. It will include a
|
45
|
+
# fingerprint in headers
|
46
|
+
setting(:fingerprinter, default: false)
|
47
|
+
|
43
48
|
configure
|
44
49
|
end
|
45
50
|
end
|
@@ -29,7 +29,8 @@ module Karafka
|
|
29
29
|
# - { 'topic1' => 100 } - means we run all partitions from the offset 100
|
30
30
|
# - { 'topic1' => Time.now - 60 } - we run all partitions from the message from 60s ago
|
31
31
|
# - { 'topic1' => { 1 => Time.now - 60 } } - partition1 from message 60s ago
|
32
|
-
#
|
32
|
+
# - { 'topic1' => { 1 => true } } - will pick first offset not consumed on this CG for p 1
|
33
|
+
# - { 'topic1' => true } - will pick first offset not consumed on this CG for all p
|
33
34
|
class Expander
|
34
35
|
# Expands topics to which we want to subscribe with partitions information in case this
|
35
36
|
# info is not provided.
|
@@ -36,6 +36,7 @@ module Karafka
|
|
36
36
|
resolve_partitions_with_exact_offsets
|
37
37
|
resolve_partitions_with_negative_offsets
|
38
38
|
resolve_partitions_with_time_offsets
|
39
|
+
resolve_partitions_with_cg_expectations
|
39
40
|
|
40
41
|
# Final tpl with all the data
|
41
42
|
tpl = Rdkafka::Consumer::TopicPartitionList.new
|
@@ -149,6 +150,43 @@ module Karafka
|
|
149
150
|
end
|
150
151
|
end
|
151
152
|
end
|
153
|
+
|
154
|
+
# Fetches last used offsets for those partitions for which we want to consume from last
|
155
|
+
# moment where given consumer group has finished
|
156
|
+
# This is indicated by given partition value being set to `true`.
|
157
|
+
def resolve_partitions_with_cg_expectations
|
158
|
+
tpl = Rdkafka::Consumer::TopicPartitionList.new
|
159
|
+
|
160
|
+
# First iterate over all topics that we want to expand
|
161
|
+
@expanded_topics.each do |name, partitions|
|
162
|
+
partitions_base = {}
|
163
|
+
|
164
|
+
partitions.each do |partition, offset|
|
165
|
+
# Pick only partitions where offset is set to true to indicate that we are interested
|
166
|
+
# in committed offset resolution
|
167
|
+
next unless offset == true
|
168
|
+
|
169
|
+
# This can be set to nil because we do not use this offset value when querying
|
170
|
+
partitions_base[partition] = nil
|
171
|
+
end
|
172
|
+
|
173
|
+
# If there is nothing to work with, just skip
|
174
|
+
next if partitions_base.empty?
|
175
|
+
|
176
|
+
tpl.add_topic_and_partitions_with_offsets(name, partitions_base)
|
177
|
+
end
|
178
|
+
|
179
|
+
# If nothing to resolve, do not resolve
|
180
|
+
return if tpl.empty?
|
181
|
+
|
182
|
+
# Fetch all committed offsets for all the topics partitions of our interest and use
|
183
|
+
# those offsets for the mapped topics data
|
184
|
+
@consumer.committed(tpl).to_h.each do |name, partitions|
|
185
|
+
partitions.each do |partition|
|
186
|
+
@mapped_topics[name][partition.partition] = partition.offset
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
152
190
|
end
|
153
191
|
end
|
154
192
|
end
|
data/lib/karafka/pro/iterator.rb
CHANGED
@@ -20,7 +20,9 @@ module Karafka
|
|
20
20
|
# the end. It also allows for signaling, when a given message should be last out of certain
|
21
21
|
# partition, but we still want to continue iterating in other messages.
|
22
22
|
#
|
23
|
-
# It does **not** create a consumer group and does not have any offset management
|
23
|
+
# It does **not** create a consumer group and does not have any offset management until first
|
24
|
+
# consumer offset marking happens. So it can be used for quick seeks as well as iterative,
|
25
|
+
# repetitive data fetching from rake, etc.
|
24
26
|
class Iterator
|
25
27
|
# A simple API allowing to iterate over topic/partition data, without having to subscribe
|
26
28
|
# and deal with rebalances. This API allows for multi-partition streaming and is optimized
|
@@ -92,6 +94,7 @@ module Karafka
|
|
92
94
|
end
|
93
95
|
end
|
94
96
|
|
97
|
+
@current_consumer.commit_offsets(async: false) if @stored_offsets
|
95
98
|
@current_message = nil
|
96
99
|
@current_consumer = nil
|
97
100
|
end
|
@@ -127,6 +130,29 @@ module Karafka
|
|
127
130
|
)
|
128
131
|
end
|
129
132
|
|
133
|
+
# Stops all the iterating
|
134
|
+
# @note `break` can also be used but in such cases commits stored async will not be flushed
|
135
|
+
# to Kafka. This is why `#stop` is the recommended method.
|
136
|
+
def stop
|
137
|
+
@stopped = true
|
138
|
+
end
|
139
|
+
|
140
|
+
# Marks given message as consumed.
|
141
|
+
#
|
142
|
+
# @param message [Karafka::Messages::Message] message that we want to mark as processed
|
143
|
+
def mark_as_consumed(message)
|
144
|
+
@current_consumer.store_offset(message, nil)
|
145
|
+
@stored_offsets = true
|
146
|
+
end
|
147
|
+
|
148
|
+
# Marks given message as consumed and commits offsets
|
149
|
+
#
|
150
|
+
# @param message [Karafka::Messages::Message] message that we want to mark as processed
|
151
|
+
def mark_as_consumed!(message)
|
152
|
+
mark_as_consumed(message)
|
153
|
+
@current_consumer.commit_offsets(async: false)
|
154
|
+
end
|
155
|
+
|
130
156
|
private
|
131
157
|
|
132
158
|
# @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
|
@@ -158,7 +184,7 @@ module Karafka
|
|
158
184
|
# Do we have all the data we wanted or has every topic partition reached eof.
|
159
185
|
# @return [Boolean]
|
160
186
|
def done?
|
161
|
-
@stopped_partitions >= @total_partitions
|
187
|
+
(@stopped_partitions >= @total_partitions) || @stopped
|
162
188
|
end
|
163
189
|
end
|
164
190
|
end
|
data/lib/karafka/pro/loader.rb
CHANGED
@@ -63,6 +63,9 @@ module Karafka
|
|
63
63
|
# @param config [Karafka::Core::Configurable::Node]
|
64
64
|
def post_setup_all(config)
|
65
65
|
features.each { |feature| feature.post_setup(config) }
|
66
|
+
|
67
|
+
# We initialize it here so we don't initialize it during multi-threading work
|
68
|
+
Processing::SubscriptionGroupsCoordinator.instance
|
66
69
|
end
|
67
70
|
|
68
71
|
private
|
@@ -99,8 +99,9 @@ module Karafka
|
|
99
99
|
end
|
100
100
|
|
101
101
|
# @return [Boolean] is the coordinated work finished or not
|
102
|
+
# @note Used only in the consume operation context
|
102
103
|
def finished?
|
103
|
-
@running_jobs.zero?
|
104
|
+
@running_jobs[:consume].zero?
|
104
105
|
end
|
105
106
|
|
106
107
|
# Runs synchronized code once for a collective of virtual partitions prior to work being
|
@@ -122,7 +123,7 @@ module Karafka
|
|
122
123
|
end
|
123
124
|
end
|
124
125
|
|
125
|
-
# Runs once when all the work that is suppose to be coordinated is finished
|
126
|
+
# Runs given code once when all the work that is supposed to be coordinated is finished
|
126
127
|
# It runs once per all the coordinated jobs and should be used to run any type of post
|
127
128
|
# jobs coordination processing execution
|
128
129
|
def on_finished
|
@@ -143,6 +144,18 @@ module Karafka
|
|
143
144
|
end
|
144
145
|
end
|
145
146
|
|
147
|
+
# @param interval [Integer] milliseconds of activity
|
148
|
+
# @return [Boolean] was this partition in activity within last `interval` milliseconds
|
149
|
+
# @note Will return true also if currently active
|
150
|
+
def active_within?(interval)
|
151
|
+
# It's always active if there's any job related to this coordinator that is still
|
152
|
+
# enqueued or running
|
153
|
+
return true if @running_jobs.values.any?(:positive?)
|
154
|
+
|
155
|
+
# Otherwise we check last time any job of this coordinator was active
|
156
|
+
@changed_at + interval > monotonic_now
|
157
|
+
end
|
158
|
+
|
146
159
|
private
|
147
160
|
|
148
161
|
# Checks if given action is executable once. If it is and true is returned, this method
|
@@ -23,7 +23,9 @@ module Karafka
|
|
23
23
|
def find(topic)
|
24
24
|
# Start with the non-pro expansions
|
25
25
|
expansions = super
|
26
|
+
expansions << Pro::Processing::Piping::Consumer
|
26
27
|
expansions << Pro::Processing::OffsetMetadata::Consumer if topic.offset_metadata?
|
28
|
+
expansions << Pro::Processing::PeriodicJob::Consumer if topic.periodic_job?
|
27
29
|
expansions
|
28
30
|
end
|
29
31
|
end
|