karafka 2.3.2 → 2.4.0.beta1

Files changed (132)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +12 -38
  4. data/CHANGELOG.md +65 -0
  5. data/Gemfile +6 -3
  6. data/Gemfile.lock +25 -23
  7. data/README.md +2 -2
  8. data/bin/integrations +1 -1
  9. data/config/locales/errors.yml +24 -2
  10. data/config/locales/pro_errors.yml +19 -0
  11. data/karafka.gemspec +4 -2
  12. data/lib/active_job/queue_adapters/karafka_adapter.rb +2 -0
  13. data/lib/karafka/admin/configs/config.rb +81 -0
  14. data/lib/karafka/admin/configs/resource.rb +88 -0
  15. data/lib/karafka/admin/configs.rb +103 -0
  16. data/lib/karafka/admin.rb +200 -89
  17. data/lib/karafka/base_consumer.rb +2 -2
  18. data/lib/karafka/cli/info.rb +9 -7
  19. data/lib/karafka/cli/server.rb +7 -7
  20. data/lib/karafka/cli/topics/align.rb +109 -0
  21. data/lib/karafka/cli/topics/base.rb +66 -0
  22. data/lib/karafka/cli/topics/create.rb +35 -0
  23. data/lib/karafka/cli/topics/delete.rb +30 -0
  24. data/lib/karafka/cli/topics/migrate.rb +31 -0
  25. data/lib/karafka/cli/topics/plan.rb +169 -0
  26. data/lib/karafka/cli/topics/repartition.rb +41 -0
  27. data/lib/karafka/cli/topics/reset.rb +18 -0
  28. data/lib/karafka/cli/topics.rb +13 -123
  29. data/lib/karafka/connection/client.rb +62 -37
  30. data/lib/karafka/connection/listener.rb +22 -17
  31. data/lib/karafka/connection/proxy.rb +93 -4
  32. data/lib/karafka/connection/status.rb +14 -2
  33. data/lib/karafka/contracts/config.rb +36 -1
  34. data/lib/karafka/contracts/topic.rb +1 -1
  35. data/lib/karafka/deserializers/headers.rb +15 -0
  36. data/lib/karafka/deserializers/key.rb +15 -0
  37. data/lib/karafka/deserializers/payload.rb +16 -0
  38. data/lib/karafka/embedded.rb +2 -0
  39. data/lib/karafka/helpers/async.rb +5 -2
  40. data/lib/karafka/helpers/colorize.rb +6 -0
  41. data/lib/karafka/instrumentation/callbacks/oauthbearer_token_refresh.rb +29 -0
  42. data/lib/karafka/instrumentation/logger_listener.rb +23 -3
  43. data/lib/karafka/instrumentation/notifications.rb +10 -0
  44. data/lib/karafka/instrumentation/vendors/appsignal/client.rb +16 -2
  45. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +34 -4
  46. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +20 -0
  47. data/lib/karafka/messages/batch_metadata.rb +1 -1
  48. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  49. data/lib/karafka/messages/builders/message.rb +10 -6
  50. data/lib/karafka/messages/message.rb +2 -1
  51. data/lib/karafka/messages/metadata.rb +20 -4
  52. data/lib/karafka/messages/parser.rb +1 -1
  53. data/lib/karafka/pro/base_consumer.rb +12 -23
  54. data/lib/karafka/pro/encryption/cipher.rb +7 -3
  55. data/lib/karafka/pro/encryption/contracts/config.rb +1 -0
  56. data/lib/karafka/pro/encryption/errors.rb +4 -1
  57. data/lib/karafka/pro/encryption/messages/middleware.rb +13 -11
  58. data/lib/karafka/pro/encryption/messages/parser.rb +22 -20
  59. data/lib/karafka/pro/encryption/setup/config.rb +5 -0
  60. data/lib/karafka/pro/iterator/expander.rb +2 -1
  61. data/lib/karafka/pro/iterator/tpl_builder.rb +38 -0
  62. data/lib/karafka/pro/iterator.rb +28 -2
  63. data/lib/karafka/pro/loader.rb +3 -0
  64. data/lib/karafka/pro/processing/coordinator.rb +15 -2
  65. data/lib/karafka/pro/processing/expansions_selector.rb +2 -0
  66. data/lib/karafka/pro/processing/jobs_queue.rb +122 -5
  67. data/lib/karafka/pro/processing/periodic_job/consumer.rb +67 -0
  68. data/lib/karafka/pro/processing/piping/consumer.rb +126 -0
  69. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
  70. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
  71. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +1 -1
  72. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
  73. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
  74. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
  75. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +1 -1
  76. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +1 -1
  77. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
  78. data/lib/karafka/pro/processing/strategies/default.rb +5 -1
  79. data/lib/karafka/pro/processing/strategies/dlq/default.rb +21 -5
  80. data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
  81. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
  82. data/lib/karafka/pro/processing/subscription_groups_coordinator.rb +52 -0
  83. data/lib/karafka/pro/routing/features/direct_assignments/config.rb +27 -0
  84. data/lib/karafka/pro/routing/features/direct_assignments/contracts/consumer_group.rb +53 -0
  85. data/lib/karafka/pro/routing/features/direct_assignments/contracts/topic.rb +108 -0
  86. data/lib/karafka/pro/routing/features/direct_assignments/subscription_group.rb +77 -0
  87. data/lib/karafka/pro/routing/features/direct_assignments/topic.rb +69 -0
  88. data/lib/karafka/pro/routing/features/direct_assignments.rb +25 -0
  89. data/lib/karafka/pro/routing/features/patterns/builder.rb +1 -1
  90. data/lib/karafka/pro/routing/features/swarm/config.rb +31 -0
  91. data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +76 -0
  92. data/lib/karafka/pro/routing/features/swarm/contracts/topic.rb +78 -0
  93. data/lib/karafka/pro/routing/features/swarm/topic.rb +77 -0
  94. data/lib/karafka/pro/routing/features/swarm.rb +36 -0
  95. data/lib/karafka/pro/swarm/liveness_listener.rb +20 -0
  96. data/lib/karafka/processing/coordinator.rb +17 -8
  97. data/lib/karafka/processing/coordinators_buffer.rb +5 -2
  98. data/lib/karafka/processing/executor.rb +6 -2
  99. data/lib/karafka/processing/executors_buffer.rb +5 -2
  100. data/lib/karafka/processing/jobs_queue.rb +9 -4
  101. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
  102. data/lib/karafka/processing/strategies/default.rb +7 -1
  103. data/lib/karafka/processing/strategies/dlq.rb +17 -2
  104. data/lib/karafka/processing/workers_batch.rb +4 -1
  105. data/lib/karafka/routing/builder.rb +6 -2
  106. data/lib/karafka/routing/consumer_group.rb +2 -1
  107. data/lib/karafka/routing/features/dead_letter_queue/config.rb +5 -0
  108. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +8 -0
  109. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +10 -2
  110. data/lib/karafka/routing/features/deserializers/config.rb +18 -0
  111. data/lib/karafka/routing/features/deserializers/contracts/topic.rb +31 -0
  112. data/lib/karafka/routing/features/deserializers/topic.rb +51 -0
  113. data/lib/karafka/routing/features/deserializers.rb +11 -0
  114. data/lib/karafka/routing/proxy.rb +9 -14
  115. data/lib/karafka/routing/router.rb +11 -2
  116. data/lib/karafka/routing/subscription_group.rb +22 -1
  117. data/lib/karafka/routing/topic.rb +0 -1
  118. data/lib/karafka/runner.rb +1 -1
  119. data/lib/karafka/setup/config.rb +51 -10
  120. data/lib/karafka/status.rb +7 -8
  121. data/lib/karafka/swarm/manager.rb +15 -3
  122. data/lib/karafka/swarm/node.rb +3 -3
  123. data/lib/karafka/swarm/pidfd.rb +20 -4
  124. data/lib/karafka/swarm/supervisor.rb +25 -8
  125. data/lib/karafka/templates/karafka.rb.erb +28 -1
  126. data/lib/karafka/version.rb +1 -1
  127. data.tar.gz.sig +0 -0
  128. metadata +42 -12
  129. metadata.gz.sig +0 -0
  130. data/lib/karafka/routing/consumer_mapper.rb +0 -23
  131. data/lib/karafka/serialization/json/deserializer.rb +0 -19
  132. data/lib/karafka/time_trackers/partition_usage.rb +0 -56
data/lib/karafka/helpers/colorize.rb
@@ -21,6 +21,12 @@ module Karafka
       def yellow(string)
         "\033[1;33m#{string}\033[0m"
       end
+
+      # @param string [String] string we want to have in grey
+      # @return [String] grey string
+      def grey(string)
+        "\e[38;5;244m#{string}\e[0m"
+      end
     end
   end
 end
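Editor's note: the new `grey` helper mirrors the existing `yellow` one, emitting a 256-color ANSI escape for de-emphasized output. A minimal usage sketch; the including class is hypothetical:

  class StatusPrinter
    include Karafka::Helpers::Colorize

    def call
      # Yellow for warnings, grey for low-priority detail
      puts yellow('Topic configuration out of sync')
      puts grey('(run `karafka topics align` to converge it)')
    end
  end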
data/lib/karafka/instrumentation/callbacks/oauthbearer_token_refresh.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Instrumentation
+    module Callbacks
+      # Callback that is triggered when the OAuth token needs to be refreshed.
+      class OauthbearerTokenRefresh
+        # @param bearer [Rdkafka::Consumer, Rdkafka::Admin] given rdkafka instance. It is needed
+        #   as we need to have a reference to call `#oauthbearer_set_token` or
+        #   `#oauthbearer_set_token_failure` upon the event.
+        def initialize(bearer)
+          @bearer = bearer
+        end
+
+        # @param _rd_config [Rdkafka::Config]
+        # @param bearer_name [String] name of the bearer for which we refresh
+        def call(_rd_config, bearer_name)
+          return unless @bearer.name == bearer_name
+
+          ::Karafka.monitor.instrument(
+            'oauthbearer.token_refresh',
+            bearer: @bearer,
+            caller: self
+          )
+        end
+      end
+    end
+  end
+end
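Editor's note: the callback only publishes an `oauthbearer.token_refresh` event; minting and installing the token stays in application hands. A hedged subscriber sketch, assuming rdkafka's `#oauthbearer_set_token` keyword API and a hypothetical `generate_sasl_token` helper:

  Karafka.monitor.subscribe('oauthbearer.token_refresh') do |event|
    bearer = event[:bearer]

    # generate_sasl_token is application-specific (e.g. built from an OIDC provider)
    token, lifetime_ms, principal = generate_sasl_token

    bearer.oauthbearer_set_token(
      token: token,
      lifetime_ms: lifetime_ms,
      principal_name: principal
    )
  end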
data/lib/karafka/instrumentation/logger_listener.rb
@@ -16,10 +16,20 @@ module Karafka
 
     private_constant :USED_LOG_LEVELS
 
+    # @param log_polling [Boolean] should we log the fact that messages are being polled. This
+    #   is usually noisy and not useful in production, but can be useful in dev. While users
+    #   could do this themselves, it has been requested often, so similar to how extensive
+    #   logging can be disabled in WaterDrop, we support it here as well.
+    def initialize(log_polling: true)
+      @log_polling = log_polling
+    end
+
     # Logs each message fetching attempt
     #
     # @param event [Karafka::Core::Monitoring::Event] event details including payload
     def on_connection_listener_fetch_loop(event)
+      return unless log_polling?
+
       listener = event[:caller]
       debug "[#{listener.id}] Polling messages..."
     end
@@ -28,6 +38,8 @@ module Karafka
     #
     # @param event [Karafka::Core::Monitoring::Event] event details including payload
     def on_connection_listener_fetch_loop_received(event)
+      return unless log_polling?
+
       listener = event[:caller]
       time = event[:time]
       messages_count = event[:messages_buffer].size
@@ -129,7 +141,7 @@ module Karafka
       Thread.list.each do |thread|
         tid = (thread.object_id ^ ::Process.pid).to_s(36)
 
-        warn "Thread TID-#{tid} #{thread['label']}"
+        warn "Thread TID-#{tid} #{thread.name}"
 
         if thread.backtrace
           warn thread.backtrace.join("\n")
@@ -315,9 +327,10 @@ module Karafka
       when 'connection.client.unsubscribe.error'
         error "Client unsubscribe error occurred: #{error}"
         error details
+      # This handles any custom errors coming from places like the Web UI, etc.
       else
-        # This should never happen. Please contact the maintainers
-        raise Errors::UnsupportedCaseError, event
+        error "#{type} error occurred: #{error}"
+        error details
       end
     end
 
@@ -326,6 +339,13 @@ module Karafka
           Karafka.logger.send(log_level, *args)
         end
       end
+
+      private
+
+      # @return [Boolean] should we log polling
+      def log_polling?
+        @log_polling
+      end
     end
   end
 end
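Editor's note: with the new `log_polling:` flag, silencing the per-poll debug noise no longer requires subclassing the listener. A minimal sketch, assuming the listener is subscribed manually in `karafka.rb`:

  Karafka.monitor.subscribe(
    Karafka::Instrumentation::LoggerListener.new(log_polling: false)
  )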
data/lib/karafka/instrumentation/notifications.rb
@@ -40,6 +40,14 @@ module Karafka
         connection.listener.fetch_loop.received
         connection.listener.after_fetch_loop
 
+        connection.listener.pending
+        connection.listener.starting
+        connection.listener.running
+        connection.listener.quieting
+        connection.listener.quiet
+        connection.listener.stopping
+        connection.listener.stopped
+
         consumer.before_schedule_consume
         consumer.consume
         consumer.consumed
@@ -66,6 +74,8 @@ module Karafka
         filtering.throttled
         filtering.seek
 
+        oauthbearer.token_refresh
+
         process.notice_signal
 
         rebalance.partitions_assign
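Editor's note: the newly registered listener lifecycle events can be observed like any other notification. A minimal sketch, assuming the listener instance arrives under `event[:caller]` as with the other connection events:

  %w[
    connection.listener.running
    connection.listener.stopping
    connection.listener.stopped
  ].each do |name|
    Karafka.monitor.subscribe(name) do |event|
      Karafka.logger.info "#{name}: #{event[:caller].id}"
    end
  end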
data/lib/karafka/instrumentation/vendors/appsignal/client.rb
@@ -11,6 +11,13 @@ module Karafka
       #
       # @note This client is abstract, it has no notion of Karafka whatsoever
       class Client
+        # @param namespace_name [String, nil] name of the AppSignal namespace we want to use,
+        #   or nil if it is to remain default.
+        #   Defaults to `Appsignal::Transaction::BACKGROUND_JOB` in the execution flow.
+        def initialize(namespace_name: nil)
+          @namespace_name = namespace_name
+        end
+
         # Starts an appsignal transaction with a given action name
         #
         # @param action_name [String] action name. For processing this should be equal to
@@ -18,7 +25,7 @@ module Karafka
         def start_transaction(action_name)
           transaction = ::Appsignal::Transaction.create(
             SecureRandom.uuid,
-            ::Appsignal::Transaction::BACKGROUND_JOB,
+            namespace_name,
             ::Appsignal::Transaction::GenericRequest.new({})
           )
 
@@ -83,7 +90,7 @@ module Karafka
             transaction.set_error(error)
           else
             ::Appsignal.send_error(error) do |transaction|
-              transaction.set_namespace(::Appsignal::Transaction::BACKGROUND_JOB)
+              transaction.set_namespace(namespace_name)
             end
           end
         end
@@ -115,6 +122,13 @@ module Karafka
             .transform_values(&:to_s)
             .transform_keys!(&:to_s)
         end
+
+        # @return [String] transaction namespace. We lazy evaluate it and resolve it, if needed,
+        #   to the default `BACKGROUND_JOB` during execution, to ensure we can initialize the
+        #   instrumentation even before the appsignal gem is loaded.
+        def namespace_name
+          @namespace_name ||= ::Appsignal::Transaction::BACKGROUND_JOB
+        end
       end
     end
   end
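Editor's note: because the namespace now resolves lazily, the client can be instantiated before the appsignal gem loads. A hedged wiring sketch; a `MetricsListener` with a configurable `client` follows Karafka's AppSignal integration convention, but verify the exact class names against your version:

  client = Karafka::Instrumentation::Vendors::Appsignal::Client.new(
    namespace_name: 'kafka'
  )

  listener = Karafka::Instrumentation::Vendors::Appsignal::MetricsListener.new do |config|
    config.client = client
  end

  Karafka.monitor.subscribe(listener)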
data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb
@@ -14,7 +14,8 @@ module Karafka
         include ::Karafka::Core::Configurable
         extend Forwardable
 
-        def_delegators :config, :client, :rd_kafka_metrics, :namespace, :default_tags
+        def_delegators :config, :client, :rd_kafka_metrics, :namespace,
+                       :default_tags, :distribution_mode
 
         # Value object for storing a single rdkafka metric publishing details
         RdKafkaMetric = Struct.new(:type, :scope, :name, :key_location)
@@ -53,6 +54,13 @@ module Karafka
           RdKafkaMetric.new(:gauge, :topics, 'consumer.lags_delta', 'consumer_lag_stored_d')
         ].freeze
 
+        # Whether histogram metrics should be sent as distributions or histograms.
+        # Distribution metrics are aggregated globally and not agent-side,
+        # providing more accurate percentiles whenever consumers are running on multiple hosts.
+        #
+        # Learn more at https://docs.datadoghq.com/metrics/types/?tab=distribution#metric-types
+        setting :distribution_mode, default: :histogram
+
         configure
 
         # @param block [Proc] configuration block
@@ -169,18 +177,40 @@ module Karafka
         %i[
           count
           gauge
-          histogram
           increment
           decrement
         ].each do |metric_type|
-          class_eval <<~METHODS, __FILE__, __LINE__ + 1
+          class_eval <<~RUBY, __FILE__, __LINE__ + 1
             def #{metric_type}(key, *args)
               client.#{metric_type}(
                 namespaced_metric(key),
                 *args
              )
            end
-          METHODS
+          RUBY
+        end
+
+        # Selects the histogram mode configured and uses it to report to DD client
+        # @param key [String] non-namespaced key
+        # @param args [Array] extra arguments to pass to the client
+        def histogram(key, *args)
+          case distribution_mode
+          when :histogram
+            client.histogram(
+              namespaced_metric(key),
+              *args
+            )
+          when :distribution
+            client.distribution(
+              namespaced_metric(key),
+              *args
+            )
+          else
+            raise(
+              ::ArgumentError,
+              'distribution_mode setting value must be either :histogram or :distribution'
+            )
+          end
        end
 
        # Wraps metric name in listener's namespace
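Editor's note: `distribution_mode` defaults to `:histogram`, so existing setups keep their behavior; opting into Datadog distributions is a single setting. A sketch following the listener's existing configuration-block convention:

  require 'datadog/statsd'

  listener = Karafka::Instrumentation::Vendors::Datadog::MetricsListener.new do |config|
    config.client = Datadog::Statsd.new('localhost', 8125)
    # Aggregate percentile metrics globally instead of agent-side
    config.distribution_mode = :distribution
  end

  Karafka.monitor.subscribe(listener)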
data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb
@@ -92,6 +92,26 @@ module Karafka
           clear_polling_tick
         end
 
+        # Deregisters the polling tracker for the given listener
+        # @param _event [Karafka::Core::Monitoring::Event]
+        def on_connection_listener_stopping(_event)
+          # We only want to disable tracking for a given listener if the stop was requested
+          # while karafka was running. If we always cleared, we would not catch the shutdown
+          # polling requirements. The "running" listener shutdown operations happen only when
+          # the manager requests them for downscaling.
+          return if Karafka::App.done?
+
+          clear_polling_tick
+        end
+
+        # Deregisters the polling tracker for the given listener
+        # @param _event [Karafka::Core::Monitoring::Event]
+        def on_connection_listener_stopped(_event)
+          return if Karafka::App.done?
+
+          clear_polling_tick
+        end
+
         private
 
         # Wraps the logic with a mutex
data/lib/karafka/messages/batch_metadata.rb
@@ -10,7 +10,7 @@ module Karafka
       :size,
       :first_offset,
       :last_offset,
-      :deserializer,
+      :deserializers,
       :partition,
       :topic,
       :created_at,
data/lib/karafka/messages/builders/batch_metadata.rb
@@ -21,7 +21,7 @@ module Karafka
         size: messages.count,
         first_offset: messages.first&.offset || -1001,
         last_offset: messages.last&.offset || -1001,
-        deserializer: topic.deserializer,
+        deserializers: topic.deserializers,
         partition: partition,
         topic: topic.name,
         # We go with the assumption that the creation of the whole batch is the last message
data/lib/karafka/messages/builders/message.rb
@@ -14,14 +14,14 @@ module Karafka
       def call(kafka_message, topic, received_at)
         metadata = Karafka::Messages::Metadata.new(
           timestamp: kafka_message.timestamp,
-          headers: kafka_message.headers,
-          key: kafka_message.key,
           offset: kafka_message.offset,
-          deserializer: topic.deserializer,
+          deserializers: topic.deserializers,
           partition: kafka_message.partition,
           topic: topic.name,
-          received_at: received_at
-        ).freeze
+          received_at: received_at,
+          raw_headers: kafka_message.headers,
+          raw_key: kafka_message.key
+        )
 
         # Get the raw payload
         payload = kafka_message.payload
@@ -31,7 +31,11 @@ module Karafka
         kafka_message.instance_variable_set('@payload', nil)
 
         # Karafka messages cannot be frozen because of the lazy deserialization feature
-        Karafka::Messages::Message.new(payload, metadata)
+        message = Karafka::Messages::Message.new(payload, metadata)
+        # Assign message to metadata so we can reverse its relationship if needed
+        metadata[:message] = message
+
+        message
       end
     end
   end
data/lib/karafka/messages/message.rb
@@ -23,7 +23,8 @@ module Karafka
       # prior to the final deserialization
       attr_accessor :raw_payload
 
-      def_delegators :metadata, *Metadata.members
+      # We remove message as we do not want to do self-reference via `message.message`
+      def_delegators :metadata, *((Metadata.members + %i[key headers]) - %i[message])
 
       # @param raw_payload [Object] incoming payload before deserialization
       # @param metadata [Karafka::Messages::Metadata] message metadata object
data/lib/karafka/messages/metadata.rb
@@ -4,15 +4,31 @@ module Karafka
   module Messages
     # Single message metadata details that can be accessed without the need of deserialization.
     Metadata = Struct.new(
+      :message,
       :timestamp,
-      :headers,
-      :key,
       :offset,
-      :deserializer,
+      :deserializers,
       :partition,
       :received_at,
       :topic,
+      :raw_headers,
+      :raw_key,
       keyword_init: true
-    )
+    ) do
+      # @return [Object] deserialized key. By default in the raw string format.
+      def key
+        return @key if @key
+
+        @key = deserializers.key.call(self)
+      end
+
+      # @return [Object] deserialized headers. By default it's a hash with both keys and
+      #   values being strings
+      def headers
+        return @headers if @headers
+
+        @headers = deserializers.headers.call(self)
+      end
+    end
   end
 end
data/lib/karafka/messages/parser.rb
@@ -7,7 +7,7 @@ module Karafka
       # @param message [::Karafka::Messages::Message]
       # @return [Object] deserialized payload
       def call(message)
-        message.metadata.deserializer.call(message)
+        message.metadata.deserializers.payload.call(message)
      end
    end
  end
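Editor's note: taken together, these changes replace the single `deserializer` with a `deserializers` trio (payload, key, headers), with key and headers resolved lazily on first access. A hedged routing sketch; the key deserializer class here is hypothetical, receiving metadata as `deserializers.key.call(self)` above suggests:

  class UpcasedKeyDeserializer
    # @param metadata [Karafka::Messages::Metadata]
    def call(metadata)
      # raw_key is available without touching the payload
      metadata.raw_key&.upcase
    end
  end

  class KarafkaApp < Karafka::App
    routes.draw do
      topic :events do
        consumer EventsConsumer
        deserializers(
          payload: ->(message) { JSON.parse(message.raw_payload) },
          key: UpcasedKeyDeserializer.new
        )
      end
    end
  end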
data/lib/karafka/pro/base_consumer.rb
@@ -20,29 +20,6 @@ module Karafka
     #
     # Methods here are supposed to be always available or are expected to be redefined
     module BaseConsumer
-      # Runs the on-schedule tick periodic operations
-      # This method is an alias but is part of the naming convention used for other flows; this
-      # is why we do not reference `handle_before_schedule_tick` directly
-      def on_before_schedule_tick
-        handle_before_schedule_tick
-      end
-
-      # Used by the executor to trigger consumer tick
-      # @private
-      def on_tick
-        handle_tick
-      rescue StandardError => e
-        Karafka.monitor.instrument(
-          'error.occurred',
-          error: e,
-          caller: self,
-          type: 'consumer.tick.error'
-        )
-      end
-
-      # By default we do nothing when ticking
-      def tick; end
-
       # @return [Karafka::Pro::Processing::Coordinators::ErrorsTracker] tracker for errors that
       #   occurred during processing until another successful processing
       #
@@ -58,6 +35,18 @@ module Karafka
       def errors_tracker
         coordinator.errors_tracker
       end
+
+      # @return [Karafka::Pro::Processing::SubscriptionGroupsCoordinator] coordinator allowing
+      #   to pause and resume polling of the given subscription group jobs queue to postpone
+      #   further work.
+      #
+      # @note Since this stops polling, it can cause reaching `max.poll.interval.ms` limits.
+      #
+      # @note This is a low-level API used for cross-topic coordination and some advanced
+      #   features. Use it at your own risk.
+      def subscription_groups_coordinator
+        Processing::SubscriptionGroupsCoordinator.instance
+      end
     end
   end
 end
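Editor's note: a sketch of what consumer-side coordination could look like, assuming the coordinator exposes `pause`/`resume` keyed by subscription group as its documentation suggests; exact signatures may differ between versions:

  class CoordinatedConsumer < ApplicationConsumer
    def consume
      # Stop polling of this group's jobs queue while exclusive work runs.
      # Beware: long pauses may exceed max.poll.interval.ms.
      subscription_groups_coordinator.pause(topic.subscription_group)

      # ... work that must not overlap with further polling ...
    ensure
      subscription_groups_coordinator.resume(topic.subscription_group)
    end
  end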
data/lib/karafka/pro/encryption/cipher.rb
@@ -16,6 +16,10 @@ module Karafka
     module Encryption
       # Cipher for encrypting and decrypting data
       class Cipher
+        include Helpers::ConfigImporter.new(
+          encryption: %i[encryption]
+        )
+
         def initialize
           @private_pems = {}
         end
@@ -39,7 +43,7 @@ module Karafka
 
         # @return [::OpenSSL::PKey::RSA] rsa public key
         def public_pem
-          @public_pem ||= ::OpenSSL::PKey::RSA.new(::Karafka::App.config.encryption.public_key)
+          @public_pem ||= ::OpenSSL::PKey::RSA.new(encryption.public_key)
         end
 
         # @param version [String] version for which we want to get the rsa key
@@ -47,8 +51,8 @@ module Karafka
         def private_pem(version)
           return @private_pems[version] if @private_pems.key?(version)
 
-          key_string = ::Karafka::App.config.encryption.private_keys[version]
-          key_string || raise(Errors::PrivateKeyNotFound, version)
+          key_string = encryption.private_keys[version]
+          key_string || raise(Errors::PrivateKeyNotFoundError, version)
 
           @private_pems[version] = ::OpenSSL::PKey::RSA.new(key_string)
         end
data/lib/karafka/pro/encryption/contracts/config.rb
@@ -30,6 +30,7 @@ module Karafka
           required(:active) { |val| [true, false].include?(val) }
           required(:version) { |val| val.is_a?(String) && !val.empty? }
           required(:public_key) { |val| val.is_a?(String) }
+          required(:fingerprinter) { |val| val == false || val.respond_to?(:hexdigest) }
 
           required(:private_keys) do |val|
             val.is_a?(Hash) &&
data/lib/karafka/pro/encryption/errors.rb
@@ -20,7 +20,10 @@ module Karafka
       BaseError = Class.new(::Karafka::Errors::BaseError)
 
       # Raised when we have encountered an encryption key with a version we do not have
-      PrivateKeyNotFound = Class.new(BaseError)
+      PrivateKeyNotFoundError = Class.new(BaseError)
+
+      # Raised when fingerprinting was enabled and the decrypted payload did not match the
+      # fingerprint
+      FingerprintVerificationError = Class.new(BaseError)
     end
   end
 end
data/lib/karafka/pro/encryption/messages/middleware.rb
@@ -18,26 +18,28 @@ module Karafka
       module Messages
         # Middleware for WaterDrop. It automatically encrypts messages payload.
         # It is injected only if encryption is enabled.
+        # It also fingerprints the payload for verification if fingerprinting was enabled.
         class Middleware
+          include Helpers::ConfigImporter.new(
+            cipher: %i[encryption cipher],
+            version: %i[encryption version],
+            fingerprinter: %i[encryption fingerprinter]
+          )
+
           # @param message [Hash] WaterDrop message hash
           # @return [Hash] hash with encrypted payload and encryption version indicator
           def call(message)
+            payload = message[:payload]
+
             message[:headers] ||= {}
             message[:headers]['encryption'] = version
-            message[:payload] = cipher.encrypt(message[:payload])
-            message
-          end
+            message[:payload] = cipher.encrypt(payload)
 
-          private
+            return message unless fingerprinter
 
-          # @return [::Karafka::Pro::Encryption::Cipher]
-          def cipher
-            @cipher ||= ::Karafka::App.config.encryption.cipher
-          end
+            message[:headers]['encryption_fingerprint'] = fingerprinter.hexdigest(payload)
 
-          # @return [String] encryption version
-          def version
-            @version ||= ::Karafka::App.config.encryption.version
+            message
           end
         end
       end
data/lib/karafka/pro/encryption/messages/parser.rb
@@ -20,34 +20,36 @@ module Karafka
       # unencrypted payloads. That is why we always rely on message headers for encryption
       # indication.
       class Parser < ::Karafka::Messages::Parser
+        include Helpers::ConfigImporter.new(
+          cipher: %i[encryption cipher],
+          active: %i[encryption active],
+          fingerprinter: %i[encryption fingerprinter]
+        )
+
         # @param message [::Karafka::Messages::Message]
         # @return [Object] deserialized payload
         def call(message)
-          if active? && message.headers.key?('encryption')
-            # Decrypt raw payload so it can be handled by the default parser logic
-            message.raw_payload = cipher.decrypt(
-              message.headers['encryption'],
-              message.raw_payload
-            )
-          end
-
-          super(message)
-        end
+          headers = message.headers
+          encryption = headers['encryption']
+          fingerprint = headers['encryption_fingerprint']
 
-        private
+          return super(message) unless active && encryption
 
-        # @return [::Karafka::Pro::Encryption::Cipher]
-        def cipher
-          @cipher ||= ::Karafka::App.config.encryption.cipher
-        end
+          # Decrypt raw payload so it can be handled by the default parser logic
+          decrypted_payload = cipher.decrypt(
+            encryption,
+            message.raw_payload
+          )
+
+          message.raw_payload = decrypted_payload
+
+          return super(message) unless fingerprint && fingerprinter
 
-        # @return [Boolean] is encryption active
-        def active?
-          return @active unless @active.nil?
+          message_fingerprint = fingerprinter.hexdigest(decrypted_payload)
 
-          @active = ::Karafka::App.config.encryption.active
+          return super(message) if message_fingerprint == fingerprint
 
-          @active
+          raise(Errors::FingerprintVerificationError, message.to_s)
         end
       end
     end
data/lib/karafka/pro/encryption/setup/config.rb
@@ -40,6 +40,11 @@ module Karafka
       # Cipher used to encrypt and decrypt data
       setting(:cipher, default: Encryption::Cipher.new)
 
+      # When set to any digest that responds to `#hexdigest`, a checksum of the message
+      # payload will be computed for post-decryption integrity verification and shipped
+      # as a fingerprint in the headers
+      setting(:fingerprinter, default: false)
+
       configure
     end
   end
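Editor's note: enabling fingerprinting then means pointing the setting at any object responding to `#hexdigest`, per the contract above. A minimal sketch using Ruby's stdlib digest; key material sourcing is illustrative:

  require 'digest'

  class KarafkaApp < Karafka::App
    setup do |config|
      config.encryption.active = true
      config.encryption.version = '1'
      config.encryption.public_key = ENV.fetch('KARAFKA_PUBLIC_KEY')
      config.encryption.private_keys = { '1' => ENV.fetch('KARAFKA_PRIVATE_KEY') }
      # Digest::SHA256 responds to #hexdigest, satisfying the contract
      config.encryption.fingerprinter = Digest::SHA256
    end
  end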
data/lib/karafka/pro/iterator/expander.rb
@@ -29,7 +29,8 @@ module Karafka
     #   - { 'topic1' => 100 } - means we run all partitions from the offset 100
     #   - { 'topic1' => Time.now - 60 } - we run all partitions from the message from 60s ago
     #   - { 'topic1' => { 1 => Time.now - 60 } } - partition1 from message 60s ago
-    #
+    #   - { 'topic1' => { 1 => true } } - picks the first offset not consumed by this CG for partition 1
+    #   - { 'topic1' => true } - picks the first offset not consumed by this CG for all partitions
     class Expander
       # Expands topics to which we want to subscribe with partitions information in case this
       # info is not provided.
data/lib/karafka/pro/iterator/tpl_builder.rb
@@ -36,6 +36,7 @@ module Karafka
         resolve_partitions_with_exact_offsets
         resolve_partitions_with_negative_offsets
         resolve_partitions_with_time_offsets
+        resolve_partitions_with_cg_expectations
 
         # Final tpl with all the data
         tpl = Rdkafka::Consumer::TopicPartitionList.new
@@ -149,6 +150,43 @@ module Karafka
           end
         end
       end
+
+      # Fetches the committed offsets for those partitions where consumption should continue
+      # from the moment at which the given consumer group last finished.
+      # This is indicated by the given partition value being set to `true`.
+      def resolve_partitions_with_cg_expectations
+        tpl = Rdkafka::Consumer::TopicPartitionList.new
+
+        # First iterate over all topics that we want to expand
+        @expanded_topics.each do |name, partitions|
+          partitions_base = {}
+
+          partitions.each do |partition, offset|
+            # Pick only partitions where the offset is set to true, indicating that we are
+            # interested in committed offset resolution
+            next unless offset == true
+
+            # This can be set to nil because we do not use this offset value when querying
+            partitions_base[partition] = nil
+          end
+
+          # If there is nothing to work with, just skip
+          next if partitions_base.empty?
+
+          tpl.add_topic_and_partitions_with_offsets(name, partitions_base)
+        end
+
+        # If there is nothing to resolve, do not resolve
+        return if tpl.empty?
+
+        # Fetch all committed offsets for the topic partitions of our interest and use those
+        # offsets for the mapped topics data
+        @consumer.committed(tpl).to_h.each do |name, partitions|
+          partitions.each do |partition|
+            @mapped_topics[name][partition.partition] = partition.offset
+          end
+        end
+      end
     end
   end
 end
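Editor's note: with the `true` marker, the Pro iterator can pick up from a consumer group's committed offsets. A hedged usage sketch; topic and group names are illustrative:

  iterator = Karafka::Pro::Iterator.new(
    # Continue from whatever this consumer group last committed
    { 'events' => true },
    settings: { 'group.id': 'analytics-group' }
  )

  iterator.each do |message|
    puts "#{message.partition}:#{message.offset} #{message.key}"
  end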