karafka 2.3.3 → 2.4.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +12 -38
  4. data/CHANGELOG.md +59 -0
  5. data/Gemfile +6 -3
  6. data/Gemfile.lock +29 -27
  7. data/bin/integrations +1 -1
  8. data/config/locales/errors.yml +21 -2
  9. data/config/locales/pro_errors.yml +16 -1
  10. data/karafka.gemspec +4 -2
  11. data/lib/active_job/queue_adapters/karafka_adapter.rb +2 -0
  12. data/lib/karafka/admin/configs/config.rb +81 -0
  13. data/lib/karafka/admin/configs/resource.rb +88 -0
  14. data/lib/karafka/admin/configs.rb +103 -0
  15. data/lib/karafka/admin.rb +211 -90
  16. data/lib/karafka/base_consumer.rb +2 -2
  17. data/lib/karafka/cli/info.rb +9 -7
  18. data/lib/karafka/cli/server.rb +7 -7
  19. data/lib/karafka/cli/topics/align.rb +109 -0
  20. data/lib/karafka/cli/topics/base.rb +66 -0
  21. data/lib/karafka/cli/topics/create.rb +35 -0
  22. data/lib/karafka/cli/topics/delete.rb +30 -0
  23. data/lib/karafka/cli/topics/migrate.rb +31 -0
  24. data/lib/karafka/cli/topics/plan.rb +169 -0
  25. data/lib/karafka/cli/topics/repartition.rb +41 -0
  26. data/lib/karafka/cli/topics/reset.rb +18 -0
  27. data/lib/karafka/cli/topics.rb +13 -123
  28. data/lib/karafka/connection/client.rb +55 -37
  29. data/lib/karafka/connection/listener.rb +22 -17
  30. data/lib/karafka/connection/proxy.rb +93 -4
  31. data/lib/karafka/connection/status.rb +14 -2
  32. data/lib/karafka/constraints.rb +3 -3
  33. data/lib/karafka/contracts/config.rb +14 -1
  34. data/lib/karafka/contracts/topic.rb +1 -1
  35. data/lib/karafka/deserializers/headers.rb +15 -0
  36. data/lib/karafka/deserializers/key.rb +15 -0
  37. data/lib/karafka/deserializers/payload.rb +16 -0
  38. data/lib/karafka/embedded.rb +2 -0
  39. data/lib/karafka/helpers/async.rb +5 -2
  40. data/lib/karafka/helpers/colorize.rb +6 -0
  41. data/lib/karafka/instrumentation/callbacks/oauthbearer_token_refresh.rb +29 -0
  42. data/lib/karafka/instrumentation/logger_listener.rb +23 -3
  43. data/lib/karafka/instrumentation/notifications.rb +10 -0
  44. data/lib/karafka/instrumentation/vendors/appsignal/client.rb +16 -2
  45. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +20 -0
  46. data/lib/karafka/messages/batch_metadata.rb +1 -1
  47. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  48. data/lib/karafka/messages/builders/message.rb +10 -6
  49. data/lib/karafka/messages/message.rb +2 -1
  50. data/lib/karafka/messages/metadata.rb +20 -4
  51. data/lib/karafka/messages/parser.rb +1 -1
  52. data/lib/karafka/pro/base_consumer.rb +12 -23
  53. data/lib/karafka/pro/encryption/cipher.rb +7 -3
  54. data/lib/karafka/pro/encryption/contracts/config.rb +1 -0
  55. data/lib/karafka/pro/encryption/errors.rb +4 -1
  56. data/lib/karafka/pro/encryption/messages/middleware.rb +13 -11
  57. data/lib/karafka/pro/encryption/messages/parser.rb +22 -20
  58. data/lib/karafka/pro/encryption/setup/config.rb +5 -0
  59. data/lib/karafka/pro/iterator/expander.rb +2 -1
  60. data/lib/karafka/pro/iterator/tpl_builder.rb +38 -0
  61. data/lib/karafka/pro/iterator.rb +28 -2
  62. data/lib/karafka/pro/loader.rb +3 -0
  63. data/lib/karafka/pro/processing/coordinator.rb +15 -2
  64. data/lib/karafka/pro/processing/expansions_selector.rb +2 -0
  65. data/lib/karafka/pro/processing/jobs_queue.rb +122 -5
  66. data/lib/karafka/pro/processing/periodic_job/consumer.rb +67 -0
  67. data/lib/karafka/pro/processing/piping/consumer.rb +126 -0
  68. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
  69. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
  70. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +1 -1
  71. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +1 -1
  72. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
  73. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
  74. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +1 -1
  75. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +1 -1
  76. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +2 -0
  77. data/lib/karafka/pro/processing/strategies/default.rb +5 -1
  78. data/lib/karafka/pro/processing/strategies/dlq/default.rb +21 -5
  79. data/lib/karafka/pro/processing/strategies/lrj/default.rb +2 -0
  80. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +2 -0
  81. data/lib/karafka/pro/processing/subscription_groups_coordinator.rb +52 -0
  82. data/lib/karafka/pro/routing/features/direct_assignments/config.rb +27 -0
  83. data/lib/karafka/pro/routing/features/direct_assignments/contracts/consumer_group.rb +53 -0
  84. data/lib/karafka/pro/routing/features/direct_assignments/contracts/topic.rb +108 -0
  85. data/lib/karafka/pro/routing/features/direct_assignments/subscription_group.rb +77 -0
  86. data/lib/karafka/pro/routing/features/direct_assignments/topic.rb +69 -0
  87. data/lib/karafka/pro/routing/features/direct_assignments.rb +25 -0
  88. data/lib/karafka/pro/routing/features/patterns/builder.rb +1 -1
  89. data/lib/karafka/pro/routing/features/swarm/contracts/routing.rb +76 -0
  90. data/lib/karafka/pro/routing/features/swarm/contracts/topic.rb +16 -5
  91. data/lib/karafka/pro/routing/features/swarm/topic.rb +25 -2
  92. data/lib/karafka/pro/routing/features/swarm.rb +11 -0
  93. data/lib/karafka/pro/swarm/liveness_listener.rb +20 -0
  94. data/lib/karafka/processing/coordinator.rb +17 -8
  95. data/lib/karafka/processing/coordinators_buffer.rb +5 -2
  96. data/lib/karafka/processing/executor.rb +6 -2
  97. data/lib/karafka/processing/executors_buffer.rb +5 -2
  98. data/lib/karafka/processing/jobs_queue.rb +9 -4
  99. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
  100. data/lib/karafka/processing/strategies/default.rb +7 -1
  101. data/lib/karafka/processing/strategies/dlq.rb +17 -2
  102. data/lib/karafka/processing/workers_batch.rb +4 -1
  103. data/lib/karafka/routing/builder.rb +6 -2
  104. data/lib/karafka/routing/consumer_group.rb +2 -1
  105. data/lib/karafka/routing/features/dead_letter_queue/config.rb +5 -0
  106. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +8 -0
  107. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +10 -2
  108. data/lib/karafka/routing/features/deserializers/config.rb +18 -0
  109. data/lib/karafka/routing/features/deserializers/contracts/topic.rb +31 -0
  110. data/lib/karafka/routing/features/deserializers/topic.rb +51 -0
  111. data/lib/karafka/routing/features/deserializers.rb +11 -0
  112. data/lib/karafka/routing/proxy.rb +9 -14
  113. data/lib/karafka/routing/router.rb +11 -2
  114. data/lib/karafka/routing/subscription_group.rb +9 -1
  115. data/lib/karafka/routing/topic.rb +0 -1
  116. data/lib/karafka/runner.rb +1 -1
  117. data/lib/karafka/setup/config.rb +50 -9
  118. data/lib/karafka/status.rb +7 -8
  119. data/lib/karafka/swarm/supervisor.rb +16 -2
  120. data/lib/karafka/templates/karafka.rb.erb +28 -1
  121. data/lib/karafka/version.rb +1 -1
  122. data.tar.gz.sig +0 -0
  123. metadata +38 -12
  124. metadata.gz.sig +0 -0
  125. data/lib/karafka/routing/consumer_mapper.rb +0 -23
  126. data/lib/karafka/serialization/json/deserializer.rb +0 -19
  127. data/lib/karafka/time_trackers/partition_usage.rb +0 -56
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Instrumentation
5
+ module Callbacks
6
+ # Callback that is triggered when oauth token needs to be refreshed.
7
+ class OauthbearerTokenRefresh
8
+ # @param bearer [Rdkafka::Consumer, Rdkafka::Admin] given rdkafka instance. It is needed as
9
+ # we need to have a reference to call `#oauthbearer_set_token` or
10
+ # `#oauthbearer_set_token_failure` upon the event.
11
+ def initialize(bearer)
12
+ @bearer = bearer
13
+ end
14
+
15
+ # @param _rd_config [Rdkafka::Config]
16
+ # @param bearer_name [String] name of the bearer for which we refresh
17
+ def call(_rd_config, bearer_name)
18
+ return unless @bearer.name == bearer_name
19
+
20
+ ::Karafka.monitor.instrument(
21
+ 'oauthbearer.token_refresh',
22
+ bearer: @bearer,
23
+ caller: self
24
+ )
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -16,10 +16,20 @@ module Karafka
16
16
 
17
17
  private_constant :USED_LOG_LEVELS
18
18
 
19
+ # @param log_polling [Boolean] should we log the fact that messages are being polled. This is
20
+ # usually noisy and not useful in production but can be useful in dev. While users can
21
+ # do this themselves this has been requested and asked for often, thus similar to how
22
+ # extensive logging can be disabled in WaterDrop, we do it here as well.
23
+ def initialize(log_polling: true)
24
+ @log_polling = log_polling
25
+ end
26
+
19
27
  # Logs each message fetching attempt
20
28
  #
21
29
  # @param event [Karafka::Core::Monitoring::Event] event details including payload
22
30
  def on_connection_listener_fetch_loop(event)
31
+ return unless log_polling?
32
+
23
33
  listener = event[:caller]
24
34
  debug "[#{listener.id}] Polling messages..."
25
35
  end
@@ -28,6 +38,8 @@ module Karafka
28
38
  #
29
39
  # @param event [Karafka::Core::Monitoring::Event] event details including payload
30
40
  def on_connection_listener_fetch_loop_received(event)
41
+ return unless log_polling?
42
+
31
43
  listener = event[:caller]
32
44
  time = event[:time]
33
45
  messages_count = event[:messages_buffer].size
@@ -129,7 +141,7 @@ module Karafka
129
141
  Thread.list.each do |thread|
130
142
  tid = (thread.object_id ^ ::Process.pid).to_s(36)
131
143
 
132
- warn "Thread TID-#{tid} #{thread['label']}"
144
+ warn "Thread TID-#{tid} #{thread.name}"
133
145
 
134
146
  if thread.backtrace
135
147
  warn thread.backtrace.join("\n")
@@ -315,9 +327,10 @@ module Karafka
315
327
  when 'connection.client.unsubscribe.error'
316
328
  error "Client unsubscribe error occurred: #{error}"
317
329
  error details
330
+ # This handles any custom errors coming from places like Web-UI, etc
318
331
  else
319
- # This should never happen. Please contact the maintainers
320
- raise Errors::UnsupportedCaseError, event
332
+ error "#{type} error occurred: #{error}"
333
+ error details
321
334
  end
322
335
  end
323
336
 
@@ -326,6 +339,13 @@ module Karafka
326
339
  Karafka.logger.send(log_level, *args)
327
340
  end
328
341
  end
342
+
343
+ private
344
+
345
+ # @return [Boolean] should we log polling
346
+ def log_polling?
347
+ @log_polling
348
+ end
329
349
  end
330
350
  end
331
351
  end
@@ -40,6 +40,14 @@ module Karafka
40
40
  connection.listener.fetch_loop.received
41
41
  connection.listener.after_fetch_loop
42
42
 
43
+ connection.listener.pending
44
+ connection.listener.starting
45
+ connection.listener.running
46
+ connection.listener.quieting
47
+ connection.listener.quiet
48
+ connection.listener.stopping
49
+ connection.listener.stopped
50
+
43
51
  consumer.before_schedule_consume
44
52
  consumer.consume
45
53
  consumer.consumed
@@ -66,6 +74,8 @@ module Karafka
66
74
  filtering.throttled
67
75
  filtering.seek
68
76
 
77
+ oauthbearer.token_refresh
78
+
69
79
  process.notice_signal
70
80
 
71
81
  rebalance.partitions_assign
@@ -11,6 +11,13 @@ module Karafka
11
11
  #
12
12
  # @note This client is abstract, it has no notion of Karafka whatsoever
13
13
  class Client
14
+ # @param namespace_name [String, nil] Name of the AppSignal namespace we want to use or
15
+ # nil if it is to remain default.
16
+ # Defaults to `Appsignal::Transaction::BACKGROUND_JOB` in the execution flow.
17
+ def initialize(namespace_name: nil)
18
+ @namespace_name = namespace_name
19
+ end
20
+
14
21
  # Starts an appsignal transaction with a given action name
15
22
  #
16
23
  # @param action_name [String] action name. For processing this should be equal to
@@ -18,7 +25,7 @@ module Karafka
18
25
  def start_transaction(action_name)
19
26
  transaction = ::Appsignal::Transaction.create(
20
27
  SecureRandom.uuid,
21
- ::Appsignal::Transaction::BACKGROUND_JOB,
28
+ namespace_name,
22
29
  ::Appsignal::Transaction::GenericRequest.new({})
23
30
  )
24
31
 
@@ -83,7 +90,7 @@ module Karafka
83
90
  transaction.set_error(error)
84
91
  else
85
92
  ::Appsignal.send_error(error) do |transaction|
86
- transaction.set_namespace(::Appsignal::Transaction::BACKGROUND_JOB)
93
+ transaction.set_namespace(namespace_name)
87
94
  end
88
95
  end
89
96
  end
@@ -115,6 +122,13 @@ module Karafka
115
122
  .transform_values(&:to_s)
116
123
  .transform_keys!(&:to_s)
117
124
  end
125
+
126
+ # @return [String] transaction namespace. We lazy evaluate it and resolve if needed to
127
+ # the default `BACKGROUND_JOB` during the execution, to ensure we can initialize the
128
+ # instrumentation even before appsignal gem is loaded.
129
+ def namespace_name
130
+ @namespace_name ||= ::Appsignal::Transaction::BACKGROUND_JOB
131
+ end
118
132
  end
119
133
  end
120
134
  end
@@ -92,6 +92,26 @@ module Karafka
92
92
  clear_polling_tick
93
93
  end
94
94
 
95
+ # Deregister the polling tracker for given listener
96
+ # @param _event [Karafka::Core::Monitoring::Event]
97
+ def on_connection_listener_stopping(_event)
98
+ # We are interested in disabling tracking for given listener only if it was requested
99
+ # when karafka was running. If we would always clear, it would not catch the shutdown
100
+ # polling requirements. The "running" listener shutdown operations happen only when
101
+ # the manager requests it for downscaling.
102
+ return if Karafka::App.done?
103
+
104
+ clear_polling_tick
105
+ end
106
+
107
+ # Deregister the polling tracker for given listener
108
+ # @param _event [Karafka::Core::Monitoring::Event]
109
+ def on_connection_listener_stopped(_event)
110
+ return if Karafka::App.done?
111
+
112
+ clear_polling_tick
113
+ end
114
+
95
115
  private
96
116
 
97
117
  # Wraps the logic with a mutex
@@ -10,7 +10,7 @@ module Karafka
10
10
  :size,
11
11
  :first_offset,
12
12
  :last_offset,
13
- :deserializer,
13
+ :deserializers,
14
14
  :partition,
15
15
  :topic,
16
16
  :created_at,
@@ -21,7 +21,7 @@ module Karafka
21
21
  size: messages.count,
22
22
  first_offset: messages.first&.offset || -1001,
23
23
  last_offset: messages.last&.offset || -1001,
24
- deserializer: topic.deserializer,
24
+ deserializers: topic.deserializers,
25
25
  partition: partition,
26
26
  topic: topic.name,
27
27
  # We go with the assumption that the creation of the whole batch is the last message
@@ -14,14 +14,14 @@ module Karafka
14
14
  def call(kafka_message, topic, received_at)
15
15
  metadata = Karafka::Messages::Metadata.new(
16
16
  timestamp: kafka_message.timestamp,
17
- headers: kafka_message.headers,
18
- key: kafka_message.key,
19
17
  offset: kafka_message.offset,
20
- deserializer: topic.deserializer,
18
+ deserializers: topic.deserializers,
21
19
  partition: kafka_message.partition,
22
20
  topic: topic.name,
23
- received_at: received_at
24
- ).freeze
21
+ received_at: received_at,
22
+ raw_headers: kafka_message.headers,
23
+ raw_key: kafka_message.key
24
+ )
25
25
 
26
26
  # Get the raw payload
27
27
  payload = kafka_message.payload
@@ -31,7 +31,11 @@ module Karafka
31
31
  kafka_message.instance_variable_set('@payload', nil)
32
32
 
33
33
  # Karafka messages cannot be frozen because of the lazy deserialization feature
34
- Karafka::Messages::Message.new(payload, metadata)
34
+ message = Karafka::Messages::Message.new(payload, metadata)
35
+ # Assign message to metadata so we can reverse its relationship if needed
36
+ metadata[:message] = message
37
+
38
+ message
35
39
  end
36
40
  end
37
41
  end
@@ -23,7 +23,8 @@ module Karafka
23
23
  # prior to the final deserialization
24
24
  attr_accessor :raw_payload
25
25
 
26
- def_delegators :metadata, *Metadata.members
26
+ # We remove message as we do not want to do self-reference via `message.message`
27
+ def_delegators :metadata, *((Metadata.members + %i[key headers]) - %i[message])
27
28
 
28
29
  # @param raw_payload [Object] incoming payload before deserialization
29
30
  # @param metadata [Karafka::Messages::Metadata] message metadata object
@@ -4,15 +4,31 @@ module Karafka
4
4
  module Messages
5
5
  # Single message metadata details that can be accessed without the need of deserialization.
6
6
  Metadata = Struct.new(
7
+ :message,
7
8
  :timestamp,
8
- :headers,
9
- :key,
10
9
  :offset,
11
- :deserializer,
10
+ :deserializers,
12
11
  :partition,
13
12
  :received_at,
14
13
  :topic,
14
+ :raw_headers,
15
+ :raw_key,
15
16
  keyword_init: true
16
- )
17
+ ) do
18
+ # @return [Object] deserialized key. By default in the raw string format.
19
+ def key
20
+ return @key if @key
21
+
22
+ @key = deserializers.key.call(self)
23
+ end
24
+
25
+ # @return [Object] deserialized headers. By default its a hash with keys and payload being
26
+ # strings
27
+ def headers
28
+ return @headers if @headers
29
+
30
+ @headers = deserializers.headers.call(self)
31
+ end
32
+ end
17
33
  end
18
34
  end
@@ -7,7 +7,7 @@ module Karafka
7
7
  # @param message [::Karafka::Messages::Message]
8
8
  # @return [Object] deserialized payload
9
9
  def call(message)
10
- message.metadata.deserializer.call(message)
10
+ message.metadata.deserializers.payload.call(message)
11
11
  end
12
12
  end
13
13
  end
@@ -20,29 +20,6 @@ module Karafka
20
20
  #
21
21
  # Methods here are supposed to be always available or are expected to be redefined
22
22
  module BaseConsumer
23
- # Runs the on-schedule tick periodic operations
24
- # This method is an alias but is part of the naming convention used for other flows, this
25
- # is why we do not reference the `handle_before_schedule_tick` directly
26
- def on_before_schedule_tick
27
- handle_before_schedule_tick
28
- end
29
-
30
- # Used by the executor to trigger consumer tick
31
- # @private
32
- def on_tick
33
- handle_tick
34
- rescue StandardError => e
35
- Karafka.monitor.instrument(
36
- 'error.occurred',
37
- error: e,
38
- caller: self,
39
- type: 'consumer.tick.error'
40
- )
41
- end
42
-
43
- # By default we do nothing when ticking
44
- def tick; end
45
-
46
23
  # @return [Karafka::Pro::Processing::Coordinators::ErrorsTracker] tracker for errors that
47
24
  # occurred during processing until another successful processing
48
25
  #
@@ -58,6 +35,18 @@ module Karafka
58
35
  def errors_tracker
59
36
  coordinator.errors_tracker
60
37
  end
38
+
39
+ # @return [Karafka::Pro::Processing::SubscriptionGroupsCoordinator] Coordinator allowing to
40
+ # pause and resume polling of the given subscription group jobs queue for postponing
41
+ # further work.
42
+ #
43
+ # @note Since this stops polling, it can cause reaching `max.poll.interval.ms` limitations.
44
+ #
45
+ # @note This is a low-level API used for cross-topic coordination and some advanced features.
46
+ # Use it at own risk.
47
+ def subscription_groups_coordinator
48
+ Processing::SubscriptionGroupsCoordinator.instance
49
+ end
61
50
  end
62
51
  end
63
52
  end
@@ -16,6 +16,10 @@ module Karafka
16
16
  module Encryption
17
17
  # Cipher for encrypting and decrypting data
18
18
  class Cipher
19
+ include Helpers::ConfigImporter.new(
20
+ encryption: %i[encryption]
21
+ )
22
+
19
23
  def initialize
20
24
  @private_pems = {}
21
25
  end
@@ -39,7 +43,7 @@ module Karafka
39
43
 
40
44
  # @return [::OpenSSL::PKey::RSA] rsa public key
41
45
  def public_pem
42
- @public_pem ||= ::OpenSSL::PKey::RSA.new(::Karafka::App.config.encryption.public_key)
46
+ @public_pem ||= ::OpenSSL::PKey::RSA.new(encryption.public_key)
43
47
  end
44
48
 
45
49
  # @param version [String] version for which we want to get the rsa key
@@ -47,8 +51,8 @@ module Karafka
47
51
  def private_pem(version)
48
52
  return @private_pems[version] if @private_pems.key?(version)
49
53
 
50
- key_string = ::Karafka::App.config.encryption.private_keys[version]
51
- key_string || raise(Errors::PrivateKeyNotFound, version)
54
+ key_string = encryption.private_keys[version]
55
+ key_string || raise(Errors::PrivateKeyNotFoundError, version)
52
56
 
53
57
  @private_pems[version] = ::OpenSSL::PKey::RSA.new(key_string)
54
58
  end
@@ -30,6 +30,7 @@ module Karafka
30
30
  required(:active) { |val| [true, false].include?(val) }
31
31
  required(:version) { |val| val.is_a?(String) && !val.empty? }
32
32
  required(:public_key) { |val| val.is_a?(String) }
33
+ required(:fingerprinter) { |val| val == false || val.respond_to?(:hexdigest) }
33
34
 
34
35
  required(:private_keys) do |val|
35
36
  val.is_a?(Hash) &&
@@ -20,7 +20,10 @@ module Karafka
20
20
  BaseError = Class.new(::Karafka::Errors::BaseError)
21
21
 
22
22
  # Raised when we have encountered encryption key with version we do not have
23
- PrivateKeyNotFound = Class.new(BaseError)
23
+ PrivateKeyNotFoundError = Class.new(BaseError)
24
+
25
+ # Raised when fingerprinting was enabled and payload after decryption did not match it
26
+ FingerprintVerificationError = Class.new(BaseError)
24
27
  end
25
28
  end
26
29
  end
@@ -18,26 +18,28 @@ module Karafka
18
18
  module Messages
19
19
  # Middleware for WaterDrop. It automatically encrypts messages payload.
20
20
  # It is injected only if encryption is enabled.
21
+ # It also fingerprints the payload for verification if fingerprinting was enabled
21
22
  class Middleware
23
+ include Helpers::ConfigImporter.new(
24
+ cipher: %i[encryption cipher],
25
+ version: %i[encryption version],
26
+ fingerprinter: %i[encryption fingerprinter]
27
+ )
28
+
22
29
  # @param message [Hash] WaterDrop message hash
23
30
  # @return [Hash] hash with encrypted payload and encryption version indicator
24
31
  def call(message)
32
+ payload = message[:payload]
33
+
25
34
  message[:headers] ||= {}
26
35
  message[:headers]['encryption'] = version
27
- message[:payload] = cipher.encrypt(message[:payload])
28
- message
29
- end
36
+ message[:payload] = cipher.encrypt(payload)
30
37
 
31
- private
38
+ return message unless fingerprinter
32
39
 
33
- # @return [::Karafka::Pro::Encryption::Cipher]
34
- def cipher
35
- @cipher ||= ::Karafka::App.config.encryption.cipher
36
- end
40
+ message[:headers]['encryption_fingerprint'] = fingerprinter.hexdigest(payload)
37
41
 
38
- # @return [String] encryption version
39
- def version
40
- @version ||= ::Karafka::App.config.encryption.version
42
+ message
41
43
  end
42
44
  end
43
45
  end
@@ -20,34 +20,36 @@ module Karafka
20
20
  # unencrypted payloads. That is why we always rely on message headers for encryption
21
21
  # indication.
22
22
  class Parser < ::Karafka::Messages::Parser
23
+ include Helpers::ConfigImporter.new(
24
+ cipher: %i[encryption cipher],
25
+ active: %i[encryption active],
26
+ fingerprinter: %i[encryption fingerprinter]
27
+ )
28
+
23
29
  # @param message [::Karafka::Messages::Message]
24
30
  # @return [Object] deserialized payload
25
31
  def call(message)
26
- if active? && message.headers.key?('encryption')
27
- # Decrypt raw payload so it can be handled by the default parser logic
28
- message.raw_payload = cipher.decrypt(
29
- message.headers['encryption'],
30
- message.raw_payload
31
- )
32
- end
33
-
34
- super(message)
35
- end
32
+ headers = message.headers
33
+ encryption = headers['encryption']
34
+ fingerprint = headers['encryption_fingerprint']
36
35
 
37
- private
36
+ return super(message) unless active && encryption
38
37
 
39
- # @return [::Karafka::Pro::Encryption::Cipher]
40
- def cipher
41
- @cipher ||= ::Karafka::App.config.encryption.cipher
42
- end
38
+ # Decrypt raw payload so it can be handled by the default parser logic
39
+ decrypted_payload = cipher.decrypt(
40
+ encryption,
41
+ message.raw_payload
42
+ )
43
+
44
+ message.raw_payload = decrypted_payload
45
+
46
+ return super(message) unless fingerprint && fingerprinter
43
47
 
44
- # @return [Boolean] is encryption active
45
- def active?
46
- return @active unless @active.nil?
48
+ message_fingerprint = fingerprinter.hexdigest(decrypted_payload)
47
49
 
48
- @active = ::Karafka::App.config.encryption.active
50
+ return super(message) if message_fingerprint == fingerprint
49
51
 
50
- @active
52
+ raise(Errors::FingerprintVerificationError, message.to_s)
51
53
  end
52
54
  end
53
55
  end
@@ -40,6 +40,11 @@ module Karafka
40
40
  # Cipher used to encrypt and decrypt data
41
41
  setting(:cipher, default: Encryption::Cipher.new)
42
42
 
43
+ # When set to any digest that responds to `#hexdigest` will compute checksum of the
44
+ # message payload for post-decryption integrity verification. It will include a
45
+ # fingerprint in headers
46
+ setting(:fingerprinter, default: false)
47
+
43
48
  configure
44
49
  end
45
50
  end
@@ -29,7 +29,8 @@ module Karafka
29
29
  # - { 'topic1' => 100 } - means we run all partitions from the offset 100
30
30
  # - { 'topic1' => Time.now - 60 } - we run all partitions from the message from 60s ago
31
31
  # - { 'topic1' => { 1 => Time.now - 60 } } - partition1 from message 60s ago
32
- #
32
+ # - { 'topic1' => { 1 => true } } - will pick first offset not consumed on this CG for p 1
33
+ # - { 'topic1' => true } - will pick first offset not consumed on this CG for all p
33
34
  class Expander
34
35
  # Expands topics to which we want to subscribe with partitions information in case this
35
36
  # info is not provided.
@@ -36,6 +36,7 @@ module Karafka
36
36
  resolve_partitions_with_exact_offsets
37
37
  resolve_partitions_with_negative_offsets
38
38
  resolve_partitions_with_time_offsets
39
+ resolve_partitions_with_cg_expectations
39
40
 
40
41
  # Final tpl with all the data
41
42
  tpl = Rdkafka::Consumer::TopicPartitionList.new
@@ -149,6 +150,43 @@ module Karafka
149
150
  end
150
151
  end
151
152
  end
153
+
154
+ # Fetches last used offsets for those partitions for which we want to consume from last
155
+ # moment where given consumer group has finished
156
+ # This is indicated by given partition value being set to `true`.
157
+ def resolve_partitions_with_cg_expectations
158
+ tpl = Rdkafka::Consumer::TopicPartitionList.new
159
+
160
+ # First iterate over all topics that we want to expand
161
+ @expanded_topics.each do |name, partitions|
162
+ partitions_base = {}
163
+
164
+ partitions.each do |partition, offset|
165
+ # Pick only partitions where offset is set to true to indicate that we are interested
166
+ # in committed offset resolution
167
+ next unless offset == true
168
+
169
+ # This can be set to nil because we do not use this offset value when querying
170
+ partitions_base[partition] = nil
171
+ end
172
+
173
+ # If there is nothing to work with, just skip
174
+ next if partitions_base.empty?
175
+
176
+ tpl.add_topic_and_partitions_with_offsets(name, partitions_base)
177
+ end
178
+
179
+ # If nothing to resolve, do not resolve
180
+ return if tpl.empty?
181
+
182
+ # Fetch all committed offsets for all the topics partitions of our interest and use
183
+ # those offsets for the mapped topics data
184
+ @consumer.committed(tpl).to_h.each do |name, partitions|
185
+ partitions.each do |partition|
186
+ @mapped_topics[name][partition.partition] = partition.offset
187
+ end
188
+ end
189
+ end
152
190
  end
153
191
  end
154
192
  end
@@ -20,7 +20,9 @@ module Karafka
20
20
  # the end. It also allows for signaling, when a given message should be last out of certain
21
21
  # partition, but we still want to continue iterating in other messages.
22
22
  #
23
- # It does **not** create a consumer group and does not have any offset management.
23
+ # It does **not** create a consumer group and does not have any offset management until first
24
+ # consumer offset marking happens. So it can be used for quick seeks as well as iterative,
25
+ # repetitive data fetching from rake, etc.
24
26
  class Iterator
25
27
  # A simple API allowing to iterate over topic/partition data, without having to subscribe
26
28
  # and deal with rebalances. This API allows for multi-partition streaming and is optimized
@@ -92,6 +94,7 @@ module Karafka
92
94
  end
93
95
  end
94
96
 
97
+ @current_consumer.commit_offsets(async: false) if @stored_offsets
95
98
  @current_message = nil
96
99
  @current_consumer = nil
97
100
  end
@@ -127,6 +130,29 @@ module Karafka
127
130
  )
128
131
  end
129
132
 
133
+ # Stops all the iterating
134
+ # @note `break` can also be used but in such cases commits stored async will not be flushed
135
+ # to Kafka. This is why `#stop` is the recommended method.
136
+ def stop
137
+ @stopped = true
138
+ end
139
+
140
+ # Marks given message as consumed.
141
+ #
142
+ # @param message [Karafka::Messages::Message] message that we want to mark as processed
143
+ def mark_as_consumed(message)
144
+ @current_consumer.store_offset(message, nil)
145
+ @stored_offsets = true
146
+ end
147
+
148
+ # Marks given message as consumed and commits offsets
149
+ #
150
+ # @param message [Karafka::Messages::Message] message that we want to mark as processed
151
+ def mark_as_consumed!(message)
152
+ mark_as_consumed(message)
153
+ @current_consumer.commit_offsets(async: false)
154
+ end
155
+
130
156
  private
131
157
 
132
158
  # @return [Rdkafka::Consumer::Message, nil] message or nil if nothing to do
@@ -158,7 +184,7 @@ module Karafka
158
184
  # Do we have all the data we wanted or has every topic partition reached eof.
159
185
  # @return [Boolean]
160
186
  def done?
161
- @stopped_partitions >= @total_partitions
187
+ (@stopped_partitions >= @total_partitions) || @stopped
162
188
  end
163
189
  end
164
190
  end
@@ -63,6 +63,9 @@ module Karafka
63
63
  # @param config [Karafka::Core::Configurable::Node]
64
64
  def post_setup_all(config)
65
65
  features.each { |feature| feature.post_setup(config) }
66
+
67
+ # We initialize it here so we don't initialize it during multi-threading work
68
+ Processing::SubscriptionGroupsCoordinator.instance
66
69
  end
67
70
 
68
71
  private