karafka 2.4.18 → 2.5.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. checksums.yaml +4 -4
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/workflows/ci.yml +58 -14
  4. data/.github/workflows/push.yml +36 -0
  5. data/.github/workflows/verify-action-pins.yml +16 -0
  6. data/.ruby-version +1 -1
  7. data/CHANGELOG.md +60 -0
  8. data/Gemfile +2 -2
  9. data/Gemfile.lock +69 -50
  10. data/LICENSE-COMM +2 -2
  11. data/README.md +1 -1
  12. data/Rakefile +4 -0
  13. data/bin/clean_kafka +43 -0
  14. data/bin/integrations +19 -6
  15. data/bin/rspecs +15 -3
  16. data/bin/verify_kafka_warnings +35 -0
  17. data/bin/verify_topics_naming +27 -0
  18. data/config/locales/errors.yml +3 -0
  19. data/config/locales/pro_errors.yml +13 -2
  20. data/docker-compose.yml +1 -1
  21. data/examples/payloads/json/enrollment_event.json +579 -0
  22. data/examples/payloads/json/ingestion_event.json +30 -0
  23. data/examples/payloads/json/transaction_event.json +17 -0
  24. data/examples/payloads/json/user_event.json +11 -0
  25. data/karafka.gemspec +3 -8
  26. data/lib/karafka/active_job/current_attributes.rb +1 -1
  27. data/lib/karafka/admin/acl.rb +5 -1
  28. data/lib/karafka/admin/configs.rb +5 -1
  29. data/lib/karafka/admin.rb +69 -34
  30. data/lib/karafka/base_consumer.rb +17 -8
  31. data/lib/karafka/cli/base.rb +8 -2
  32. data/lib/karafka/cli/topics/align.rb +7 -4
  33. data/lib/karafka/cli/topics/base.rb +17 -0
  34. data/lib/karafka/cli/topics/create.rb +9 -7
  35. data/lib/karafka/cli/topics/delete.rb +4 -2
  36. data/lib/karafka/cli/topics/help.rb +39 -0
  37. data/lib/karafka/cli/topics/repartition.rb +4 -2
  38. data/lib/karafka/cli/topics.rb +10 -3
  39. data/lib/karafka/cli.rb +2 -0
  40. data/lib/karafka/connection/client.rb +30 -9
  41. data/lib/karafka/connection/listener.rb +24 -12
  42. data/lib/karafka/connection/messages_buffer.rb +1 -1
  43. data/lib/karafka/connection/proxy.rb +3 -0
  44. data/lib/karafka/constraints.rb +3 -3
  45. data/lib/karafka/contracts/config.rb +3 -0
  46. data/lib/karafka/contracts/topic.rb +1 -1
  47. data/lib/karafka/errors.rb +46 -2
  48. data/lib/karafka/helpers/async.rb +3 -1
  49. data/lib/karafka/instrumentation/callbacks/rebalance.rb +5 -1
  50. data/lib/karafka/instrumentation/logger_listener.rb +86 -23
  51. data/lib/karafka/instrumentation/proctitle_listener.rb +5 -1
  52. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
  53. data/lib/karafka/messages/builders/batch_metadata.rb +1 -1
  54. data/lib/karafka/pro/cleaner.rb +8 -0
  55. data/lib/karafka/pro/cli/parallel_segments/base.rb +89 -0
  56. data/lib/karafka/pro/cli/parallel_segments/collapse.rb +164 -0
  57. data/lib/karafka/pro/cli/parallel_segments/distribute.rb +164 -0
  58. data/lib/karafka/pro/cli/parallel_segments.rb +60 -0
  59. data/lib/karafka/pro/connection/manager.rb +5 -8
  60. data/lib/karafka/pro/encryption.rb +8 -0
  61. data/lib/karafka/pro/instrumentation/performance_tracker.rb +1 -1
  62. data/lib/karafka/pro/iterator/expander.rb +5 -3
  63. data/lib/karafka/pro/iterator/tpl_builder.rb +23 -0
  64. data/lib/karafka/pro/loader.rb +10 -0
  65. data/lib/karafka/pro/processing/coordinator.rb +4 -1
  66. data/lib/karafka/pro/processing/coordinators/errors_tracker.rb +27 -3
  67. data/lib/karafka/pro/processing/coordinators/filters_applier.rb +11 -0
  68. data/lib/karafka/pro/processing/filters/base.rb +10 -2
  69. data/lib/karafka/pro/processing/filters/expirer.rb +5 -0
  70. data/lib/karafka/pro/processing/filters/inline_insights_delayer.rb +2 -2
  71. data/lib/karafka/pro/processing/filters/virtual_limiter.rb +5 -0
  72. data/lib/karafka/pro/processing/parallel_segments/filters/base.rb +73 -0
  73. data/lib/karafka/pro/processing/parallel_segments/filters/default.rb +85 -0
  74. data/lib/karafka/pro/processing/parallel_segments/filters/mom.rb +66 -0
  75. data/lib/karafka/pro/processing/partitioner.rb +1 -13
  76. data/lib/karafka/pro/processing/piping/consumer.rb +13 -13
  77. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +1 -1
  78. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +1 -1
  79. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +1 -1
  80. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +1 -1
  81. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +1 -1
  82. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  83. data/lib/karafka/pro/processing/strategies/default.rb +36 -8
  84. data/lib/karafka/pro/processing/strategies/dlq/default.rb +14 -10
  85. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +1 -1
  86. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +1 -1
  87. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +3 -1
  88. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +1 -1
  89. data/lib/karafka/pro/processing/strategies/ftr/default.rb +1 -1
  90. data/lib/karafka/pro/processing/strategies/lrj/default.rb +4 -1
  91. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +1 -1
  92. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +1 -1
  93. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  94. data/lib/karafka/pro/processing/virtual_partitions/distributors/balanced.rb +50 -0
  95. data/lib/karafka/pro/processing/virtual_partitions/distributors/base.rb +29 -0
  96. data/lib/karafka/pro/processing/virtual_partitions/distributors/consistent.rb +27 -0
  97. data/lib/karafka/pro/recurring_tasks/contracts/config.rb +8 -4
  98. data/lib/karafka/pro/recurring_tasks/dispatcher.rb +3 -3
  99. data/lib/karafka/pro/recurring_tasks/setup/config.rb +7 -2
  100. data/lib/karafka/pro/recurring_tasks.rb +13 -0
  101. data/lib/karafka/pro/routing/features/dead_letter_queue/topic.rb +1 -1
  102. data/lib/karafka/pro/routing/features/multiplexing/config.rb +1 -0
  103. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +17 -0
  104. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +5 -2
  105. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +8 -1
  106. data/lib/karafka/pro/routing/features/parallel_segments/builder.rb +47 -0
  107. data/lib/karafka/pro/routing/features/parallel_segments/config.rb +27 -0
  108. data/lib/karafka/pro/routing/features/parallel_segments/consumer_group.rb +83 -0
  109. data/lib/karafka/pro/routing/features/parallel_segments/contracts/consumer_group.rb +49 -0
  110. data/lib/karafka/pro/routing/features/parallel_segments/topic.rb +43 -0
  111. data/lib/karafka/pro/routing/features/parallel_segments.rb +24 -0
  112. data/lib/karafka/pro/routing/features/patterns/pattern.rb +1 -1
  113. data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +2 -2
  114. data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +10 -6
  115. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +20 -2
  116. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  117. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +8 -2
  118. data/lib/karafka/pro/scheduled_messages/consumer.rb +19 -21
  119. data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +9 -6
  120. data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +7 -1
  121. data/lib/karafka/pro/scheduled_messages/max_epoch.rb +15 -6
  122. data/lib/karafka/pro/scheduled_messages.rb +13 -0
  123. data/lib/karafka/processing/coordinators_buffer.rb +1 -0
  124. data/lib/karafka/processing/strategies/default.rb +4 -4
  125. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  126. data/lib/karafka/routing/subscription_group.rb +1 -1
  127. data/lib/karafka/runner.rb +7 -1
  128. data/lib/karafka/server.rb +19 -19
  129. data/lib/karafka/setup/attributes_map.rb +2 -0
  130. data/lib/karafka/setup/config.rb +22 -1
  131. data/lib/karafka/setup/defaults_injector.rb +26 -1
  132. data/lib/karafka/status.rb +6 -1
  133. data/lib/karafka/swarm/node.rb +31 -0
  134. data/lib/karafka/swarm/supervisor.rb +4 -0
  135. data/lib/karafka/templates/karafka.rb.erb +14 -1
  136. data/lib/karafka/version.rb +1 -1
  137. data/lib/karafka.rb +17 -9
  138. data/renovate.json +14 -2
  139. metadata +40 -40
  140. checksums.yaml.gz.sig +0 -0
  141. data/certs/cert.pem +0 -26
  142. data.tar.gz.sig +0 -0
  143. metadata.gz.sig +0 -0
@@ -54,9 +54,9 @@ module Karafka
54
54
  @applied = true
55
55
  end
56
56
 
57
- # @return [Integer] ms timeout in case of pause
57
+ # @return [Integer, nil] ms timeout in case of pause or nil if not delaying
58
58
  def timeout
59
- @cursor && applied? ? PAUSE_TIMEOUT : 0
59
+ @cursor && applied? ? PAUSE_TIMEOUT : nil
60
60
  end
61
61
 
62
62
  # Pause when we had to back-off or skip if delay is not needed
@@ -37,6 +37,11 @@ module Karafka
37
37
 
38
38
  messages.delete_if { |message| marked.include?(message.offset) }
39
39
  end
40
+
41
+ # @return [nil] This filter does not deal with pausing, so timeout is always nil
42
+ def timeout
43
+ nil
44
+ end
40
45
  end
41
46
  end
42
47
  end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
4
+ # See LICENSE for details.
5
+
6
+ module Karafka
7
+ module Pro
8
+ module Processing
9
+ module ParallelSegments
10
+ # Module for filters injected into the processing pipeline of each of the topics used
11
+ # within the parallel segmented consumer groups
12
+ module Filters
13
+ # Base class for filters for parallel segments that deal with different feature scenarios
14
+ class Base < Processing::Filters::Base
15
+ # @param segment_id [Integer] numeric id of the parallel segment group to use with the
16
+ # partitioner and reducer for segment matching comparison
17
+ # @param partitioner [Proc]
18
+ # @param reducer [Proc]
19
+ def initialize(segment_id:, partitioner:, reducer:)
20
+ super()
21
+
22
+ @segment_id = segment_id
23
+ @partitioner = partitioner
24
+ @reducer = reducer
25
+ end
26
+
27
+ private
28
+
29
+ # @param message [Karafka::Messages::Message] received message
30
+ # @return [String, Numeric] segment assignment key
31
+ def partition(message)
32
+ @partitioner.call(message)
33
+ rescue StandardError => e
34
+ # This should not happen. If you are seeing this it means your partitioner code
35
+ # failed and raised an error. We highly recommend mitigating partitioner level errors
36
+ # on the user side because this type of collapse should be considered a last resort
37
+ Karafka.monitor.instrument(
38
+ 'error.occurred',
39
+ caller: self,
40
+ error: e,
41
+ message: message,
42
+ type: 'parallel_segments.partitioner.error'
43
+ )
44
+
45
+ :failure
46
+ end
47
+
48
+ # @param message_segment_key [String, Numeric] segment key to pass to the reducer
49
+ # @return [Integer] segment assignment of a given message
50
+ def reduce(message_segment_key)
51
+ # Assign to segment 0 always in case of failures in partitioner
52
+ # This is a fail-safe
53
+ return 0 if message_segment_key == :failure
54
+
55
+ @reducer.call(message_segment_key)
56
+ rescue StandardError => e
57
+ # @see `#partition` method error handling doc
58
+ Karafka.monitor.instrument(
59
+ 'error.occurred',
60
+ caller: self,
61
+ error: e,
62
+ message_segment_key: message_segment_key,
63
+ type: 'parallel_segments.reducer.error'
64
+ )
65
+
66
+ 0
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
4
+ # See LICENSE for details.
5
+
6
+ module Karafka
7
+ module Pro
8
+ module Processing
9
+ # Processing components namespace for parallel segments feature
10
+ module ParallelSegments
11
+ module Filters
12
+ # Filter used for handling parallel segments with automatic offset management. Handles
13
+ # message distribution and ensures proper offset management when messages are filtered
14
+ # out during the distribution process.
15
+ #
16
+ # When operating in automatic offset management mode, this filter takes care of marking
17
+ # offsets of messages that were filtered out during the distribution process to maintain
18
+ # proper offset progression.
19
+ #
20
+ # @note This is the default filter that should be used when manual offset management
21
+ # is not enabled. For manual offset management scenarios use the Mom filter instead.
22
+ class Default < Base
23
+ # Applies the filter to the batch of messages
24
+ # It removes messages that don't belong to the current parallel segment group
25
+ # based on the partitioner and reducer logic
26
+ #
27
+ # @param messages [Array<Karafka::Messages::Message>] messages batch that we want to
28
+ # filter
29
+ def apply!(messages)
30
+ @applied = false
31
+ @all_filtered = false
32
+ @cursor = messages.first
33
+
34
+ # Keep track of how many messages we had initially
35
+ initial_size = messages.size
36
+
37
+ # Filter out messages that don't match our segment group
38
+ messages.delete_if do |message|
39
+ message_segment_key = partition(message)
40
+
41
+ # Use the reducer to get the target group for this message
42
+ target_segment = reduce(message_segment_key)
43
+
44
+ # Remove the message if it doesn't belong to our group
45
+ remove = target_segment != @segment_id
46
+
47
+ if remove
48
+ @cursor = message
49
+ @applied = true
50
+ end
51
+
52
+ remove
53
+ end
54
+
55
+ # If all messages were filtered out, we want to mark them as consumed
56
+ @all_filtered = messages.empty? && initial_size.positive?
57
+ end
58
+
59
+ # @return [Boolean] true if any messages were filtered out
60
+ def applied?
61
+ @applied
62
+ end
63
+
64
+ # @return [Boolean] true if we should mark as consumed (when all were filtered)
65
+ def mark_as_consumed?
66
+ @all_filtered
67
+ end
68
+
69
+ # @return [nil] Since we do not timeout ever in this filter, we should not return
70
+ # any value for it.
71
+ def timeout
72
+ nil
73
+ end
74
+
75
+ # Only return cursor if we wanted to mark as consumed in case all was filtered.
76
+ # Otherwise it could interfere with other filters
77
+ def cursor
78
+ @all_filtered ? @cursor : nil
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This code is part of Karafka Pro, a commercial component not licensed under LGPL.
4
+ # See LICENSE for details.
5
+
6
+ module Karafka
7
+ module Pro
8
+ module Processing
9
+ module ParallelSegments
10
+ module Filters
11
+ # Filter used for handling parallel segments when manual offset management (mom) is
12
+ # enabled. Provides message distribution without any post-filtering offset state
13
+ # management as it is fully user-based.
14
+ #
15
+ # With manual offset management we need to ensure that offsets are never marked
16
+ # even in cases where all data in a batch is filtered out.
17
+ #
18
+ # This separation allows for cleaner implementation and easier debugging of each flow.
19
+ #
20
+ # @note This filter should be used only when manual offset management is enabled.
21
+ # For automatic offset management scenarios use the regular filter instead.
22
+ class Mom < Base
23
+ # Applies the filter to the batch of messages
24
+ # It removes messages that don't belong to the current parallel segment group
25
+ # based on the partitioner and reducer logic without any offset marking
26
+ #
27
+ # @param messages [Array<Karafka::Messages::Message>] messages batch that we want to
28
+ # filter
29
+ def apply!(messages)
30
+ @applied = false
31
+
32
+ # Filter out messages that don't match our segment group
33
+ messages.delete_if do |message|
34
+ message_segment_key = partition(message)
35
+ # Use the reducer to get the target group for this message
36
+ target_segment = reduce(message_segment_key)
37
+ # Remove the message if it doesn't belong to our segment
38
+ remove = target_segment != @segment_id
39
+
40
+ @applied = true if remove
41
+
42
+ remove
43
+ end
44
+ end
45
+
46
+ # @return [Boolean] true if any messages were filtered out
47
+ def applied?
48
+ @applied
49
+ end
50
+
51
+ # @return [Boolean] false, as mom mode never marks as consumed automatically
52
+ def mark_as_consumed?
53
+ false
54
+ end
55
+
56
+ # @return [nil] Since we do not timeout ever in this filter, we should not return
57
+ # any value for it.
58
+ def timeout
59
+ nil
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
@@ -38,19 +38,7 @@ module Karafka
38
38
  # reduce the whole set into one partition and emit error. This should still allow for
39
39
  # user flow but should mitigate damages by not virtualizing
40
40
  begin
41
- groupings = messages.group_by do |msg|
42
- # We need to reduce it to the max concurrency, so the group_id is not a direct
43
- # effect of the end user action. Otherwise the persistence layer for consumers
44
- # would cache it forever and it would cause memory leaks
45
- #
46
- # This also needs to be consistent because the aggregation here needs to warrant,
47
- # that the same partitioned message will always be assigned to the same virtual
48
- # partition. Otherwise in case of a window aggregation with VP spanning across
49
- # several polls, the data could not be complete.
50
- vps.reducer.call(
51
- vps.partitioner.call(msg)
52
- )
53
- end
41
+ groupings = vps.distributor.call(messages)
54
42
  rescue StandardError => e
55
43
  # This should not happen. If you are seeing this it means your partitioner code
56
44
  # failed and raised an error. We highly recommend mitigating partitioner level errors
@@ -20,16 +20,16 @@ module Karafka
20
20
 
21
21
  # Pipes given message to the provided topic with expected details. Useful for
22
22
  # pass-through operations where deserialization is not needed. Upon usage it will include
23
- # all the original headers + meta headers about the source of message.
23
+ # all the source headers + meta headers about the source of message.
24
24
  #
25
25
  # @param topic [String, Symbol] where we want to send the message
26
- # @param message [Karafka::Messages::Message] original message to pipe
26
+ # @param message [Karafka::Messages::Message] source message to pipe
27
27
  #
28
28
  # @note It will NOT deserialize the payload so it is fast
29
29
  #
30
30
  # @note We assume that there can be different number of partitions in the target topic,
31
- # this is why we use `key` based on the original topic key and not the partition id.
32
- # This will not utilize partitions beyond the number of partitions of original topic,
31
+ # this is why we use `key` based on the source topic key and not the partition id.
32
+ # This will not utilize partitions beyond the number of partitions of source topic,
33
33
  # but will accommodate topics with fewer partitions.
34
34
  def pipe_async(topic:, message:)
35
35
  produce_async(
@@ -40,7 +40,7 @@ module Karafka
40
40
  # Sync version of pipe for one message
41
41
  #
42
42
  # @param topic [String, Symbol] where we want to send the message
43
- # @param message [Karafka::Messages::Message] original message to pipe
43
+ # @param message [Karafka::Messages::Message] source message to pipe
44
44
  # @see [#pipe_async]
45
45
  def pipe_sync(topic:, message:)
46
46
  produce_sync(
@@ -51,7 +51,7 @@ module Karafka
51
51
  # Async multi-message pipe
52
52
  #
53
53
  # @param topic [String, Symbol] where we want to send the message
54
- # @param messages [Array<Karafka::Messages::Message>] original messages to pipe
54
+ # @param messages [Array<Karafka::Messages::Message>] source messages to pipe
55
55
  #
56
56
  # @note If transactional producer in use and dispatch is not wrapped with a transaction,
57
57
  # it will automatically wrap the dispatch with a transaction
@@ -66,7 +66,7 @@ module Karafka
66
66
  # Sync multi-message pipe
67
67
  #
68
68
  # @param topic [String, Symbol] where we want to send the message
69
- # @param messages [Array<Karafka::Messages::Message>] original messages to pipe
69
+ # @param messages [Array<Karafka::Messages::Message>] source messages to pipe
70
70
  #
71
71
  # @note If transactional producer in use and dispatch is not wrapped with a transaction,
72
72
  # it will automatically wrap the dispatch with a transaction
@@ -81,7 +81,7 @@ module Karafka
81
81
  private
82
82
 
83
83
  # @param topic [String, Symbol] where we want to send the message
84
- # @param message [Karafka::Messages::Message] original message to pipe
84
+ # @param message [Karafka::Messages::Message] source message to pipe
85
85
  # @return [Hash] hash with message to pipe.
86
86
  #
87
87
  # @note If you need to alter this, please define the `#enhance_pipe_message` method
@@ -90,17 +90,17 @@ module Karafka
90
90
  topic: topic,
91
91
  payload: message.raw_payload,
92
92
  headers: message.raw_headers.merge(
93
- 'original_topic' => message.topic,
94
- 'original_partition' => message.partition.to_s,
95
- 'original_offset' => message.offset.to_s,
96
- 'original_consumer_group' => self.topic.consumer_group.id
93
+ 'source_topic' => message.topic,
94
+ 'source_partition' => message.partition.to_s,
95
+ 'source_offset' => message.offset.to_s,
96
+ 'source_consumer_group' => self.topic.consumer_group.id
97
97
  )
98
98
  }
99
99
 
100
100
  # Use a key only if key was provided
101
101
  if message.raw_key
102
102
  pipe_message[:key] = message.raw_key
103
- # Otherwise pipe creating a key that will assign it based on the original partition
103
+ # Otherwise pipe creating a key that will assign it based on the source partition
104
104
  # number
105
105
  else
106
106
  pipe_message[:key] = message.partition.to_s
@@ -38,7 +38,7 @@ module Karafka
38
38
  elsif !revoked?
39
39
  # no need to check for manual seek because AJ consumer is internal and
40
40
  # fully controlled by us
41
- seek(seek_offset, false)
41
+ seek(seek_offset, false, reset_offset: false)
42
42
  resume
43
43
  else
44
44
  resume
@@ -44,7 +44,7 @@ module Karafka
44
44
  elsif !revoked?
45
45
  # no need to check for manual seek because AJ consumer is internal and
46
46
  # fully controlled by us
47
- seek(seek_offset, false)
47
+ seek(seek_offset, false, reset_offset: false)
48
48
  resume
49
49
  else
50
50
  resume
@@ -36,7 +36,7 @@ module Karafka
36
36
 
37
37
  # no need to check for manual seek because AJ consumer is internal and
38
38
  # fully controlled by us
39
- seek(seek_offset, false) unless revoked?
39
+ seek(seek_offset, false, reset_offset: false) unless revoked?
40
40
 
41
41
  resume
42
42
  else
@@ -40,7 +40,7 @@ module Karafka
40
40
  mark_as_consumed(last_group_message) unless revoked?
41
41
  # no need to check for manual seek because AJ consumer is internal and
42
42
  # fully controlled by us
43
- seek(seek_offset, false) unless revoked?
43
+ seek(seek_offset, false, reset_offset: false) unless revoked?
44
44
 
45
45
  resume
46
46
  else
@@ -40,7 +40,7 @@ module Karafka
40
40
  elsif !revoked?
41
41
  # no need to check for manual seek because AJ consumer is internal and
42
42
  # fully controlled by us
43
- seek(seek_offset, false)
43
+ seek(seek_offset, false, reset_offset: false)
44
44
  resume
45
45
  else
46
46
  resume
@@ -43,7 +43,7 @@ module Karafka
43
43
 
44
44
  # no need to check for manual seek because AJ consumer is internal and
45
45
  # fully controlled by us
46
- seek(seek_offset, false) unless revoked?
46
+ seek(seek_offset, false, reset_offset: false) unless revoked?
47
47
 
48
48
  resume
49
49
  else
@@ -55,14 +55,19 @@ module Karafka
55
55
  # seek offset can be nil only in case `#seek` was invoked with offset reset request
56
56
  # In case like this we ignore marking
57
57
  return true if seek_offset.nil?
58
- # Ignore earlier offsets than the one we already committed
59
- return true if seek_offset > message.offset
58
+ # Ignore if it is the same offset as the one that is marked currently
59
+ # We ignore second marking because it changes nothing and in case of people using
60
+ # metadata storage but with automatic offset marking, this would cause metadata to be
61
+ # erased by automatic marking
62
+ return true if (seek_offset - 1) == message.offset
60
63
  return false if revoked?
61
64
 
62
65
  # If we are not inside a transaction but this is a transactional topic, we mark with
63
66
  # artificially created transaction
64
67
  stored = if producer.transactional?
65
68
  mark_with_transaction(message, offset_metadata, true)
69
+ elsif @_transactional_marking
70
+ raise Errors::NonTransactionalMarkingAttemptError
66
71
  else
67
72
  client.mark_as_consumed(message, offset_metadata)
68
73
  end
@@ -92,14 +97,19 @@ module Karafka
92
97
  # seek offset can be nil only in case `#seek` was invoked with offset reset request
93
98
  # In case like this we ignore marking
94
99
  return true if seek_offset.nil?
95
- # Ignore earlier offsets than the one we already committed
96
- return true if seek_offset > message.offset
100
+ # Ignore if it is the same offset as the one that is marked currently
101
+ # We ignore second marking because it changes nothing and in case of people using
102
+ # metadata storage but with automatic offset marking, this would cause metadata to be
103
+ # erased by automatic marking
104
+ return true if (seek_offset - 1) == message.offset
97
105
  return false if revoked?
98
106
 
99
107
  # If we are not inside a transaction but this is a transactional topic, we mark with
100
108
  # artificially created transaction
101
109
  stored = if producer.transactional?
102
110
  mark_with_transaction(message, offset_metadata, false)
111
+ elsif @_transactional_marking
112
+ raise Errors::NonTransactionalMarkingAttemptError
103
113
  else
104
114
  client.mark_as_consumed!(message, offset_metadata)
105
115
  end
@@ -143,6 +153,7 @@ module Karafka
143
153
  self.producer = active_producer
144
154
 
145
155
  transaction_started = false
156
+ transaction_completed = false
146
157
 
147
158
  # Prevent from nested transactions. It would not make any sense
148
159
  raise Errors::TransactionAlreadyInitializedError if @_in_transaction
@@ -159,6 +170,12 @@ module Karafka
159
170
  # transaction. We do it only for transactions that contain offset management as for
160
171
  # producer only, this is not relevant.
161
172
  raise Errors::AssignmentLostError if @_in_transaction_marked && revoked?
173
+
174
+ # If we do not reach this, we should not move seek offsets because it means that
175
+ # either an error occured or transaction was aborted.
176
+ # In case of error, it will bubble up so no issue but in case of abort, while we
177
+ # do not reach this place, the code will continue
178
+ transaction_completed = true
162
179
  end
163
180
 
164
181
  @_in_transaction = false
@@ -180,8 +197,13 @@ module Karafka
180
197
  # to mimic this
181
198
  # - Complex strategies like VPs can use this in VPs to mark in parallel without
182
199
  # having to redefine the transactional flow completely
183
- @_transaction_marked.each do |marking|
184
- marking.pop ? mark_as_consumed(*marking) : mark_as_consumed!(*marking)
200
+ #
201
+ # @note This should be applied only if transaction did not error and if it was not
202
+ # aborted.
203
+ if transaction_completed
204
+ @_transaction_marked.each do |marking|
205
+ marking.pop ? mark_as_consumed(*marking) : mark_as_consumed!(*marking)
206
+ end
185
207
  end
186
208
 
187
209
  true
@@ -213,6 +235,9 @@ module Karafka
213
235
  offset_metadata
214
236
  )
215
237
 
238
+ # This one is long lived and used to make sure, that users do not mix transactional
239
+ # marking with non-transactional. When this happens we should raise error
240
+ @_transactional_marking = true
216
241
  @_in_transaction_marked = true
217
242
  @_transaction_marked ||= []
218
243
  @_transaction_marked << [message, offset_metadata, async]
@@ -252,8 +277,11 @@ module Karafka
252
277
  # seek offset can be nil only in case `#seek` was invoked with offset reset request
253
278
  # In case like this we ignore marking
254
279
  return true if seek_offset.nil?
255
- # Ignore earlier offsets than the one we already committed
256
- return true if seek_offset > message.offset
280
+ # Ignore if it is the same offset as the one that is marked currently
281
+ # We ignore second marking because it changes nothing and in case of people using
282
+ # metadata storage but with automatic offset marking, this would cause metadata to be
283
+ # erased by automatic marking
284
+ return true if (seek_offset - 1) == message.offset
257
285
  return false if revoked?
258
286
 
259
287
  # If we have already marked this successfully in a transaction that was running
@@ -145,19 +145,19 @@ module Karafka
145
145
  # @param skippable_message [Karafka::Messages::Message]
146
146
  # @return [Hash] dispatch DLQ message
147
147
  def build_dlq_message(skippable_message)
148
- original_partition = skippable_message.partition.to_s
148
+ source_partition = skippable_message.partition.to_s
149
149
 
150
150
  dlq_message = {
151
- topic: topic.dead_letter_queue.topic,
152
- key: original_partition,
151
+ topic: @_dispatch_to_dlq_topic || topic.dead_letter_queue.topic,
152
+ key: source_partition,
153
153
  payload: skippable_message.raw_payload,
154
154
  headers: skippable_message.raw_headers.merge(
155
- 'original_topic' => topic.name,
156
- 'original_partition' => original_partition,
157
- 'original_offset' => skippable_message.offset.to_s,
158
- 'original_consumer_group' => topic.consumer_group.id,
159
- 'original_key' => skippable_message.raw_key.to_s,
160
- 'original_attempts' => attempt.to_s
155
+ 'source_topic' => topic.name,
156
+ 'source_partition' => source_partition,
157
+ 'source_offset' => skippable_message.offset.to_s,
158
+ 'source_consumer_group' => topic.consumer_group.id,
159
+ 'source_key' => skippable_message.raw_key.to_s,
160
+ 'source_attempts' => attempt.to_s
161
161
  )
162
162
  }
163
163
 
@@ -205,7 +205,7 @@ module Karafka
205
205
  # In case of `:skip` and `:dispatch` will run the exact flow provided in a block
206
206
  # In case of `:retry` always `#retry_after_pause` is applied
207
207
  def apply_dlq_flow
208
- flow = topic.dead_letter_queue.strategy.call(errors_tracker, attempt)
208
+ flow, target_topic = topic.dead_letter_queue.strategy.call(errors_tracker, attempt)
209
209
 
210
210
  case flow
211
211
  when :retry
@@ -216,6 +216,8 @@ module Karafka
216
216
  @_dispatch_to_dlq = false
217
217
  when :dispatch
218
218
  @_dispatch_to_dlq = true
219
+ # Use custom topic if it was returned from the strategy
220
+ @_dispatch_to_dlq_topic = target_topic || topic.dead_letter_queue.topic
219
221
  else
220
222
  raise Karafka::UnsupportedCaseError, flow
221
223
  end
@@ -227,6 +229,8 @@ module Karafka
227
229
 
228
230
  # Always backoff after DLQ dispatch even on skip to prevent overloads on errors
229
231
  pause(seek_offset, nil, false)
232
+ ensure
233
+ @_dispatch_to_dlq_topic = nil
230
234
  end
231
235
 
232
236
  # Marks message that went to DLQ (if applicable) based on the requested method
@@ -40,7 +40,7 @@ module Karafka
40
40
  if coordinator.filtered? && !revoked?
41
41
  handle_post_filtering
42
42
  elsif !revoked? && !coordinator.manual_seek?
43
- seek(seek_offset, false)
43
+ seek(seek_offset, false, reset_offset: false)
44
44
  resume
45
45
  else
46
46
  resume
@@ -35,7 +35,7 @@ module Karafka
35
35
  if coordinator.filtered? && !revoked?
36
36
  handle_post_filtering
37
37
  elsif !revoked? && !coordinator.manual_seek?
38
- seek(last_group_message.offset + 1, false)
38
+ seek(last_group_message.offset + 1, false, reset_offset: false)
39
39
  resume
40
40
  else
41
41
  resume
@@ -31,7 +31,9 @@ module Karafka
31
31
 
32
32
  mark_as_consumed(last_group_message) unless revoked?
33
33
  # We should not overwrite user manual seek request with our seek
34
- seek(seek_offset, false) unless revoked? || coordinator.manual_seek?
34
+ unless revoked? || coordinator.manual_seek?
35
+ seek(seek_offset, false, reset_offset: false)
36
+ end
35
37
 
36
38
  resume
37
39
  else
@@ -30,7 +30,7 @@ module Karafka
30
30
  return if coordinator.manual_pause?
31
31
 
32
32
  unless revoked? || coordinator.manual_seek?
33
- seek(last_group_message.offset + 1, false)
33
+ seek(last_group_message.offset + 1, false, reset_offset: false)
34
34
  end
35
35
 
36
36
  resume
@@ -67,7 +67,7 @@ module Karafka
67
67
  if filter.mark_as_consumed?
68
68
  send(
69
69
  filter.marking_method,
70
- filter.cursor
70
+ filter.marking_cursor
71
71
  )
72
72
  end
73
73
 
@@ -43,7 +43,10 @@ module Karafka
43
43
  return if coordinator.manual_pause?
44
44
 
45
45
  mark_as_consumed(last_group_message) unless revoked?
46
- seek(seek_offset, false) unless revoked? || coordinator.manual_seek?
46
+
47
+ unless revoked? || coordinator.manual_seek?
48
+ seek(seek_offset, false, reset_offset: false)
49
+ end
47
50
 
48
51
  resume
49
52
  else
@@ -40,7 +40,7 @@ module Karafka
40
40
  elsif !revoked? && !coordinator.manual_seek?
41
41
  # If not revoked and not throttled, we move to where we were supposed to and
42
42
  # resume
43
- seek(seek_offset, false)
43
+ seek(seek_offset, false, reset_offset: false)
44
44
  resume
45
45
  else
46
46
  resume