karafka 2.1.12 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +41 -0
  4. data/Gemfile.lock +1 -1
  5. data/bin/record_rss +50 -0
  6. data/config/locales/errors.yml +4 -0
  7. data/config/locales/pro_errors.yml +17 -0
  8. data/lib/karafka/admin.rb +21 -33
  9. data/lib/karafka/connection/client.rb +1 -1
  10. data/lib/karafka/contracts/config.rb +24 -0
  11. data/lib/karafka/errors.rb +3 -0
  12. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +5 -2
  13. data/lib/karafka/messages/builders/message.rb +8 -4
  14. data/lib/karafka/pro/active_job/consumer.rb +1 -1
  15. data/lib/karafka/pro/cleaner/errors.rb +27 -0
  16. data/lib/karafka/pro/cleaner/messages/message.rb +46 -0
  17. data/lib/karafka/pro/cleaner/messages/messages.rb +42 -0
  18. data/lib/karafka/pro/cleaner.rb +41 -0
  19. data/lib/karafka/pro/contracts/base.rb +23 -0
  20. data/lib/karafka/pro/contracts/server_cli_options.rb +111 -0
  21. data/lib/karafka/pro/encryption/errors.rb +4 -1
  22. data/lib/karafka/pro/loader.rb +6 -2
  23. data/lib/karafka/pro/processing/strategies/dlq/default.rb +6 -0
  24. data/lib/karafka/pro/routing/features/active_job/builder.rb +45 -0
  25. data/lib/karafka/pro/routing/features/active_job.rb +26 -0
  26. data/lib/karafka/pro/routing/features/dead_letter_queue/contracts/topic.rb +53 -0
  27. data/lib/karafka/pro/routing/features/delaying/contracts/topic.rb +41 -0
  28. data/lib/karafka/pro/routing/features/expiring/contracts/topic.rb +41 -0
  29. data/lib/karafka/pro/routing/features/filtering/contracts/topic.rb +44 -0
  30. data/lib/karafka/pro/routing/features/long_running_job/{contract.rb → contracts/topic.rb} +14 -11
  31. data/lib/karafka/pro/routing/features/{filtering/contract.rb → patterns/builder.rb} +13 -16
  32. data/lib/karafka/pro/routing/features/patterns/config.rb +54 -0
  33. data/lib/karafka/pro/routing/features/patterns/consumer_group.rb +68 -0
  34. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +62 -0
  35. data/lib/karafka/pro/routing/features/patterns/contracts/pattern.rb +46 -0
  36. data/lib/karafka/pro/routing/features/patterns/contracts/topic.rb +41 -0
  37. data/lib/karafka/pro/routing/features/patterns/detector.rb +68 -0
  38. data/lib/karafka/pro/routing/features/patterns/pattern.rb +81 -0
  39. data/lib/karafka/pro/routing/features/{delaying/contract.rb → patterns/patterns.rb} +11 -14
  40. data/lib/karafka/pro/routing/features/patterns/topic.rb +50 -0
  41. data/lib/karafka/pro/routing/features/patterns/topics.rb +53 -0
  42. data/lib/karafka/pro/routing/features/patterns.rb +33 -0
  43. data/lib/karafka/pro/routing/features/pausing/contracts/topic.rb +51 -0
  44. data/lib/karafka/pro/routing/features/throttling/contracts/topic.rb +44 -0
  45. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +55 -0
  46. data/lib/karafka/routing/consumer_group.rb +1 -1
  47. data/lib/karafka/routing/features/active_job/contracts/topic.rb +44 -0
  48. data/lib/karafka/routing/features/active_job/proxy.rb +14 -0
  49. data/lib/karafka/routing/features/base/expander.rb +8 -2
  50. data/lib/karafka/routing/features/base.rb +4 -2
  51. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +46 -0
  52. data/lib/karafka/routing/features/declaratives/contracts/topic.rb +33 -0
  53. data/lib/karafka/routing/features/manual_offset_management/contracts/topic.rb +27 -0
  54. data/lib/karafka/routing/router.rb +0 -11
  55. data/lib/karafka/routing/subscription_group.rb +9 -0
  56. data/lib/karafka/routing/topic.rb +5 -0
  57. data/lib/karafka/server.rb +9 -4
  58. data/lib/karafka/setup/config.rb +45 -0
  59. data/lib/karafka/version.rb +1 -1
  60. data.tar.gz.sig +0 -0
  61. metadata +37 -15
  62. metadata.gz.sig +0 -0
  63. data/lib/karafka/pro/routing/features/dead_letter_queue/contract.rb +0 -50
  64. data/lib/karafka/pro/routing/features/expiring/contract.rb +0 -38
  65. data/lib/karafka/pro/routing/features/pausing/contract.rb +0 -48
  66. data/lib/karafka/pro/routing/features/throttling/contract.rb +0 -41
  67. data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +0 -52
  68. data/lib/karafka/routing/features/active_job/contract.rb +0 -41
  69. data/lib/karafka/routing/features/dead_letter_queue/contract.rb +0 -42
  70. data/lib/karafka/routing/features/declaratives/contract.rb +0 -30
  71. data/lib/karafka/routing/features/manual_offset_management/contract.rb +0 -24
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: de7a5880846f3b3cdab696683f4753bebdb6d133648297e930a415949937d1f5
- data.tar.gz: df12badb044151bccbecb3832302182794af543b1740d9a939f984b299bd1521
+ metadata.gz: 6ca6426dc8527aac122a1d9ebcaccf33a4eff1608133b210e68e66d3a5f5c2c7
+ data.tar.gz: 85233cc04e591e5a96d53b3b9d3fe203b4b56bbde24c564ffa3663daeb3673c6
  SHA512:
- metadata.gz: 17fb8af9c36c3df7e6ef084aab4de9d796710cf6a324cbbac4be29a57dc09f254bac817ec0b44e2a0cf0d2b8aae69ab3834eb59c93bce4d4a540d8000808f31f
- data.tar.gz: 8fedb0f5dfe436bd3854caf5a4ac0da367345be98e059e47ca24dd9d31d414aaf25debc66533965f9fdbdd3b5a78aeb4d719929b1ee274f5f3a36b3737ee2393
+ metadata.gz: e89b72adccdb6a622d571ab9c580191e725cdb98d88f5d86f59d9e4a9900eff74fa438b3838cfe104f7259c0c4cee0dc60f7fb6d16cf14b59ff9229170bae504
+ data.tar.gz: a8ba4b92e1002c6d1112837e1b96706208ba0d55fa39feaadaa4f04f4c616095eb406d72bffa50bd346fb1daff7ca45951efd2237a71c25a7539be5a4397a781
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,43 @@
  # Karafka framework changelog

+ ## 2.2.0 (2023-09-01)
+ - **[Feature]** Introduce dynamic topic subscriptions based on patterns [Pro].
+ - [Enhancement] Allow for `Karafka::Admin` setup reconfiguration via `config.admin` scope.
+ - [Enhancement] Make sure that consumer group used by `Karafka::Admin` obeys the `ConsumerMapper` setup.
+ - [Fix] Fix a case where subscription group would not accept a symbol name.
+
+ ### Upgrade notes
+
+ As always, please make sure you have upgraded to the most recent version of `2.1` before upgrading to `2.2`.
+
+ If you are not using Kafka ACLs, there is no action you need to take.
+
+ If you are using Kafka ACLs and you've set up permissions for the `karafka_admin` group, please note that this name has now been changed and is subject to [Consumer Name Mapping](https://karafka.io/docs/Consumer-mappers/).
+
+ That means you must ensure that the new consumer group, which by default equals `CLIENT_ID_karafka_admin`, has appropriate permissions. Please note that the Web UI also uses this group.
+
+ `Karafka::Admin` now has its own set of configuration options available, and you can find more details about that [here](https://karafka.io/docs/Topics-management-and-administration/#configuration).
+
+ If you want to maintain the `2.1` behavior, that is, the `karafka_admin` admin group, we recommend introducing this case inside your consumer mapper. Assuming you use the default one, the code will look as follows:
+
+ ```ruby
+ class MyMapper
+   def call(raw_consumer_group_name)
+     # If group is the admin one, use it as it was in 2.1
+     return 'karafka_admin' if raw_consumer_group_name == 'karafka_admin'
+
+     # Otherwise use the default karafka strategy for the rest
+     "#{Karafka::App.config.client_id}_#{raw_consumer_group_name}"
+   end
+ end
+ ```
+
+ ## 2.1.13 (2023-08-28)
+ - **[Feature]** Introduce Cleaning API for much better memory management for iterative data processing [Pro].
+ - [Enhancement] Automatically free message resources after they are processed for ActiveJob jobs [Pro].
+ - [Enhancement] Free memory used by the raw payload as fast as possible after obtaining it from `karafka-rdkafka`.
+ - [Enhancement] Support changing `service_name` in the DataDog integration.
+
  ## 2.1.12 (2023-08-25)
  - [Fix] Fix a case where DLQ + VP without intermediate marking would mark an earlier message than the last one.

@@ -9,7 +47,10 @@

  ## 2.1.10 (2023-08-21)
  - [Enhancement] Introduce `connection.client.rebalance_callback` event for instrumentation of rebalances.
+ - [Enhancement] Introduce new `runner.before_call` monitor event.
  - [Refactor] Introduce low level commands proxy to handle deviation in how we want to run certain commands and how rdkafka-ruby runs that by design.
+ - [Change] No longer validate excluded topics routing presence when any patterns are used, as it does not match pattern subscriptions where you can exclude things that could be subscribed in the future.
+ - [Fix] Do not report negative lag stored in the DD listener.
  - [Fix] Do not report lags in the DD listener for cases where the assignment is not workable.
  - [Fix] Do not report negative lags in the DD listener.
  - [Fix] Extremely fast shutdown after boot in specs can cause process not to stop.
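The headline 2.2.0 feature above, pattern-based subscriptions, is implemented by the `patterns` routing files listed in this diff. As a rough, hedged sketch only (the `pattern` routing DSL follows the Pro documentation and is not itself shown in this diff; `EventsConsumer` and the regexp are placeholders):

```ruby
# Sketch of the Pro patterns routing DSL; names and regexp are illustrative.
class KarafkaApp < Karafka::App
  routes.draw do
    # Matches current and future topics whose names end with "_events",
    # including topics created while the process is already running
    pattern(/.*_events\z/) do
      consumer EventsConsumer
    end
  end
end
```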
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.1.12)
+ karafka (2.2.0)
  karafka-core (>= 2.1.1, < 2.2.0)
  thor (>= 0.20)
  waterdrop (>= 2.6.6, < 3.0.0)
data/bin/record_rss ADDED
@@ -0,0 +1,50 @@
+ #!/bin/bash
+
+ # This script monitors and records the Resident Set Size (RSS) of a process given its PID.
+ # The RSS is logged every second to the specified output file until the process terminates.
+ #
+ # Usage:
+ #   ./script_name.sh <PID> <OUTPUT_FILE>
+ #
+ # Arguments:
+ #   <PID> - Process ID of the process you want to monitor.
+ #   <OUTPUT_FILE> - Name of the file where RSS values will be logged.
+ #
+ # The script first checks if the correct number of arguments is provided.
+ # It then verifies that the given PID exists. If it does, it starts recording the RSS.
+ # On every iteration, the script fetches the current RSS of the process using the 'ps' command,
+ # then appends the RSS value along with a timestamp to the output file.
+ # This recording is done every second.
+ # The loop stops if the process with the given PID terminates.
+ # An informative message is printed out when recording starts and when it stops.
+
+ # Check if the correct number of arguments is passed
+ if [ "$#" -ne 2 ]; then
+   echo "Usage: $0 <PID> <OUTPUT_FILE>"
+   exit 1
+ fi
+
+ PID=$1
+ OUTPUT_FILE=$2
+
+ # Check if the given PID exists
+ if ! kill -0 $PID 2>/dev/null; then
+   echo "Error: PID $PID does not exist."
+   exit 1
+ fi
+
+ # Start recording the RSS
+ echo "Recording RSS for PID $PID every second to $OUTPUT_FILE..."
+
+ while kill -0 $PID 2>/dev/null; do
+   RSS=$(ps -o rss= -p $PID)
+   if [ -z "$RSS" ]; then
+     echo "Error: Failed to get RSS for PID $PID."
+     exit 1
+   fi
+   TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
+   echo "$TIMESTAMP: $RSS KB" >> $OUTPUT_FILE
+   sleep 1
+ done
+
+ echo "Process $PID has terminated. Stopping recording."
data/config/locales/errors.yml CHANGED
@@ -35,6 +35,10 @@ en:
  key_must_be_a_symbol: All keys under the kafka settings scope need to be symbols
  max_timeout_vs_pause_max_timeout: pause_timeout must be less or equal to pause_max_timeout
  shutdown_timeout_vs_max_wait_time: shutdown_timeout must be more than max_wait_time
+ admin.kafka_format: needs to be a hash
+ admin.group_id_format: 'needs to be a string with a Kafka accepted format'
+ admin.max_wait_time_format: 'needs to be an integer bigger than 0'
+ admin.max_attempts_format: 'needs to be an integer bigger than 0'

  server_cli_options:
  missing: needs to be present
data/config/locales/pro_errors.yml CHANGED
@@ -28,6 +28,20 @@ en:
  pause_with_exponential_backoff_format: needs to be either true or false
  pause_timeout_max_timeout_vs_pause_max_timeout: pause_timeout must be less or equal to pause_max_timeout

+ patterns.active_format: 'needs to be boolean'
+ patterns.type_format: 'needs to be :matcher, :discovered or :regular'
+
+ consumer_group:
+   patterns_format: must be an array with hashes
+   patterns_missing: needs to be present
+   patterns_regexps_not_unique: 'must be unique within consumer group'
+
+ pattern:
+   regexp_format: must be a regular expression
+   name_format: 'needs to be a string with a Kafka accepted format'
+   regexp_string_format: 'needs to be a string and start with ^'
+   missing: needs to be present
+
  config:
  encryption.active_format: 'needs to be either true or false'
  encryption.public_key_invalid: 'is not a valid public RSA key'
@@ -37,3 +51,6 @@ en:
  encryption.version_format: must be a non-empty string
  encryption.public_key_format: 'is not a valid public RSA key'
  encryption.private_keys_invalid: 'contains an invalid private RSA key string'
+
+ patterns.ttl_format: needs to be an integer bigger than 0
+ patterns.ttl_missing: needs to be present
data/lib/karafka/admin.rb CHANGED
@@ -7,32 +7,9 @@ module Karafka
  # Since admin actions are not performed that often, that should be ok.
  #
  # @note It always uses the primary defined cluster and does not support multi-cluster work.
- #   If you need this, just replace the cluster info for the time you use this
+ #   Cluster on which operations are performed can be changed via `admin.kafka` config, however
+ #   there is no multi-cluster runtime support.
  module Admin
- # We wait only for this amount of time before raising error as we intercept this error and
- # retry after checking that the operation was finished or failed using external factor.
- MAX_WAIT_TIMEOUT = 1
-
- # How many times should be try. 1 x 60 => 60 seconds wait in total
- MAX_ATTEMPTS = 60
-
- # Defaults for config
- CONFIG_DEFAULTS = {
- 'group.id': 'karafka_admin',
- # We want to know when there is no more data not to end up with an endless loop
- 'enable.partition.eof': true,
- 'statistics.interval.ms': 0,
- # Fetch at most 5 MBs when using admin
- 'fetch.message.max.bytes': 5 * 1_048_576,
- # Do not commit offset automatically, this prevents offset tracking for operations involving
- # a consumer instance
- 'enable.auto.commit': false,
- # Make sure that topic metadata lookups do not create topics accidentally
- 'allow.auto.create.topics': false
- }.freeze
-
- private_constant :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :MAX_ATTEMPTS
-
  class << self
  # Allows us to read messages from the topic
  #
@@ -123,7 +100,7 @@ module Karafka
  handler = admin.create_topic(name, partitions, replication_factor, topic_config)

  with_re_wait(
- -> { handler.wait(max_wait_timeout: MAX_WAIT_TIMEOUT) },
+ -> { handler.wait(max_wait_timeout: app_config.admin.max_wait_time) },
  -> { topics_names.include?(name) }
  )
  end
@@ -137,7 +114,7 @@ module Karafka
  handler = admin.delete_topic(name)

  with_re_wait(
- -> { handler.wait(max_wait_timeout: MAX_WAIT_TIMEOUT) },
+ -> { handler.wait(max_wait_timeout: app_config.admin.max_wait_time) },
  -> { !topics_names.include?(name) }
  )
  end
@@ -152,7 +129,7 @@ module Karafka
  handler = admin.create_partitions(name, partitions)

  with_re_wait(
- -> { handler.wait(max_wait_timeout: MAX_WAIT_TIMEOUT) },
+ -> { handler.wait(max_wait_timeout: app_config.admin.max_wait_time) },
  -> { topic(name).fetch(:partition_count) >= partitions }
  )
  end
@@ -242,7 +219,7 @@ module Karafka
  rescue Rdkafka::AbstractHandle::WaitTimeoutError
  return if breaker.call

- retry if attempt <= MAX_ATTEMPTS
+ retry if attempt <= app_config.admin.max_attempts

  raise
  end
@@ -251,12 +228,18 @@ module Karafka
  # @param settings [Hash] extra settings for config (if needed)
  # @return [::Rdkafka::Config] rdkafka config
  def config(type, settings)
- config_hash = Karafka::Setup::AttributesMap.public_send(
- type,
- Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS).merge!(settings)
+ group_id = app_config.consumer_mapper.call(
+ app_config.admin.group_id
  )

- ::Rdkafka::Config.new(config_hash)
+ app_config
+ .kafka
+ .then(&:dup)
+ .merge(app_config.admin.kafka)
+ .merge!(settings)
+ .tap { |config| config[:'group.id'] = group_id }
+ .then { |config| Karafka::Setup::AttributesMap.public_send(type, config) }
+ .then { |config| ::Rdkafka::Config.new(config) }
  end

  # Resolves the offset if offset is in a time format. Otherwise returns the offset without
@@ -281,6 +264,11 @@ module Karafka
  offset
  end
  end
+
+ # @return [Karafka::Core::Configurable::Node] root node config
+ def app_config
+ ::Karafka::App.config
+ end
  end
  end
  end
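The hunks above drop the hard-coded `MAX_WAIT_TIMEOUT`, `MAX_ATTEMPTS` and `CONFIG_DEFAULTS` constants in favour of the new `config.admin` scope (validated in `contracts/config.rb` further down). A minimal sketch of overriding that scope in an application's setup block; the key names follow the new contract, while the values are illustrative rather than the shipped defaults:

```ruby
# Sketch only: admin keys per the new contract (kafka, group_id, max_wait_time, max_attempts).
class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'my_app'

    # Admin-only Kafka overrides; anything not set here is inherited from config.kafka
    config.admin.kafka = { 'allow.auto.create.topics': false }
    # Raw group id; it is still passed through the configured ConsumerMapper
    config.admin.group_id = 'karafka_admin'
    # Illustrative values, not necessarily the 2.2.0 defaults
    config.admin.max_wait_time = 1
    config.admin.max_attempts = 60
  end
end
```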
data/lib/karafka/connection/client.rb CHANGED
@@ -510,7 +510,7 @@ module Karafka

  # Subscription needs to happen after we assigned the rebalance callbacks just in case of
  # a race condition
- consumer.subscribe(*@subscription_group.topics.map(&:name))
+ consumer.subscribe(*@subscription_group.subscriptions)
  consumer
  end

data/lib/karafka/contracts/config.rb CHANGED
@@ -34,6 +34,14 @@ module Karafka
  required(:max_wait_time) { |val| val.is_a?(Integer) && val.positive? }
  required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }

+ nested(:admin) do
+ # Can be empty because it inherits values from the root kafka
+ required(:kafka) { |val| val.is_a?(Hash) }
+ required(:group_id) { |val| val.is_a?(String) && Contracts::TOPIC_REGEXP.match?(val) }
+ required(:max_wait_time) { |val| val.is_a?(Integer) && val.positive? }
+ required(:max_attempts) { |val| val.is_a?(Integer) && val.positive? }
+ end
+
  # We validate internals just to be sure, that they are present and working
  nested(:internal) do
  required(:status) { |val| !val.nil? }
@@ -74,6 +82,7 @@ module Karafka
  end
  end

+ # Ensure all root kafka keys are symbols
  virtual do |data, errors|
  next unless errors.empty?

@@ -88,6 +97,21 @@ module Karafka
  detected_errors
  end

+ # Ensure all admin kafka keys are symbols
+ virtual do |data, errors|
+ next unless errors.empty?
+
+ detected_errors = []
+
+ data.fetch(:admin).fetch(:kafka).each_key do |key|
+ next if key.is_a?(Symbol)
+
+ detected_errors << [[:admin, :kafka, key], :key_must_be_a_symbol]
+ end
+
+ detected_errors
+ end
+
  virtual do |data, errors|
  next unless errors.empty?

data/lib/karafka/errors.rb CHANGED
@@ -41,6 +41,9 @@ module Karafka
  # Raised when the license token is not valid
  InvalidLicenseTokenError = Class.new(BaseError)

+ # Raised on an attempt to deserialize a cleared message
+ MessageClearedError = Class.new(BaseError)
+
  # This should never happen. Please open an issue if it does.
  InvalidCoordinatorStateError = Class.new(BaseError)

data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb CHANGED
@@ -12,11 +12,14 @@ module Karafka
  include ::Karafka::Core::Configurable
  extend Forwardable

- def_delegators :config, :client
+ def_delegators :config, :client, :service_name

  # `Datadog::Tracing` client that we should use to trace stuff
  setting :client

+ # @see https://docs.datadoghq.com/tracing/trace_collection/dd_libraries/ruby
+ setting :service_name, default: nil
+
  configure

  # Log levels that we use in this particular listener
@@ -44,7 +47,7 @@ module Karafka
  #
  # @param event [Karafka::Core::Monitoring::Event] event details including payload
  def on_worker_process(event)
- current_span = client.trace('karafka.consumer')
+ current_span = client.trace('karafka.consumer', service: service_name)
  push_tags

  job = event[:job]
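The new `service_name` setting above is forwarded to `client.trace` for the `karafka.consumer` span. A hedged wiring sketch, assuming the listener is instantiated with a configuration block as in the Karafka DataDog integration docs and that `ddtrace` is already set up; the service name string is a placeholder:

```ruby
require 'ddtrace'

# Sketch only; the service name is illustrative.
dd_listener = ::Karafka::Instrumentation::Vendors::Datadog::LoggerListener.new do |config|
  config.client = Datadog::Tracing
  # New in this release: overrides the service reported on karafka.consumer spans
  config.service_name = 'my-karafka-consumer'
end

Karafka.monitor.subscribe(dd_listener)
```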
data/lib/karafka/messages/builders/message.rb CHANGED
@@ -23,11 +23,15 @@ module Karafka
  received_at: received_at
  ).freeze

+ # Get the raw payload
+ payload = kafka_message.payload
+
+ # And nullify it in the kafka message. This can save a lot of memory when used with
+ # the Pro Cleaner API
+ kafka_message.instance_variable_set('@payload', nil)
+
  # Karafka messages cannot be frozen because of the lazy deserialization feature
- Karafka::Messages::Message.new(
- kafka_message.payload,
- metadata
- )
+ Karafka::Messages::Message.new(payload, metadata)
  end
  end
  end
data/lib/karafka/pro/active_job/consumer.rb CHANGED
@@ -25,7 +25,7 @@ module Karafka
  class Consumer < ::Karafka::ActiveJob::Consumer
  # Runs ActiveJob jobs processing and handles lrj if needed
  def consume
- messages.each do |message|
+ messages.each(clean: true) do |message|
  # If for any reason we've lost this partition, not worth iterating over new messages
  # as they are no longer ours
  break if revoked?
data/lib/karafka/pro/cleaner/errors.rb ADDED
@@ -0,0 +1,27 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Cleaner
+       # Cleaner related errors
+       module Errors
+         # Base for all the cleaner errors
+         BaseError = Class.new(::Karafka::Errors::BaseError)
+
+         # Raised when trying to deserialize a message that has already been cleaned
+         MessageCleanedError = Class.new(BaseError)
+       end
+     end
+   end
+ end
data/lib/karafka/pro/cleaner/messages/message.rb ADDED
@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Cleaner
+       # Cleaner messages components related enhancements
+       module Messages
+         # Extensions to the message that allow for granular memory control on a per message basis
+         module Message
+           # @return [Object] lazy-deserialized data (deserialized upon first request)
+           def payload
+             # If message has already been cleaned, it cannot be deserialized again
+             cleaned? ? raise(Errors::MessageCleanedError) : super
+           end
+
+           # @return [Boolean] true if the message has been cleaned
+           def cleaned?
+             @raw_payload == false
+           end
+
+           # Cleans the message payload and removes the deserialized data references.
+           # This is useful when working with big messages that take a lot of space.
+           #
+           # After the message content is no longer needed, it can be removed so it does not consume
+           # space anymore.
+           def clean!
+             @deserialized = false
+             @raw_payload = false
+             @payload = nil
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/cleaner/messages/messages.rb ADDED
@@ -0,0 +1,42 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Cleaner
+       module Messages
+         # Extensions to the messages batch allowing for automatic cleaning of each message
+         # after it is processed.
+         module Messages
+           # @param clean [Boolean] do we want to clean each message after we're done working
+           #   with it.
+           # @yield block we want to execute per each message
+           #
+           # @note Cleaning messages after we're done with each of them and did not fail does not
+           #   affect any other functionalities. The only crucial thing is to make sure that, if
+           #   DLQ is used, each message is marked as consumed when using this API, as otherwise
+           #   a cleaned message may be dispatched and that should never happen.
+           def each(clean: false)
+             @messages_array.each do |message|
+               yield(message)
+
+               next unless clean
+
+               message.clean!
+             end
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/cleaner.rb ADDED
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     # Feature that introduces granular memory management for each message and the messages iterator
+     #
+     # It allows for better resource allocation by providing an API to clear payload and raw payload
+     # from a message after those are no longer needed but before whole messages are freed and
+     # removed by Ruby GC.
+     #
+     # This can be useful when processing bigger batches or bigger messages one after another and
+     # wanting not to have all of the data loaded into memory.
+     #
+     # Can yield significant memory savings (up to 80%).
+     module Cleaner
+       class << self
+         # @param _config [Karafka::Core::Configurable::Node] root node config
+         def pre_setup(_config)
+           ::Karafka::Messages::Message.prepend(Messages::Message)
+           ::Karafka::Messages::Messages.prepend(Messages::Messages)
+         end
+
+         # @param _config [Karafka::Core::Configurable::Node] root node config
+         def post_setup(_config)
+           true
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/contracts/base.rb ADDED
@@ -0,0 +1,23 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     # Pro contracts that aim to replace or complement the general framework contracts
+     module Contracts
+       # Base for all the Pro contracts
+       class Base < ::Karafka::Contracts::Base
+       end
+     end
+   end
+ end