karafka 2.1.11 → 2.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8d1e3000ced82b96afc7c4588d20170e0908d3b41305eead2e5e0a850c9a70c4
-  data.tar.gz: 1b16fc0a06f1bfd9bdb1207dd1faac04d3a7f956ccb42edd45ada76fed6dcf09
+  metadata.gz: 262c2bbfa4fb09c4038ce559c71baa6c40b861497dd7ef3b1915ba6b8aa47652
+  data.tar.gz: a34d5fad3bdbd1a58a0938f33c8fd28a5df72cb89293d7835c41cfce09c9e736
 SHA512:
-  metadata.gz: 7e018450fae0ad666bff80ca9875750303946746c20e527200ee569a1a182c3d81bb71b64a0b2bd94b4a35c8cf204326b26752b1d5c0425a1f544ff9b4572323
-  data.tar.gz: 6025b3583179592313540231dd9d00354c30d8af3d49f482fc6c7dbd96359dbc7bbccee8cfa4cf1d14a2f5dfb3a9999b141317c782ec33af7503c4a8e26c616c
+  metadata.gz: 2fd4672fef274f5913b543d9b3a91aef8a8aba59a23e457a61f455cf6095ea8176c6f8e799f65602c7ef0bb15add27a960df70cd99913fe6b5340df9cfff8c31
+  data.tar.gz: 4b890531f2a783c72573bcc2e915cc70b85e09c5e43de76b979fc34ea732f51a2c8ec4f64e91c95293337973103c7eeee89e424d27fa35dc708f935364fa9db3
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
 # Karafka framework changelog

+## 2.1.13 (2023-08-28)
+- **[Feature]** Introduce Cleaning API for much better memory management for iterative data processing [Pro].
+- [Enhancement] Automatically free message resources after they are processed for ActiveJob jobs [Pro].
+- [Enhancement] Free memory used by the raw payload as fast as possible after obtaining it from `karafka-rdkafka`.
+- [Enhancement] Support changing `service_name` in the DataDog integration.
+
+## 2.1.12 (2023-08-25)
+- [Fix] Fix a case where DLQ + VP without intermediate marking would mark an earlier message than the last one.
+
 ## 2.1.11 (2023-08-23)
 - [Enhancement] Expand the error handling for offset related queries with timeout error retries.
 - [Enhancement] Allow for connection proxy timeouts configuration.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    karafka (2.1.11)
+    karafka (2.1.13)
       karafka-core (>= 2.1.1, < 2.2.0)
       thor (>= 0.20)
       waterdrop (>= 2.6.6, < 3.0.0)
data/bin/record_rss ADDED
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# This script monitors and records the Resident Set Size (RSS) of a process given its PID.
+# The RSS is logged every second to the specified output file until the process terminates.
+#
+# Usage:
+#   ./script_name.sh <PID> <OUTPUT_FILE>
+#
+# Arguments:
+#   <PID> - Process ID of the process you want to monitor.
+#   <OUTPUT_FILE> - Name of the file where RSS values will be logged.
+#
+# The script first checks if the correct number of arguments is provided.
+# It then verifies that the given PID exists. If it does, it starts recording the RSS.
+# For every iteration, the script fetches the current RSS of the process using the 'ps' command,
+# then appends the RSS value along with a timestamp to the output file.
+# This recording is done every second.
+# The loop stops if the process with the given PID terminates.
+# An informative message is printed out when recording starts and when it stops.
+
+# Check if the correct number of arguments is passed
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <PID> <OUTPUT_FILE>"
+  exit 1
+fi
+
+PID=$1
+OUTPUT_FILE=$2
+
+# Check if the given PID exists
+if ! kill -0 $PID 2>/dev/null; then
+  echo "Error: PID $PID does not exist."
+  exit 1
+fi
+
+# Start recording the RSS
+echo "Recording RSS for PID $PID every second to $OUTPUT_FILE..."
+
+while kill -0 $PID 2>/dev/null; do
+  RSS=$(ps -o rss= -p $PID)
+  if [ -z "$RSS" ]; then
+    echo "Error: Failed to get RSS for PID $PID."
+    exit 1
+  fi
+  TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
+  echo "$TIMESTAMP: $RSS KB" >> $OUTPUT_FILE
+  sleep 1
+done
+
+echo "Process $PID has terminated. Stopping recording."
data/lib/karafka/errors.rb CHANGED
@@ -41,6 +41,9 @@ module Karafka
     # Raised when the license token is not valid
     InvalidLicenseTokenError = Class.new(BaseError)

+    # Raised on an attempt to deserialize a cleared message
+    MessageClearedError = Class.new(BaseError)
+
     # This should never happen. Please open an issue if it does.
     InvalidCoordinatorStateError = Class.new(BaseError)

data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb CHANGED
@@ -12,11 +12,14 @@ module Karafka
           include ::Karafka::Core::Configurable
           extend Forwardable

-          def_delegators :config, :client
+          def_delegators :config, :client, :service_name

           # `Datadog::Tracing` client that we should use to trace stuff
           setting :client

+          # @see https://docs.datadoghq.com/tracing/trace_collection/dd_libraries/ruby
+          setting :service_name, default: nil
+
           configure

           # Log levels that we use in this particular listener
@@ -44,7 +47,7 @@ module Karafka
           #
           # @param event [Karafka::Core::Monitoring::Event] event details including payload
           def on_worker_process(event)
-            current_span = client.trace('karafka.consumer')
+            current_span = client.trace('karafka.consumer', service: service_name)
             push_tags

             job = event[:job]
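
With `service_name` delegated from the config, every consumer span can be grouped under a custom Datadog service. A minimal wiring sketch, assuming the documented listener subscription pattern and the `ddtrace` gem; the service name value is illustrative:

    require 'ddtrace'

    # Subscribe the Datadog listener with a custom service name. When
    # service_name is left nil (the default), spans fall back to the
    # globally configured Datadog service.
    listener = ::Karafka::Instrumentation::Vendors::Datadog::LoggerListener.new do |config|
      config.client = Datadog::Tracing
      config.service_name = 'my-app-karafka' # illustrative value
    end

    Karafka.monitor.subscribe(listener)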
data/lib/karafka/messages/builders/message.rb CHANGED
@@ -23,11 +23,15 @@ module Karafka
             received_at: received_at
           ).freeze

+          # Get the raw payload
+          payload = kafka_message.payload
+
+          # And nullify it in the kafka message. This can save a lot of memory when used with
+          # the Pro Cleaner API
+          kafka_message.instance_variable_set('@payload', nil)
+
           # Karafka messages cannot be frozen because of the lazy deserialization feature
-          Karafka::Messages::Message.new(
-            kafka_message.payload,
-            metadata
-          )
+          Karafka::Messages::Message.new(payload, metadata)
         end
       end
     end
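
Why nulling `@payload` on the rdkafka message matters: without it, the raw payload string stays referenced from both the rdkafka object and the new Karafka message, so it cannot be reclaimed until both die. A standalone, purely illustrative Ruby sketch of that principle (not karafka code):

    # Two holders referencing one large string keep it alive; dropping the
    # extra reference makes the string collectible as soon as the consuming
    # side is done with it.
    big_payload = 'x' * 50_000_000

    rdkafka_like = Struct.new(:payload).new(big_payload)
    karafka_like = Struct.new(:raw_payload).new(rdkafka_like.payload)

    rdkafka_like.payload = nil # equivalent of instance_variable_set('@payload', nil)
    big_payload = nil

    # Now only karafka_like pins the 50 MB string; clearing it frees the memory
    karafka_like.raw_payload = nil
    GC.start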
data/lib/karafka/pro/active_job/consumer.rb CHANGED
@@ -25,7 +25,7 @@ module Karafka
       class Consumer < ::Karafka::ActiveJob::Consumer
         # Runs ActiveJob jobs processing and handles lrj if needed
         def consume
-          messages.each do |message|
+          messages.each(clean: true) do |message|
             # If for any reason we've lost this partition, not worth iterating over new messages
             # as they are no longer ours
             break if revoked?
data/lib/karafka/pro/cleaner/errors.rb ADDED
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Cleaner
+      # Cleaner related errors
+      module Errors
+        # Base for all the cleaner errors
+        BaseError = Class.new(::Karafka::Errors::BaseError)
+
+        # Raised when trying to deserialize a message that has already been cleaned
+        MessageCleanedError = Class.new(BaseError)
+      end
+    end
+  end
+end
data/lib/karafka/pro/cleaner/messages/message.rb ADDED
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Cleaner
+      # Cleaner messages components related enhancements
+      module Messages
+        # Extensions to the message that allow for granular memory control on a per message basis
+        module Message
+          # @return [Object] lazy-deserialized data (deserialized upon first request)
+          def payload
+            # If the message has already been cleaned, it cannot be deserialized again
+            cleaned? ? raise(Errors::MessageCleanedError) : super
+          end
+
+          # @return [Boolean] true if the message has been cleaned
+          def cleaned?
+            @raw_payload == false
+          end
+
+          # Cleans the message payload and removes the deserialized data references.
+          # This is useful when working with big messages that take a lot of space.
+          #
+          # After the message content is no longer needed, it can be removed so it does not
+          # consume space anymore.
+          def clean!
+            @deserialized = false
+            @raw_payload = false
+            @payload = nil
+          end
+        end
+      end
+    end
+  end
+end
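
Taken together, `payload`, `cleaned?`, and `clean!` give per-message control over memory. A hedged usage sketch, assuming a Pro consumer with the Cleaner feature loaded; `persist` is a hypothetical application method:

    def consume
      messages.each do |message|
        persist(message.payload) # lazy deserialization happens here
        mark_as_consumed(message)

        message.clean!    # frees both the raw and the deserialized payload
        message.cleaned?  # => true

        # Any later access raises, guarding against use-after-clean:
        # message.payload # => raises Karafka::Pro::Cleaner::Errors::MessageCleanedError
      end
    end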
data/lib/karafka/pro/cleaner/messages/messages.rb ADDED
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Cleaner
+      module Messages
+        # Extensions to the messages batch allowing for automatic cleaning of each message
+        # after it is processed.
+        module Messages
+          # @param clean [Boolean] do we want to clean each message after we're done working
+          #   with it
+          # @yield block we want to execute per each message
+          #
+          # @note Cleaning messages after we're done with each of them (and did not fail) does
+          #   not affect any other functionalities. The only crucial thing is to make sure
+          #   that, when DLQ is used, each message is marked as consumed when using this API,
+          #   as otherwise a cleaned message may be dispatched to the DLQ and that should
+          #   never happen
+          def each(clean: false)
+            @messages_array.each do |message|
+              yield(message)

+              next unless clean
+
+              message.clean!
+            end
+          end
+        end
+      end
+    end
+  end
+end
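
The note above is the one operational caveat: with `clean: true`, each message is wiped right after the block returns, so any later DLQ dispatch of that message would hit the `cleaned?` guard shown further down in this diff. A sketch of the intended pattern, assuming a Pro consumer; `process` is a hypothetical application method:

    def consume
      messages.each(clean: true) do |message|
        process(message)
        # Mark before the automatic clean!, so a retry after a later error
        # never routes an already-cleaned message towards the DLQ
        mark_as_consumed(message)
      end
    end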
data/lib/karafka/pro/cleaner.rb ADDED
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Feature that introduces granular memory management for each message and the messages
+    # iterator
+    #
+    # It allows for better resource allocation by providing an API to clear the payload and
+    # raw payload from a message after those are no longer needed but before whole messages
+    # are freed and removed by the Ruby GC.
+    #
+    # This can be useful when processing bigger batches or bigger messages one after another
+    # and not wanting to have all of the data loaded into memory at once.
+    #
+    # Can yield significant memory savings (up to 80%).
+    module Cleaner
+      class << self
+        # @param _config [Karafka::Core::Configurable::Node] root node config
+        def pre_setup(_config)
+          ::Karafka::Messages::Message.prepend(Messages::Message)
+          ::Karafka::Messages::Messages.prepend(Messages::Messages)
+        end
+
+        # @param _config [Karafka::Core::Configurable::Node] root node config
+        def post_setup(_config)
+          true
+        end
+      end
+    end
+  end
+end
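
`pre_setup` relies on `Module#prepend`: the extension is inserted ahead of the original class in the ancestor chain, so its `payload` runs first and can delegate to the stock implementation via `super`. A self-contained illustration of the same mechanics (all names hypothetical, not karafka code):

    # Stand-in for Karafka::Messages::Message
    class DemoMessage
      def payload
        'deserialized data'
      end
    end

    # Stand-in for the Cleaner extension
    module CleanerGuard
      def payload
        raise 'message already cleaned' if @cleaned

        super # falls through to DemoMessage#payload
      end

      def clean!
        @cleaned = true
      end
    end

    DemoMessage.prepend(CleanerGuard)

    message = DemoMessage.new
    message.payload # => "deserialized data"
    message.clean!
    message.payload # => raises RuntimeError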
data/lib/karafka/pro/encryption/errors.rb CHANGED
@@ -16,8 +16,11 @@ module Karafka
     module Encryption
       # Encryption related errors
       module Errors
+        # Base for all the encryption errors
+        BaseError = Class.new(::Karafka::Errors::BaseError)
+
         # Raised when we have encountered an encryption key with a version we do not have
-        PrivateKeyNotFound = Class.new(::Karafka::Errors::BaseError)
+        PrivateKeyNotFound = Class.new(BaseError)
       end
     end
   end
data/lib/karafka/pro/loader.rb CHANGED
@@ -69,7 +69,8 @@ module Karafka
       # @return [Array<Module>] extra non-routing related pro features
       def features
         [
-          Encryption
+          Encryption,
+          Cleaner
         ]
       end

data/lib/karafka/pro/processing/strategies/dlq/default.rb CHANGED
@@ -28,7 +28,7 @@ module Karafka

           # When we encounter a non-recoverable message, we skip it and go on with our lives
           def handle_after_consume
-            coordinator.on_finished do
+            coordinator.on_finished do |last_group_message|
               return if revoked?

               if coordinator.success?
@@ -36,7 +36,7 @@ module Karafka

               return if coordinator.manual_pause?

-              mark_as_consumed(messages.last)
+              mark_as_consumed(last_group_message)
             elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
               retry_after_pause
             # If we've reached the number of retries that we could, we need to skip the first
@@ -74,6 +74,12 @@ module Karafka
           # @param skippable_message [Array<Karafka::Messages::Message>] message we want to
           #   dispatch to DLQ
           def dispatch_to_dlq(skippable_message)
+            # DLQ should never try to dispatch a message that was cleaned. If the message was
+            # cleaned, we will not have all the needed data. If you see this error, it means
+            # that your processing flow is not as expected and you have cleaned a message that
+            # should not have been cleaned, as it should go to the DLQ
+            raise(Cleaner::Errors::MessageCleanedError) if skippable_message.cleaned?
+
             producer.produce_async(
               build_dlq_message(
                 skippable_message
data/lib/karafka/pro/processing/strategies/dlq/ftr.rb CHANGED
@@ -31,7 +31,7 @@ module Karafka
           # The DLQ flow is standard here; what is not is the success component, where we
           # need to take the filtering into consideration
           def handle_after_consume
-            coordinator.on_finished do
+            coordinator.on_finished do |last_group_message|
               return if revoked?

               if coordinator.success?
@@ -39,7 +39,7 @@ module Karafka

               return if coordinator.manual_pause?

-              mark_as_consumed(messages.last)
+              mark_as_consumed(last_group_message)

               handle_post_filtering
             elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
 # Main module namespace
 module Karafka
   # Current Karafka version
-  VERSION = '2.1.11'
+  VERSION = '2.1.13'
 end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.1.11
+  version: 2.1.13
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
   msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
   -----END CERTIFICATE-----
-date: 2023-08-23 00:00:00.000000000 Z
+date: 2023-08-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -141,6 +141,7 @@ files:
 - bin/create_token
 - bin/integrations
 - bin/karafka
+- bin/record_rss
 - bin/rspecs
 - bin/scenario
 - bin/stress_many
@@ -219,6 +220,10 @@ files:
 - lib/karafka/pro/active_job/consumer.rb
 - lib/karafka/pro/active_job/dispatcher.rb
 - lib/karafka/pro/active_job/job_options_contract.rb
+- lib/karafka/pro/cleaner.rb
+- lib/karafka/pro/cleaner/errors.rb
+- lib/karafka/pro/cleaner/messages/message.rb
+- lib/karafka/pro/cleaner/messages/messages.rb
 - lib/karafka/pro/encryption.rb
 - lib/karafka/pro/encryption/cipher.rb
 - lib/karafka/pro/encryption/contracts/config.rb
metadata.gz.sig CHANGED
Binary file