karafka 2.1.11 → 2.1.13

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 8d1e3000ced82b96afc7c4588d20170e0908d3b41305eead2e5e0a850c9a70c4
- data.tar.gz: 1b16fc0a06f1bfd9bdb1207dd1faac04d3a7f956ccb42edd45ada76fed6dcf09
+ metadata.gz: 262c2bbfa4fb09c4038ce559c71baa6c40b861497dd7ef3b1915ba6b8aa47652
+ data.tar.gz: a34d5fad3bdbd1a58a0938f33c8fd28a5df72cb89293d7835c41cfce09c9e736
  SHA512:
- metadata.gz: 7e018450fae0ad666bff80ca9875750303946746c20e527200ee569a1a182c3d81bb71b64a0b2bd94b4a35c8cf204326b26752b1d5c0425a1f544ff9b4572323
- data.tar.gz: 6025b3583179592313540231dd9d00354c30d8af3d49f482fc6c7dbd96359dbc7bbccee8cfa4cf1d14a2f5dfb3a9999b141317c782ec33af7503c4a8e26c616c
+ metadata.gz: 2fd4672fef274f5913b543d9b3a91aef8a8aba59a23e457a61f455cf6095ea8176c6f8e799f65602c7ef0bb15add27a960df70cd99913fe6b5340df9cfff8c31
+ data.tar.gz: 4b890531f2a783c72573bcc2e915cc70b85e09c5e43de76b979fc34ea732f51a2c8ec4f64e91c95293337973103c7eeee89e424d27fa35dc708f935364fa9db3
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
  # Karafka framework changelog

+ ## 2.1.13 (2023-08-28)
+ - **[Feature]** Introduce Cleaning API for much better memory management for iterative data processing [Pro].
+ - [Enhancement] Automatically free message resources after they are processed for ActiveJob jobs [Pro].
+ - [Enhancement] Free memory used by the raw payload as fast as possible after obtaining it from `karafka-rdkafka`.
+ - [Enhancement] Support changing `service_name` in the DataDog integration.
+
+ ## 2.1.12 (2023-08-25)
+ - [Fix] Fix a case where DLQ + VP without intermediate marking would mark an earlier message than the last one.
+
  ## 2.1.11 (2023-08-23)
  - [Enhancement] Expand the error handling for offset-related queries with timeout error retries.
  - [Enhancement] Allow for connection proxy timeouts configuration.
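Taken together with the rest of this diff, the Cleaning API from the 2.1.13 feature entry boils down to a per-message `clean!` call plus a `clean:` flag on batch iteration. A minimal, hedged sketch of how an application consumer could opt in (the consumer class and the `LogsArchive` collaborator are illustrative assumptions, not part of this release):

    class LogsConsumer < ApplicationConsumer
      def consume
        # With clean: true, each message's raw and deserialized payload is freed
        # right after the block finishes for it, keeping memory usage flat on big batches
        messages.each(clean: true) do |message|
          LogsArchive.store(message.payload)
        end
      end
    end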
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.1.11)
+ karafka (2.1.13)
  karafka-core (>= 2.1.1, < 2.2.0)
  thor (>= 0.20)
  waterdrop (>= 2.6.6, < 3.0.0)
data/bin/record_rss ADDED
@@ -0,0 +1,50 @@
+ #!/bin/bash
+
+ # This script monitors and records the Resident Set Size (RSS) of a process given its PID.
+ # The RSS is logged every second to the specified output file until the process terminates.
+ #
+ # Usage:
+ #   ./script_name.sh <PID> <OUTPUT_FILE>
+ #
+ # Arguments:
+ #   <PID> - Process ID of the process you want to monitor.
+ #   <OUTPUT_FILE> - Name of the file where RSS values will be logged.
+ #
+ # The script first checks if the correct number of arguments is provided.
+ # It then verifies that the given PID exists. If it does, it starts recording the RSS.
+ # For every iteration, the script fetches the current RSS of the process using the 'ps' command,
+ # then appends the RSS value along with a timestamp to the output file.
+ # This recording is done every second.
+ # The loop stops if the process with the given PID terminates.
+ # An informative message is printed out when recording starts and when it stops.
+
+ # Check if the correct number of arguments is passed
+ if [ "$#" -ne 2 ]; then
+   echo "Usage: $0 <PID> <OUTPUT_FILE>"
+   exit 1
+ fi
+
+ PID=$1
+ OUTPUT_FILE=$2
+
+ # Check if the given PID exists
+ if ! kill -0 $PID 2>/dev/null; then
+   echo "Error: PID $PID does not exist."
+   exit 1
+ fi
+
+ # Start recording the RSS
+ echo "Recording RSS for PID $PID every second to $OUTPUT_FILE..."
+
+ while kill -0 $PID 2>/dev/null; do
+   RSS=$(ps -o rss= -p $PID)
+   if [ -z "$RSS" ]; then
+     echo "Error: Failed to get RSS for PID $PID."
+     exit 1
+   fi
+   TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
+   echo "$TIMESTAMP: $RSS KB" >> $OUTPUT_FILE
+   sleep 1
+ done
+
+ echo "Process $PID has terminated. Stopping recording."
@@ -41,6 +41,9 @@ module Karafka
  # Raised when the license token is not valid
  InvalidLicenseTokenError = Class.new(BaseError)

+ # Raised on attempt to deserialize a cleared message
+ MessageClearedError = Class.new(BaseError)
+
  # This should never happen. Please open an issue if it does.
  InvalidCoordinatorStateError = Class.new(BaseError)

@@ -12,11 +12,14 @@ module Karafka
  include ::Karafka::Core::Configurable
  extend Forwardable

- def_delegators :config, :client
+ def_delegators :config, :client, :service_name

  # `Datadog::Tracing` client that we should use to trace stuff
  setting :client

+ # @see https://docs.datadoghq.com/tracing/trace_collection/dd_libraries/ruby
+ setting :service_name, default: nil
+
  configure

  # Log levels that we use in this particular listener
@@ -44,7 +47,7 @@ module Karafka
  #
  # @param event [Karafka::Core::Monitoring::Event] event details including payload
  def on_worker_process(event)
- current_span = client.trace('karafka.consumer')
+ current_span = client.trace('karafka.consumer', service: service_name)
  push_tags

  job = event[:job]
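A sketch of how the new `service_name` setting might be wired up when subscribing the DataDog tracing listener. The `LoggerListener` class path and the `Datadog::Tracing` client follow Karafka's usual DataDog integration conventions and should be treated as assumptions here rather than something introduced by this diff:

    require 'ddtrace'

    # Configure the listener with the tracing client and the new service_name setting
    dd_listener = ::Karafka::Instrumentation::Vendors::Datadog::LoggerListener.new do |config|
      config.client = Datadog::Tracing
      config.service_name = 'orders-karafka-consumer'
    end

    Karafka.monitor.subscribe(dd_listener)

With `service_name` left at its default of `nil`, the trace call behaves as before, so existing setups are unaffected.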
@@ -23,11 +23,15 @@ module Karafka
  received_at: received_at
  ).freeze

+ # Get the raw payload
+ payload = kafka_message.payload
+
+ # And nullify it in the kafka message. This can save a lot of memory when used with
+ # the Pro Cleaner API
+ kafka_message.instance_variable_set('@payload', nil)
+
  # Karafka messages cannot be frozen because of the lazy deserialization feature
- Karafka::Messages::Message.new(
- kafka_message.payload,
- metadata
- )
+ Karafka::Messages::Message.new(payload, metadata)
  end
  end
  end
@@ -25,7 +25,7 @@ module Karafka
  class Consumer < ::Karafka::ActiveJob::Consumer
  # Runs ActiveJob jobs processing and handles lrj if needed
  def consume
- messages.each do |message|
+ messages.each(clean: true) do |message|
  # If for any reason we've lost this partition, not worth iterating over new messages
  # as they are no longer ours
  break if revoked?
data/lib/karafka/pro/cleaner/errors.rb ADDED
@@ -0,0 +1,27 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Cleaner
+       # Cleaner related errors
+       module Errors
+         # Base for all the cleaner errors
+         BaseError = Class.new(::Karafka::Errors::BaseError)
+
+         # Raised when trying to deserialize a message that has already been cleaned
+         MessageCleanedError = Class.new(BaseError)
+       end
+     end
+   end
+ end
data/lib/karafka/pro/cleaner/messages/message.rb ADDED
@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Cleaner
+       # Cleaner messages components related enhancements
+       module Messages
+         # Extensions to the message that allow for granular memory control on a per message basis
+         module Message
+           # @return [Object] lazy-deserialized data (deserialized upon first request)
+           def payload
+             # If message has already been cleaned, it cannot be deserialized again
+             cleaned? ? raise(Errors::MessageCleanedError) : super
+           end
+
+           # @return [Boolean] true if the message has been cleaned
+           def cleaned?
+             @raw_payload == false
+           end
+
+           # Cleans the message payload and removes the deserialized data references
+           # This is useful when working with big messages that take a lot of space.
+           #
+           # After the message content is no longer needed, it can be removed so it does not consume
+           # space anymore.
+           def clean!
+             @deserialized = false
+             @raw_payload = false
+             @payload = nil
+           end
+         end
+       end
+     end
+   end
+ end
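As a rough sketch of the per-message API this module adds, once a payload has been handled, `clean!` frees it, and any later `payload` call raises `MessageCleanedError`. The consumer class and the `EventsStore` collaborator below are illustrative assumptions, not part of the diff:

    class EventsConsumer < ApplicationConsumer
      def consume
        messages.each do |message|
          # Work with the deserialized payload while it is still present
          EventsStore.persist(message.payload)

          # Frees both the raw and the deserialized payload
          message.clean!

          message.cleaned? # => true
          # message.payload would now raise Karafka::Pro::Cleaner::Errors::MessageCleanedError
        end
      end
    end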
data/lib/karafka/pro/cleaner/messages/messages.rb ADDED
@@ -0,0 +1,42 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Cleaner
+       module Messages
+         # Extensions to the messages batch allowing for automatic cleaning of each message after
+         # it is processed.
+         module Messages
+           # @param clean [Boolean] do we want to clean each message after we're done working with
+           #   it.
+           # @yield block we want to execute per each message
+           #
+           # @note Cleaning messages after we're done with each of them (and did not fail) does not
+           #   affect any other functionalities. The only crucial thing is to make sure that, if DLQ
+           #   is used, each message is marked as consumed when using this API, as otherwise a
+           #   cleaned message may be dispatched and that should never happen
+           def each(clean: false)
+             @messages_array.each do |message|
+               yield(message)
+
+               next unless clean
+
+               message.clean!
+             end
+           end
+         end
+       end
+     end
+   end
+ end
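One detail worth noting from the code above: `clean!` runs only after the block returns for a given message, so a block that raises leaves the current message intact while earlier, successfully processed messages have already been cleaned. A small illustrative sketch (the `Importer` name is an assumption):

    messages.each(clean: true) do |message|
      # If this raises, the current message is NOT cleaned, since cleaning
      # happens only after a successful yield for that message
      Importer.call(message.payload)
    end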
data/lib/karafka/pro/cleaner.rb ADDED
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     # Feature that introduces granular memory management for each message and the messages iterator
+     #
+     # It allows for better resource allocation by providing an API to clear the payload and raw
+     # payload from a message after those are no longer needed but before whole messages are freed
+     # and removed by Ruby GC.
+     #
+     # This can be useful when processing bigger batches or bigger messages one after another
+     # without having to keep all of the data loaded into memory at the same time.
+     #
+     # Can yield significant memory savings (up to 80%).
+     module Cleaner
+       class << self
+         # @param _config [Karafka::Core::Configurable::Node] root node config
+         def pre_setup(_config)
+           ::Karafka::Messages::Message.prepend(Messages::Message)
+           ::Karafka::Messages::Messages.prepend(Messages::Messages)
+         end
+
+         # @param _config [Karafka::Core::Configurable::Node] root node config
+         def post_setup(_config)
+           true
+         end
+       end
+     end
+   end
+ end
@@ -16,8 +16,11 @@ module Karafka
  module Encryption
  # Encryption related errors
  module Errors
+ # Base for all the encryption errors
+ BaseError = Class.new(::Karafka::Errors::BaseError)
+
  # Raised when we have encountered encryption key with version we do not have
- PrivateKeyNotFound = Class.new(::Karafka::Errors::BaseError)
+ PrivateKeyNotFound = Class.new(BaseError)
  end
  end
  end
@@ -69,7 +69,8 @@ module Karafka
  # @return [Array<Module>] extra non-routing related pro features
  def features
  [
- Encryption
+ Encryption,
+ Cleaner
  ]
  end

@@ -28,7 +28,7 @@ module Karafka

  # When we encounter non-recoverable message, we skip it and go on with our lives
  def handle_after_consume
- coordinator.on_finished do
+ coordinator.on_finished do |last_group_message|
  return if revoked?

  if coordinator.success?
@@ -36,7 +36,7 @@ module Karafka

  return if coordinator.manual_pause?

- mark_as_consumed(messages.last)
+ mark_as_consumed(last_group_message)
  elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
  retry_after_pause
  # If we've reached number of retries that we could, we need to skip the first
@@ -74,6 +74,12 @@ module Karafka
  # @param skippable_message [Array<Karafka::Messages::Message>] message we want to
  # dispatch to DLQ
  def dispatch_to_dlq(skippable_message)
+ # DLQ should never try to dispatch a message that was cleaned. If the message was
+ # cleaned, we will not have all the needed data. If you see this error, it means
+ # that your processing flow is not as expected and you have cleaned a message that
+ # should not be cleaned, as it should go to the DLQ
+ raise(Cleaner::Errors::MessageCleanedError) if skippable_message.cleaned?
+
  producer.produce_async(
  build_dlq_message(
  skippable_message
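A sketch of the processing flow this guard expects when cleaning is combined with a DLQ-enabled topic, following the note in the Messages extension above: mark each message as consumed before cleaning it, so a message that might still need to go to the DLQ is never cleaned. The consumer and `OrdersImporter` names are assumptions:

    class OrdersConsumer < ApplicationConsumer
      def consume
        messages.each do |message|
          OrdersImporter.call(message.payload)

          # Marking first means a later failure can never require dispatching
          # this already-cleaned message to the DLQ
          mark_as_consumed(message)
          message.clean!
        end
      end
    end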
@@ -31,7 +31,7 @@ module Karafka
  # DLQ flow is standard here, what is not, is the success component where we need to
  # take into consideration the filtering
  def handle_after_consume
- coordinator.on_finished do
+ coordinator.on_finished do |last_group_message|
  return if revoked?

  if coordinator.success?
@@ -39,7 +39,7 @@ module Karafka

  return if coordinator.manual_pause?

- mark_as_consumed(messages.last)
+ mark_as_consumed(last_group_message)

  handle_post_filtering
  elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
  # Current Karafka version
- VERSION = '2.1.11'
+ VERSION = '2.1.13'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.1.11
+ version: 2.1.13
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
  -----END CERTIFICATE-----
- date: 2023-08-23 00:00:00.000000000 Z
+ date: 2023-08-28 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: karafka-core
@@ -141,6 +141,7 @@ files:
  - bin/create_token
  - bin/integrations
  - bin/karafka
+ - bin/record_rss
  - bin/rspecs
  - bin/scenario
  - bin/stress_many
@@ -219,6 +220,10 @@ files:
  - lib/karafka/pro/active_job/consumer.rb
  - lib/karafka/pro/active_job/dispatcher.rb
  - lib/karafka/pro/active_job/job_options_contract.rb
+ - lib/karafka/pro/cleaner.rb
+ - lib/karafka/pro/cleaner/errors.rb
+ - lib/karafka/pro/cleaner/messages/message.rb
+ - lib/karafka/pro/cleaner/messages/messages.rb
  - lib/karafka/pro/encryption.rb
  - lib/karafka/pro/encryption/cipher.rb
  - lib/karafka/pro/encryption/contracts/config.rb
metadata.gz.sig CHANGED
Binary file