karafka 2.1.11 → 2.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +1 -1
- data/bin/record_rss +50 -0
- data/lib/karafka/errors.rb +3 -0
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +5 -2
- data/lib/karafka/messages/builders/message.rb +8 -4
- data/lib/karafka/pro/active_job/consumer.rb +1 -1
- data/lib/karafka/pro/cleaner/errors.rb +27 -0
- data/lib/karafka/pro/cleaner/messages/message.rb +46 -0
- data/lib/karafka/pro/cleaner/messages/messages.rb +42 -0
- data/lib/karafka/pro/cleaner.rb +41 -0
- data/lib/karafka/pro/encryption/errors.rb +4 -1
- data/lib/karafka/pro/loader.rb +2 -1
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +8 -2
- data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +2 -2
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +6 -3
- metadata +7 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 262c2bbfa4fb09c4038ce559c71baa6c40b861497dd7ef3b1915ba6b8aa47652
+  data.tar.gz: a34d5fad3bdbd1a58a0938f33c8fd28a5df72cb89293d7835c41cfce09c9e736
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2fd4672fef274f5913b543d9b3a91aef8a8aba59a23e457a61f455cf6095ea8176c6f8e799f65602c7ef0bb15add27a960df70cd99913fe6b5340df9cfff8c31
+  data.tar.gz: 4b890531f2a783c72573bcc2e915cc70b85e09c5e43de76b979fc34ea732f51a2c8ec4f64e91c95293337973103c7eeee89e424d27fa35dc708f935364fa9db3
checksums.yaml.gz.sig
CHANGED
Binary file
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
 # Karafka framework changelog
 
+## 2.1.13 (2023-08-28)
+- **[Feature]** Introduce Cleaning API for much better memory management for iterative data processing [Pro].
+- [Enhancement] Automatically free message resources after processing for ActiveJob jobs [Pro].
+- [Enhancement] Free memory used by the raw payload as fast as possible after obtaining it from `karafka-rdkafka`.
+- [Enhancement] Support changing `service_name` in the DataDog integration.
+
+## 2.1.12 (2023-08-25)
+- [Fix] Fix a case where DLQ + VP without intermediate marking would mark an earlier message than the last one.
+
 ## 2.1.11 (2023-08-23)
 - [Enhancement] Expand the error handling for offset related queries with timeout error retries.
 - [Enhancement] Allow for connection proxy timeouts configuration.
data/Gemfile.lock
CHANGED
data/bin/record_rss
ADDED
@@ -0,0 +1,50 @@
#!/bin/bash

# This script monitors and records the Resident Set Size (RSS) of a process given its PID.
# The RSS is logged every second to the specified output file until the process terminates.
#
# Usage:
#   ./script_name.sh <PID> <OUTPUT_FILE>
#
# Arguments:
#   <PID> - Process ID of the process you want to monitor.
#   <OUTPUT_FILE> - Name of the file where RSS values will be logged.
#
# The script first checks if the correct number of arguments are provided.
# It then verifies if the given PID exists. If it does, it starts recording the RSS.
# For every iteration, the script fetches the current RSS of the process using the 'ps' command,
# then appends the RSS value along with a timestamp to the output file.
# This recording is done every second.
# The loop stops if the process with the given PID terminates.
# An informative message is printed out when recording starts and when it stops.

# Check if the correct number of arguments are passed
if [ "$#" -ne 2 ]; then
  echo "Usage: $0 <PID> <OUTPUT_FILE>"
  exit 1
fi

PID=$1
OUTPUT_FILE=$2

# Check if the given PID exists
if ! kill -0 $PID 2>/dev/null; then
  echo "Error: PID $PID does not exist."
  exit 1
fi

# Start recording the RSS
echo "Recording RSS for PID $PID every second to $OUTPUT_FILE..."

while kill -0 $PID 2>/dev/null; do
  RSS=$(ps -o rss= -p $PID)
  if [ -z "$RSS" ]; then
    echo "Error: Failed to get RSS for PID $PID."
    exit 1
  fi
  TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
  echo "$TIMESTAMP: $RSS KB" >> $OUTPUT_FILE
  sleep 1
done

echo "Process $PID has terminated. Stopping recording."
data/lib/karafka/errors.rb
CHANGED
@@ -41,6 +41,9 @@ module Karafka
     # Raised when the license token is not valid
     InvalidLicenseTokenError = Class.new(BaseError)
 
+    # Raised on attempt to deserialize a cleared message
+    MessageClearedError = Class.new(BaseError)
+
     # This should never happen. Please open an issue if it does.
     InvalidCoordinatorStateError = Class.new(BaseError)
 
data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb
CHANGED
@@ -12,11 +12,14 @@ module Karafka
       include ::Karafka::Core::Configurable
       extend Forwardable
 
-      def_delegators :config, :client
+      def_delegators :config, :client, :service_name
 
       # `Datadog::Tracing` client that we should use to trace stuff
       setting :client
 
+      # @see https://docs.datadoghq.com/tracing/trace_collection/dd_libraries/ruby
+      setting :service_name, default: nil
+
       configure
 
       # Log levels that we use in this particular listener
@@ -44,7 +47,7 @@ module Karafka
       #
       # @param event [Karafka::Core::Monitoring::Event] event details including payload
       def on_worker_process(event)
-        current_span = client.trace('karafka.consumer')
+        current_span = client.trace('karafka.consumer', service: service_name)
         push_tags
 
         job = event[:job]
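The new `service_name` setting defaults to `nil`, in which case the consumer trace falls back to the tracer's own default service. A minimal sketch of opting into a custom name, assuming the block-based listener setup from the Karafka DataDog integration documentation (the `Datadog::Tracing` client comes from that setup; the `'karafka-consumer'` value is an arbitrary example):

require 'ddtrace'
require 'karafka/instrumentation/vendors/datadog/logger_listener'

# Configure the listener; service_name overrides the service reported for the
# 'karafka.consumer' spans created in on_worker_process above.
dd_listener = ::Karafka::Instrumentation::Vendors::Datadog::LoggerListener.new do |config|
  config.client = Datadog::Tracing
  config.service_name = 'karafka-consumer' # hypothetical service name
end

Karafka.monitor.subscribe(dd_listener)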
data/lib/karafka/messages/builders/message.rb
CHANGED
@@ -23,11 +23,15 @@ module Karafka
           received_at: received_at
         ).freeze
 
+        # Get the raw payload
+        payload = kafka_message.payload
+
+        # And nullify it in the kafka message. This can save a lot of memory when used with
+        # the Pro Cleaner API
+        kafka_message.instance_variable_set('@payload', nil)
+
         # Karafka messages cannot be frozen because of the lazy deserialization feature
-        Karafka::Messages::Message.new(
-          kafka_message.payload,
-          metadata
-        )
+        Karafka::Messages::Message.new(payload, metadata)
       end
     end
   end
data/lib/karafka/pro/active_job/consumer.rb
CHANGED
@@ -25,7 +25,7 @@ module Karafka
       class Consumer < ::Karafka::ActiveJob::Consumer
         # Runs ActiveJob jobs processing and handles lrj if needed
         def consume
-          messages.each do |message|
+          messages.each(clean: true) do |message|
             # If for any reason we've lost this partition, not worth iterating over new messages
             # as they are no longer ours
             break if revoked?
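With `clean: true`, the Pro ActiveJob consumer releases each message's raw and deserialized payload as soon as the job it carried has finished, so memory does not accumulate across a batch of jobs. No application-side changes are needed; any job routed through the Karafka adapter benefits automatically. A minimal sketch of such a setup, assuming the standard adapter wiring (queue, topic and job names are arbitrary examples):

# config/application.rb - route ActiveJob through Karafka
config.active_job.queue_adapter = :karafka

# karafka.rb - declare the topic that carries ActiveJob jobs
class KarafkaApp < Karafka::App
  routes.draw do
    active_job_topic :default
  end
end

# app/jobs/ingest_job.rb - a regular ActiveJob job (hypothetical)
class IngestJob < ActiveJob::Base
  queue_as :default

  def perform(record_id)
    # After this method returns, the Pro consumer cleans the underlying Kafka
    # message, freeing its payload before the next job in the batch runs.
  end
end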
data/lib/karafka/pro/cleaner/errors.rb
ADDED
@@ -0,0 +1,27 @@
# frozen_string_literal: true

# This Karafka component is a Pro component under a commercial license.
# This Karafka component is NOT licensed under LGPL.
#
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module Cleaner
      # Cleaner related errors
      module Errors
        # Base for all the cleaner errors
        BaseError = Class.new(::Karafka::Errors::BaseError)

        # Raised when trying to deserialize a message that has already been cleaned
        MessageCleanedError = Class.new(BaseError)
      end
    end
  end
end
data/lib/karafka/pro/cleaner/messages/message.rb
ADDED
@@ -0,0 +1,46 @@
# frozen_string_literal: true

# This Karafka component is a Pro component under a commercial license.
# This Karafka component is NOT licensed under LGPL.
#
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module Cleaner
      # Cleaner messages components related enhancements
      module Messages
        # Extensions to the message that allow for granular memory control on a per message basis
        module Message
          # @return [Object] lazy-deserialized data (deserialized upon first request)
          def payload
            # If the message has already been cleaned, it cannot be deserialized again
            cleaned? ? raise(Errors::MessageCleanedError) : super
          end

          # @return [Boolean] true if the message has been cleaned
          def cleaned?
            @raw_payload == false
          end

          # Cleans the message payload and removes the deserialized data references.
          # This is useful when working with big messages that take a lot of space.
          #
          # After the message content is no longer needed, it can be removed so it does not consume
          # space anymore.
          def clean!
            @deserialized = false
            @raw_payload = false
            @payload = nil
          end
        end
      end
    end
  end
end
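These three methods are the per-message half of the Cleaning API: `payload` refuses to deserialize a cleaned message, `cleaned?` exposes the state, and `clean!` drops both the raw and the deserialized payload. A minimal sketch of manual, per-message cleaning inside a Pro consumer (the base class, topic and persistence call are hypothetical):

class EventsConsumer < ApplicationConsumer
  def consume
    messages.each do |message|
      EventStore.persist(message.payload) # hypothetical processing step

      # We are done with this message's data, so mark it and release the memory
      # now instead of waiting for the whole batch to be garbage collected.
      mark_as_consumed(message)
      message.clean!

      # message.payload would now raise Karafka::Pro::Cleaner::Errors::MessageCleanedError
    end
  end
end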
data/lib/karafka/pro/cleaner/messages/messages.rb
ADDED
@@ -0,0 +1,42 @@
# frozen_string_literal: true

# This Karafka component is a Pro component under a commercial license.
# This Karafka component is NOT licensed under LGPL.
#
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module Cleaner
      module Messages
        # Extensions to the messages batch allowing for automatic cleaning of each message after
        # it is processed.
        module Messages
          # @param clean [Boolean] do we want to clean each message after we're done working
          #   with it
          # @yield block we want to execute per each message
          #
          # @note Cleaning messages after we're done with each of them and did not fail does not
          #   affect any other functionalities. The only thing that is crucial is to make sure
          #   that, if DLQ is used, we mark each message as consumed when using this API, as
          #   otherwise a cleaned message may be dispatched and that should never happen
          def each(clean: false)
            @messages_array.each do |message|
              yield(message)

              next unless clean

              message.clean!
            end
          end
        end
      end
    end
  end
end
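The batch extension wires the same mechanism into iteration: with `clean: true`, every message is cleaned right after its block returns. As the `@note` above stresses, when a Dead Letter Queue is configured each message should be marked as consumed inside the block, so a later failure can never try to dispatch an already-cleaned message. A minimal sketch under those assumptions (consumer class and processing call are hypothetical):

class LogsConsumer < ApplicationConsumer
  def consume
    # Each message is cleaned automatically once the block finishes for it
    messages.each(clean: true) do |message|
      LogStore.write(message.payload) # hypothetical processing step

      # With DLQ in use, marking here guarantees that an error on a later message
      # never routes this (already cleaned) message to the DLQ
      mark_as_consumed(message)
    end
  end
end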
data/lib/karafka/pro/cleaner.rb
ADDED
@@ -0,0 +1,41 @@
# frozen_string_literal: true

# This Karafka component is a Pro component under a commercial license.
# This Karafka component is NOT licensed under LGPL.
#
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    # Feature that introduces granular memory management for each message and the messages iterator
    #
    # It allows for better resource allocation by providing an API to clear the payload and raw
    # payload from a message once they are no longer needed, but before the whole messages batch
    # is freed and removed by the Ruby GC.
    #
    # This can be useful when processing bigger batches or bigger messages one after another and
    # not wanting to have all of the data loaded into memory at once.
    #
    # Can yield significant memory savings (up to 80%).
    module Cleaner
      class << self
        # @param _config [Karafka::Core::Configurable::Node] root node config
        def pre_setup(_config)
          ::Karafka::Messages::Message.prepend(Messages::Message)
          ::Karafka::Messages::Messages.prepend(Messages::Messages)
        end

        # @param _config [Karafka::Core::Configurable::Node] root node config
        def post_setup(_config)
          true
        end
      end
    end
  end
end
data/lib/karafka/pro/encryption/errors.rb
CHANGED
@@ -16,8 +16,11 @@ module Karafka
     module Encryption
       # Encryption related errors
       module Errors
+        # Base for all the encryption errors
+        BaseError = Class.new(::Karafka::Errors::BaseError)
+
         # Raised when we have encountered encryption key with version we do not have
-        PrivateKeyNotFound = Class.new(
+        PrivateKeyNotFound = Class.new(BaseError)
       end
     end
   end
data/lib/karafka/pro/loader.rb
CHANGED
data/lib/karafka/pro/processing/strategies/dlq/default.rb
CHANGED
@@ -28,7 +28,7 @@ module Karafka
 
           # When we encounter non-recoverable message, we skip it and go on with our lives
           def handle_after_consume
-            coordinator.on_finished do
+            coordinator.on_finished do |last_group_message|
               return if revoked?
 
               if coordinator.success?
@@ -36,7 +36,7 @@ module Karafka
 
                 return if coordinator.manual_pause?
 
-                mark_as_consumed(
+                mark_as_consumed(last_group_message)
               elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
                 retry_after_pause
               # If we've reached number of retries that we could, we need to skip the first
@@ -74,6 +74,12 @@ module Karafka
           # @param skippable_message [Array<Karafka::Messages::Message>] message we want to
           #   dispatch to DLQ
           def dispatch_to_dlq(skippable_message)
+            # DLQ should never try to dispatch a message that was cleaned. If the message was
+            # cleaned, we will not have all the needed data. If you see this error, it means
+            # that your processing flow is not as expected and you have cleaned a message that
+            # should not be cleaned as it should go to the DLQ
+            raise(Cleaner::Errors::MessageCleanedError) if skippable_message.cleaned?
+
             producer.produce_async(
               build_dlq_message(
                 skippable_message
data/lib/karafka/pro/processing/strategies/dlq/ftr.rb
CHANGED
@@ -31,7 +31,7 @@ module Karafka
           # DLQ flow is standard here, what is not, is the success component where we need to
           # take into consideration the filtering
           def handle_after_consume
-            coordinator.on_finished do
+            coordinator.on_finished do |last_group_message|
               return if revoked?
 
               if coordinator.success?
@@ -39,7 +39,7 @@ module Karafka
 
                 return if coordinator.manual_pause?
 
-                mark_as_consumed(
+                mark_as_consumed(last_group_message)
 
                 handle_post_filtering
               elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.1.
+  version: 2.1.13
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
   msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
   -----END CERTIFICATE-----
-date: 2023-08-
+date: 2023-08-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -141,6 +141,7 @@ files:
 - bin/create_token
 - bin/integrations
 - bin/karafka
+- bin/record_rss
 - bin/rspecs
 - bin/scenario
 - bin/stress_many
@@ -219,6 +220,10 @@ files:
 - lib/karafka/pro/active_job/consumer.rb
 - lib/karafka/pro/active_job/dispatcher.rb
 - lib/karafka/pro/active_job/job_options_contract.rb
+- lib/karafka/pro/cleaner.rb
+- lib/karafka/pro/cleaner/errors.rb
+- lib/karafka/pro/cleaner/messages/message.rb
+- lib/karafka/pro/cleaner/messages/messages.rb
 - lib/karafka/pro/encryption.rb
 - lib/karafka/pro/encryption/cipher.rb
 - lib/karafka/pro/encryption/contracts/config.rb
metadata.gz.sig
CHANGED
Binary file