karafka 2.1.11 → 2.1.13
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +1 -1
- data/bin/record_rss +50 -0
- data/lib/karafka/errors.rb +3 -0
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +5 -2
- data/lib/karafka/messages/builders/message.rb +8 -4
- data/lib/karafka/pro/active_job/consumer.rb +1 -1
- data/lib/karafka/pro/cleaner/errors.rb +27 -0
- data/lib/karafka/pro/cleaner/messages/message.rb +46 -0
- data/lib/karafka/pro/cleaner/messages/messages.rb +42 -0
- data/lib/karafka/pro/cleaner.rb +41 -0
- data/lib/karafka/pro/encryption/errors.rb +4 -1
- data/lib/karafka/pro/loader.rb +2 -1
- data/lib/karafka/pro/processing/strategies/dlq/default.rb +8 -2
- data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +2 -2
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +6 -3
- metadata +7 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 262c2bbfa4fb09c4038ce559c71baa6c40b861497dd7ef3b1915ba6b8aa47652
+  data.tar.gz: a34d5fad3bdbd1a58a0938f33c8fd28a5df72cb89293d7835c41cfce09c9e736
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2fd4672fef274f5913b543d9b3a91aef8a8aba59a23e457a61f455cf6095ea8176c6f8e799f65602c7ef0bb15add27a960df70cd99913fe6b5340df9cfff8c31
+  data.tar.gz: 4b890531f2a783c72573bcc2e915cc70b85e09c5e43de76b979fc34ea732f51a2c8ec4f64e91c95293337973103c7eeee89e424d27fa35dc708f935364fa9db3
checksums.yaml.gz.sig
CHANGED
Binary file
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
 # Karafka framework changelog
 
+## 2.1.13 (2023-08-28)
+- **[Feature]** Introduce Cleaning API for much better memory management for iterative data processing [Pro].
+- [Enhancement] Automatically free message resources after they are processed for ActiveJob jobs [Pro]
+- [Enhancement] Free memory used by the raw payload as fast as possible after obtaining it from `karafka-rdkafka`.
+- [Enhancement] Support changing `service_name` in the DataDog integration.
+
+## 2.1.12 (2023-08-25)
+- [Fix] Fix a case where DLQ + VP without intermediate marking would mark an earlier message than the last one.
+
 ## 2.1.11 (2023-08-23)
 - [Enhancement] Expand the error handling for offset related queries with timeout error retries.
 - [Enhancement] Allow for connection proxy timeouts configuration.
data/Gemfile.lock
CHANGED
data/bin/record_rss
ADDED
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# This script monitors and records the Resident Set Size (RSS) of a process given its PID.
+# The RSS is logged every second to the specified output file until the process terminates.
+#
+# Usage:
+#   ./script_name.sh <PID> <OUTPUT_FILE>
+#
+# Arguments:
+#   <PID> - Process ID of the process you want to monitor.
+#   <OUTPUT_FILE> - Name of the file where RSS values will be logged.
+#
+# The script first checks if the correct number of arguments is provided.
+# It then verifies if the given PID exists. If it does, it starts recording the RSS.
+# For every iteration, the script fetches the current RSS of the process using the 'ps' command,
+# then appends the RSS value along with a timestamp to the output file.
+# This recording is done every second.
+# The loop stops if the process with the given PID terminates.
+# An informative message is printed out when recording starts and when it stops.
+
+# Check if the correct number of arguments is passed
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <PID> <OUTPUT_FILE>"
+  exit 1
+fi
+
+PID=$1
+OUTPUT_FILE=$2
+
+# Check if the given PID exists
+if ! kill -0 $PID 2>/dev/null; then
+  echo "Error: PID $PID does not exist."
+  exit 1
+fi
+
+# Start recording the RSS
+echo "Recording RSS for PID $PID every second to $OUTPUT_FILE..."
+
+while kill -0 $PID 2>/dev/null; do
+  RSS=$(ps -o rss= -p $PID)
+  if [ -z "$RSS" ]; then
+    echo "Error: Failed to get RSS for PID $PID."
+    exit 1
+  fi
+  TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
+  echo "$TIMESTAMP: $RSS KB" >> $OUTPUT_FILE
+  sleep 1
+done
+
+echo "Process $PID has terminated. Stopping recording."
data/lib/karafka/errors.rb
CHANGED
@@ -41,6 +41,9 @@ module Karafka
     # Raised when the license token is not valid
     InvalidLicenseTokenError = Class.new(BaseError)
 
+    # Raised on attempt to deserialize a cleared message
+    MessageClearedError = Class.new(BaseError)
+
     # This should never happen. Please open an issue if it does.
     InvalidCoordinatorStateError = Class.new(BaseError)
 
data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb
CHANGED
@@ -12,11 +12,14 @@ module Karafka
         include ::Karafka::Core::Configurable
         extend Forwardable
 
-        def_delegators :config, :client
+        def_delegators :config, :client, :service_name
 
         # `Datadog::Tracing` client that we should use to trace stuff
         setting :client
 
+        # @see https://docs.datadoghq.com/tracing/trace_collection/dd_libraries/ruby
+        setting :service_name, default: nil
+
         configure
 
         # Log levels that we use in this particular listener
@@ -44,7 +47,7 @@ module Karafka
         #
         # @param event [Karafka::Core::Monitoring::Event] event details including payload
         def on_worker_process(event)
-          current_span = client.trace('karafka.consumer')
+          current_span = client.trace('karafka.consumer', service: service_name)
           push_tags
 
           job = event[:job]
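The new `service_name` setting lets consumer traces be reported under a custom DataDog service. A minimal wiring sketch (not part of the diff), assuming the `ddtrace` tracer is already configured; the service name value is a placeholder:

    require 'karafka/instrumentation/vendors/datadog/logger_listener'

    # Build the listener, pointing it at the Datadog tracing client and the
    # service under which Karafka consumer spans should be grouped
    dd_listener = ::Karafka::Instrumentation::Vendors::Datadog::LoggerListener.new do |config|
      config.client = Datadog::Tracing
      config.service_name = 'my-karafka-consumer'
    end

    # Subscribe it to Karafka's instrumentation bus
    Karafka.monitor.subscribe(dd_listener)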
data/lib/karafka/messages/builders/message.rb
CHANGED
@@ -23,11 +23,15 @@ module Karafka
            received_at: received_at
          ).freeze
 
+          # Get the raw payload
+          payload = kafka_message.payload
+
+          # And nullify it in the kafka message. This can save a lot of memory when used with
+          # the Pro Cleaner API
+          kafka_message.instance_variable_set('@payload', nil)
+
           # Karafka messages cannot be frozen because of the lazy deserialization feature
-          Karafka::Messages::Message.new(
-            kafka_message.payload,
-            metadata
-          )
+          Karafka::Messages::Message.new(payload, metadata)
         end
       end
     end
data/lib/karafka/pro/active_job/consumer.rb
CHANGED
@@ -25,7 +25,7 @@ module Karafka
       class Consumer < ::Karafka::ActiveJob::Consumer
         # Runs ActiveJob jobs processing and handles lrj if needed
         def consume
-          messages.each do |message|
+          messages.each(clean: true) do |message|
             # If for any reason we've lost this partition, not worth iterating over new messages
             # as they are no longer ours
             break if revoked?
data/lib/karafka/pro/cleaner/errors.rb
ADDED
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Cleaner
+      # Cleaner related errors
+      module Errors
+        # Base for all the cleaner errors
+        BaseError = Class.new(::Karafka::Errors::BaseError)
+
+        # Raised when trying to deserialize a message that has already been cleaned
+        MessageCleanedError = Class.new(BaseError)
+      end
+    end
+  end
+end
data/lib/karafka/pro/cleaner/messages/message.rb
ADDED
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Cleaner
+      # Cleaner messages components related enhancements
+      module Messages
+        # Extensions to the message that allow for granular memory control on a per message basis
+        module Message
+          # @return [Object] lazy-deserialized data (deserialized upon first request)
+          def payload
+            # If the message has already been cleaned, it cannot be deserialized again
+            cleaned? ? raise(Errors::MessageCleanedError) : super
+          end
+
+          # @return [Boolean] true if the message has been cleaned
+          def cleaned?
+            @raw_payload == false
+          end
+
+          # Cleans the message payload and removes the deserialized data references.
+          # This is useful when working with big messages that take a lot of space.
+          #
+          # After the message content is no longer needed, it can be removed so it does not consume
+          # space anymore.
+          def clean!
+            @deserialized = false
+            @raw_payload = false
+            @payload = nil
+          end
+        end
+      end
+    end
+  end
+end
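For one-off, manual control, `#clean!` can be called directly on a message once its payload has served its purpose. A short usage sketch (not part of the diff); the consumer class and the `store` helper are hypothetical:

    class EventsConsumer < ApplicationConsumer
      def consume
        messages.each do |message|
          store(message.payload)

          # Frees both the raw and the deserialized payload; any further #payload
          # call would raise Karafka::Pro::Cleaner::Errors::MessageCleanedError
          message.clean!
        end
      end
    end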
data/lib/karafka/pro/cleaner/messages/messages.rb
ADDED
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Cleaner
+      module Messages
+        # Extensions to the messages batch allowing for automatic cleaning of each message after
+        # it is processed.
+        module Messages
+          # @param clean [Boolean] do we want to clean each message after we're done working with
+          #   it
+          # @yield block we want to execute per each message
+          #
+          # @note Cleaning messages after we're done with each of them and did not fail does not
+          #   affect any other functionalities. The only thing that is crucial is to make sure
+          #   that, if DLQ is used, we mark each message as consumed when using this API, as
+          #   otherwise a cleaned message may be dispatched and that should never happen
+          def each(clean: false)
+            @messages_array.each do |message|
+              yield(message)
+
+              next unless clean
+
+              message.clean!
+            end
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/pro/cleaner.rb
ADDED
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Feature that introduces granular memory management for each message and the messages
+    # iterator
+    #
+    # It allows for better resource allocation by providing an API to clear payload and raw
+    # payload from a message after those are no longer needed but before whole messages are
+    # freed and removed by the Ruby GC.
+    #
+    # This can be useful when processing bigger batches or bigger messages one after another
+    # while not wanting to have all of the data loaded into memory.
+    #
+    # Can yield significant memory savings (up to 80%).
+    module Cleaner
+      class << self
+        # @param _config [Karafka::Core::Configurable::Node] root node config
+        def pre_setup(_config)
+          ::Karafka::Messages::Message.prepend(Messages::Message)
+          ::Karafka::Messages::Messages.prepend(Messages::Messages)
+        end
+
+        # @param _config [Karafka::Core::Configurable::Node] root node config
+        def post_setup(_config)
+          true
+        end
+      end
+    end
+  end
+end
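Because the Cleaner prepends its modules during pre-setup, batch-level cleaning becomes available on the standard messages iterator. A consumer sketch (not part of the diff); the consumer class and the `persist` helper are hypothetical:

    class LogsConsumer < ApplicationConsumer
      def consume
        # `clean: true` frees each message's payload right after the block finishes
        # with it, keeping memory usage roughly flat across the whole batch
        messages.each(clean: true) do |message|
          persist(message.payload)

          # When a DLQ is in use, mark each message as consumed so an already
          # cleaned message can never become a DLQ dispatch candidate
          mark_as_consumed(message)
        end
      end
    end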
data/lib/karafka/pro/encryption/errors.rb
CHANGED
@@ -16,8 +16,11 @@ module Karafka
     module Encryption
       # Encryption related errors
      module Errors
+        # Base for all the encryption errors
+        BaseError = Class.new(::Karafka::Errors::BaseError)
+
         # Raised when we have encountered encryption key with version we do not have
-        PrivateKeyNotFound = Class.new(
+        PrivateKeyNotFound = Class.new(BaseError)
       end
     end
   end
data/lib/karafka/pro/loader.rb
CHANGED
data/lib/karafka/pro/processing/strategies/dlq/default.rb
CHANGED
@@ -28,7 +28,7 @@ module Karafka
 
           # When we encounter non-recoverable message, we skip it and go on with our lives
           def handle_after_consume
-            coordinator.on_finished do
+            coordinator.on_finished do |last_group_message|
               return if revoked?
 
               if coordinator.success?
@@ -36,7 +36,7 @@ module Karafka
 
                 return if coordinator.manual_pause?
 
-                mark_as_consumed(
+                mark_as_consumed(last_group_message)
               elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
                 retry_after_pause
               # If we've reached number of retries that we could, we need to skip the first
@@ -74,6 +74,12 @@ module Karafka
           # @param skippable_message [Array<Karafka::Messages::Message>] message we want to
           #   dispatch to DLQ
           def dispatch_to_dlq(skippable_message)
+            # DLQ should never try to dispatch a message that was cleaned. If a message was
+            # cleaned, we will not have all the needed data. If you see this error, it means
+            # that your processing flow is not as expected and you have cleaned a message
+            # that should not be cleaned, as it should go to the DLQ
+            raise(Cleaner::Errors::MessageCleanedError) if skippable_message.cleaned?
+
             producer.produce_async(
               build_dlq_message(
                 skippable_message
data/lib/karafka/pro/processing/strategies/dlq/ftr.rb
CHANGED
@@ -31,7 +31,7 @@ module Karafka
           # DLQ flow is standard here, what is not, is the success component where we need to
           # take into consideration the filtering
           def handle_after_consume
-            coordinator.on_finished do
+            coordinator.on_finished do |last_group_message|
               return if revoked?
 
               if coordinator.success?
@@ -39,7 +39,7 @@ module Karafka
 
                 return if coordinator.manual_pause?
 
-                mark_as_consumed(
+                mark_as_consumed(last_group_message)
 
                 handle_post_filtering
               elsif coordinator.pause_tracker.attempt <= topic.dead_letter_queue.max_retries
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.1.11
+  version: 2.1.13
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
   msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
   -----END CERTIFICATE-----
-date: 2023-08-23 00:00:00.000000000 Z
+date: 2023-08-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -141,6 +141,7 @@ files:
 - bin/create_token
 - bin/integrations
 - bin/karafka
+- bin/record_rss
 - bin/rspecs
 - bin/scenario
 - bin/stress_many
@@ -219,6 +220,10 @@ files:
 - lib/karafka/pro/active_job/consumer.rb
 - lib/karafka/pro/active_job/dispatcher.rb
 - lib/karafka/pro/active_job/job_options_contract.rb
+- lib/karafka/pro/cleaner.rb
+- lib/karafka/pro/cleaner/errors.rb
+- lib/karafka/pro/cleaner/messages/message.rb
+- lib/karafka/pro/cleaner/messages/messages.rb
 - lib/karafka/pro/encryption.rb
 - lib/karafka/pro/encryption/cipher.rb
 - lib/karafka/pro/encryption/contracts/config.rb
metadata.gz.sig
CHANGED
Binary file