karafka 2.1.3 → 2.1.5.beta1
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +12 -0
- data/Gemfile.lock +20 -20
- data/karafka.gemspec +2 -2
- data/lib/karafka/admin.rb +3 -1
- data/lib/karafka/base_consumer.rb +6 -3
- data/lib/karafka/connection/client.rb +11 -10
- data/lib/karafka/connection/rebalance_manager.rb +2 -4
- data/lib/karafka/messages/batch_metadata.rb +9 -2
- data/lib/karafka/messages/builders/message.rb +0 -3
- data/lib/karafka/patches/rdkafka/bindings.rb +4 -6
- data/lib/karafka/processing/strategies/default.rb +18 -0
- data/lib/karafka/setup/attributes_map.rb +1 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +10 -11
- metadata.gz.sig +0 -0
- data/lib/karafka/patches/rdkafka/consumer.rb +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 967412d28c31483df6a0c151ec0578367da24e503184608789791774347ffc53
+  data.tar.gz: '0828ba1db27f9f287b3ab9f80928816a81ae45ad79ce092e02cccd33b9bac204'
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3774daedd74efe8e2498e97e503226cc01848e7a7be6ca255852bcaf112bc790ff98953584c771de8afa96142e8244e9f1b036b18490305766b428671b136bce
+  data.tar.gz: b575cf2be3d4cdcb73fec9cdfd65cd705408d1efae39ed200ae2d815e7c9241f582d5ebb8b802347289de910ee50589f0551bb4c194a161905e38425d05bd296
checksums.yaml.gz.sig
CHANGED
Binary file
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,17 @@
 # Karafka framework changelog
 
+## 2.1.5 (Unreleased)
+- [Improvement] Drastically improve `#revoked?` response quality by checking the real time assignment lost state on librdkafka.
+- [Improvement] Improve eviction of saturated jobs that would run on already revoked assignments.
+- [Improvement] Expose `#commit_offsets` and `#commit_offsets!` methods in the consumer to provide ability to commit offsets directly to Kafka without having to mark new messages as consumed.
+- [Improvement] No longer skip offset commit when no messages marked as consumed as `librdkafka` has fixed the crashes there.
+- [Improvement] Remove no longer needed patches.
+- [Change] Require `karafka-core` `>= 2.1.0`
+- [Change] Require `waterdrop` `>= 2.6.1`
+
+## 2.1.4 (2023-06-06)
+- [Fix] `processing_lag` and `consumption_lag` on empty batch fail on shutdown usage (#1475)
+
 ## 2.1.3 (2023-05-29)
 - [Maintenance] Add linter to ensure, that all integration specs end with `_spec.rb`.
 - [Fix] Fix `#retrying?` helper result value (Aerdayne).
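The `#commit_offsets` / `#commit_offsets!` entry is the most user-facing change in this release. A minimal sketch of how a consumer could use it, based on the changelog wording and the new strategy methods further down in this diff (topic name and processing code are illustrative only):

```ruby
class OrdersConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      # mark_as_consumed only stores the offset; it is flushed later
      mark_as_consumed(message)
    end

    # New in 2.1.5: flush the stored offsets to Kafka right away.
    #   commit_offsets  - asynchronous, fire-and-forget
    #   commit_offsets! - synchronous, returns false if we no longer own the partition
    commit_offsets!
  end
end
```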
data/Gemfile.lock
CHANGED
@@ -1,19 +1,19 @@
 PATH
   remote: .
   specs:
-    karafka (2.1.
-      karafka-core (>= 2.0.
+    karafka (2.1.5.beta1)
+      karafka-core (>= 2.1.0.beta1, < 2.2.0)
       thor (>= 0.20)
-      waterdrop (>= 2.
+      waterdrop (>= 2.6.1.beta1, < 3.0.0)
       zeitwerk (~> 2.3)
 
 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.0.
-      activesupport (= 7.0.
+    activejob (7.0.5)
+      activesupport (= 7.0.5)
       globalid (>= 0.3.6)
-    activesupport (7.0.
+    activesupport (7.0.5)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
@@ -28,26 +28,26 @@ GEM
     ffi (1.15.5)
     globalid (1.1.0)
       activesupport (>= 5.0)
-    i18n (1.
+    i18n (1.14.1)
       concurrent-ruby (~> 1.0)
-    karafka-core (2.0.
+    karafka-core (2.1.0.beta1)
       concurrent-ruby (>= 1.1)
-      karafka-rdkafka (>= 0.
-    karafka-rdkafka (0.
+      karafka-rdkafka (>= 0.13.0.beta2, < 0.14.0)
+    karafka-rdkafka (0.13.0.beta2)
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
-    karafka-web (0.
+    karafka-web (0.6.0)
       erubi (~> 1.4)
-      karafka (>= 2.
-      karafka-core (>= 2.0.
-      roda (~> 3.
+      karafka (>= 2.1.4, < 3.0.0)
+      karafka-core (>= 2.0.13, < 3.0.0)
+      roda (~> 3.68, >= 3.68)
       tilt (~> 2.0)
     mini_portile2 (2.8.2)
-    minitest (5.18.
-    rack (3.0.
+    minitest (5.18.1)
+    rack (3.0.8)
     rake (13.0.6)
-    roda (3.
+    roda (3.69.0)
       rack
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
@@ -69,11 +69,11 @@ GEM
     simplecov-html (0.12.3)
     simplecov_json_formatter (0.1.4)
     thor (1.2.2)
-    tilt (2.
+    tilt (2.2.0)
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
-    waterdrop (2.
-      karafka-core (>= 2.0.
+    waterdrop (2.6.1.beta1)
+      karafka-core (>= 2.1.0.beta1, < 3.0.0)
       zeitwerk (~> 2.3)
     zeitwerk (2.6.8)
 
data/karafka.gemspec
CHANGED
@@ -21,9 +21,9 @@ Gem::Specification.new do |spec|
     without having to focus on things that are not your business domain.
   DESC
 
-  spec.add_dependency 'karafka-core', '>= 2.0.
+  spec.add_dependency 'karafka-core', '>= 2.1.0.beta1', '< 2.2.0'
   spec.add_dependency 'thor', '>= 0.20'
-  spec.add_dependency 'waterdrop', '>= 2.
+  spec.add_dependency 'waterdrop', '>= 2.6.1.beta1', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'
 
   if $PROGRAM_NAME.end_with?('gem')
data/lib/karafka/admin.rb
CHANGED
@@ -171,7 +171,9 @@ module Karafka
     # @return [Rdkafka::Metadata] cluster metadata info
     def cluster_info
       with_admin do |admin|
-
+        admin.instance_variable_get('@native_kafka').with_inner do |inner|
+          Rdkafka::Metadata.new(inner)
+        end
       end
     end
 
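The change only affects how the metadata is built internally (through `with_inner` on the wrapped native handle); the public entry point stays the same. A minimal usage sketch, assuming Karafka is already configured and that `Rdkafka::Metadata#topics` returns hashes with `:topic_name` and `:partition_count` keys as in rdkafka-ruby:

```ruby
# Inspect cluster metadata via the public Admin API
info = Karafka::Admin.cluster_info

info.topics.each do |topic|
  puts "#{topic[:topic_name]} has #{topic[:partition_count]} partitions"
end
```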
data/lib/karafka/base_consumer.rb
CHANGED
@@ -215,10 +215,13 @@ module Karafka
     end
 
     # @return [Boolean] true if partition was revoked from the current consumer
-    # @note
-    #
+    # @note There are two "levels" on which we can know that partition was revoked. First one is
+    #   when we loose the assignment involuntarily and second is when coordinator gets this info
+    #   after we poll with the rebalance callbacks. The first check allows us to get this notion
+    #   even before we poll but it gets reset when polling happens, hence we also need to switch
+    #   the coordinator state after the revocation (but prior to running more jobs)
     def revoked?
-      coordinator.revoked?
+      client.assignment_lost? || coordinator.revoked?
     end
 
     # @return [Boolean] are we retrying processing after an error. This can be used to provide a
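In practice this means `#revoked?` can now flip to `true` in the middle of a batch, before the next poll refreshes the coordinator state. A minimal sketch of a consumer that bails out early when that happens (the processing body is illustrative only):

```ruby
class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      # With 2.1.5 this also reflects librdkafka's live "assignment lost" state,
      # not only the coordinator state updated on poll
      return if revoked?

      # ... process the message ...
      mark_as_consumed(message)
    end
  end
end
```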
data/lib/karafka/connection/client.rb
CHANGED
@@ -41,9 +41,6 @@ module Karafka
       @buffer = RawMessagesBuffer.new
       @rebalance_manager = RebalanceManager.new
       @kafka = build_consumer
-      # Marks if we need to offset. If we did not store offsets, we should not commit the offset
-      # position as it will crash rdkafka
-      @offsetting = false
       # We need to keep track of what we have paused for resuming
       # In case we loose partition, we still need to resume it, otherwise it won't be fetched
       # again if we get reassigned to it later on. We need to keep them as after revocation we
@@ -109,8 +106,14 @@ module Karafka
        end
      end
 
+      # @return [Boolean] true if our current assignment has been lost involuntarily.
+      def assignment_lost?
+        @mutex.synchronize do
+          @kafka.assignment_lost?
+        end
+      end
+
      # Commits the offset on a current consumer in a non-blocking or blocking way.
-      # Ignoring a case where there would not be an offset (for example when rebalance occurs).
      #
      # @param async [Boolean] should the commit happen async or sync (async by default)
      # @return [Boolean] did committing was successful. It may be not, when we no longer own
@@ -118,7 +121,7 @@ module Karafka
      #
      # @note This will commit all the offsets for the whole consumer. In order to achieve
      #   granular control over where the offset should be for particular topic partitions, the
-      #   store_offset should be used to only store new offset when we want
+      #   store_offset should be used to only store new offset when we want them to be flushed
      def commit_offsets(async: true)
        @mutex.lock
 
@@ -256,7 +259,6 @@ module Karafka
 
        @mutex.synchronize do
          @closed = false
-          @offsetting = false
          @paused_tpls.clear
          @kafka = build_consumer
        end
@@ -281,7 +283,6 @@ module Karafka
      # @param message [Karafka::Messages::Message]
      # @return [Boolean] true if we could store the offset (if we still own the partition)
      def internal_store_offset(message)
-        @offsetting = true
        @kafka.store_offset(message)
        true
      rescue Rdkafka::RdkafkaError => e
@@ -294,11 +295,11 @@ module Karafka
      # Non thread-safe message committing method
      # @param async [Boolean] should the commit happen async or sync (async by default)
      # @return [Boolean] true if offset commit worked, false if we've lost the assignment
+      # @note We do **not** consider `no_offset` as any problem and we allow to commit offsets
+      #   even when no stored, because with sync commit, it refreshes the ownership state of the
+      #   consumer in a sync way.
      def internal_commit_offsets(async: true)
-        return true unless @offsetting
-
        @kafka.commit(nil, async)
-        @offsetting = false
 
        true
      rescue Rdkafka::RdkafkaError => e
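The `@offsetting` bookkeeping disappears because committing with nothing stored no longer crashes librdkafka; the rescue path only has to decide whether an error means the assignment was lost. A simplified, standalone sketch of that decision, not the library's exact code, assuming rdkafka-ruby's `RdkafkaError#code` symbols such as `:no_offset` and `:assignment_lost`:

```ruby
# Hypothetical helper: treat "nothing to commit" as success, ownership loss as failure
def safe_commit(consumer, async: true)
  consumer.commit(nil, async)
  true
rescue Rdkafka::RdkafkaError => e
  case e.code
  when :no_offset         then true  # nothing stored yet - not an error anymore
  when :assignment_lost   then false # we no longer own the partitions
  when :unknown_member_id then false # kicked out of the group (e.g. exceeded max.poll.interval.ms)
  else
    raise e
  end
end
```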
data/lib/karafka/connection/rebalance_manager.rb
CHANGED
@@ -49,9 +49,8 @@ module Karafka
     # Callback that kicks in inside of rdkafka, when new partitions are assigned.
     #
     # @private
-    # @param _ [Rdkafka::Consumer]
     # @param partitions [Rdkafka::Consumer::TopicPartitionList]
-    def on_partitions_assigned(
+    def on_partitions_assigned(partitions)
       @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
       @changed = true
     end
@@ -59,9 +58,8 @@ module Karafka
     # Callback that kicks in inside of rdkafka, when partitions are revoked.
     #
     # @private
-    # @param _ [Rdkafka::Consumer]
     # @param partitions [Rdkafka::Consumer::TopicPartitionList]
-    def on_partitions_revoked(
+    def on_partitions_revoked(partitions)
       @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
       @changed = true
     end
data/lib/karafka/messages/batch_metadata.rb
CHANGED
@@ -20,14 +20,21 @@ module Karafka
   ) do
     # This lag describes how long did it take for a message to be consumed from the moment it was
     # created
+    #
+    #
+    # @return [Integer] number of milliseconds
+    # @note In case of usage in workless flows, this value will be set to -1
     def consumption_lag
-      time_distance_in_ms(processed_at, created_at)
+      processed_at ? time_distance_in_ms(processed_at, created_at) : -1
     end
 
     # This lag describes how long did a batch have to wait before it was picked up by one of the
     # workers
+    #
+    # @return [Integer] number of milliseconds
+    # @note In case of usage in workless flows, this value will be set to -1
     def processing_lag
-      time_distance_in_ms(processed_at, scheduled_at)
+      processed_at ? time_distance_in_ms(processed_at, scheduled_at) : -1
     end
 
     private
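Both lags now degrade to `-1` instead of failing when a batch never reached a worker (the empty-batch-on-shutdown case from #1475). A minimal sketch of reading them from a consumer; the logging target is just an example:

```ruby
class LagAwareConsumer < Karafka::BaseConsumer
  def consume
    meta = messages.metadata

    # Milliseconds; -1 means the batch was never processed (workless flow)
    unless meta.consumption_lag.negative?
      Karafka.logger.info(
        "consumption_lag=#{meta.consumption_lag}ms processing_lag=#{meta.processing_lag}ms"
      )
    end

    messages.each { |message| mark_as_consumed(message) }
  end
end
```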
data/lib/karafka/messages/builders/message.rb
CHANGED
@@ -12,9 +12,6 @@ module Karafka
         # @param received_at [Time] moment when we've received the message
         # @return [Karafka::Messages::Message] message object with payload and metadata
         def call(kafka_message, topic, received_at)
-          # @see https://github.com/appsignal/rdkafka-ruby/issues/168
-          kafka_message.headers.transform_keys!(&:to_s)
-
           metadata = Karafka::Messages::Metadata.new(
             timestamp: kafka_message.timestamp,
             headers: kafka_message.headers,
data/lib/karafka/patches/rdkafka/bindings.rb
CHANGED
@@ -49,14 +49,13 @@ module Karafka
         #
         # @param code [Integer]
         # @param opaque [Rdkafka::Opaque]
-        # @param consumer [Rdkafka::Consumer]
         # @param tpl [Rdkafka::Consumer::TopicPartitionList]
-        def trigger_callbacks(code, opaque,
+        def trigger_callbacks(code, opaque, tpl)
           case code
           when RB::RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS
-            opaque.call_on_partitions_assigned(
+            opaque.call_on_partitions_assigned(tpl)
           when RB::RD_KAFKA_RESP_ERR__REVOKE_PARTITIONS
-            opaque.call_on_partitions_revoked(
+            opaque.call_on_partitions_revoked(tpl)
           end
         rescue StandardError => e
           Karafka.monitor.instrument(
@@ -92,9 +91,8 @@ module Karafka
           return unless opaque
 
           tpl = ::Rdkafka::Consumer::TopicPartitionList.from_native_tpl(partitions_ptr).freeze
-          consumer = ::Rdkafka::Consumer.new(client_ptr)
 
-          pr.trigger_callbacks(code, opaque,
+          pr.trigger_callbacks(code, opaque, tpl)
         end
       end
     end
data/lib/karafka/processing/strategies/default.rb
CHANGED
@@ -60,6 +60,24 @@ module Karafka
           true
         end
 
+        # Triggers an async offset commit
+        #
+        # @return [Boolean] true if we still own the partition.
+        # @note Due to its async nature, this may not fully represent the offset state in some
+        #   edge cases (like for example going beyond max.poll.interval)
+        def commit_offsets
+          client.commit_offsets(async: true)
+        end
+
+        # Triggers a synchronous offsets commit to Kafka
+        #
+        # @return [Boolean] true if we still own the partition, false otherwise.
+        # @note This is fully synchronous, hence the result of this can be used in DB transactions
+        #   etc as a way of making sure, that we still own the partition.
+        def commit_offsets!
+          client.commit_offsets(async: false)
+        end
+
         # No actions needed for the standard flow here
         def handle_before_enqueue
           nil
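Because `#commit_offsets!` is synchronous and returns `false` once the assignment is gone, its result can gate a database transaction, as the @note above suggests. A hedged sketch of that pattern, assuming ActiveRecord and an application-specific `persist_aggregates` helper:

```ruby
class LedgerConsumer < Karafka::BaseConsumer
  def consume
    messages.each { |message| mark_as_consumed(message) }

    ActiveRecord::Base.transaction do
      # Hypothetical application helper persisting the batch's side effects
      persist_aggregates(messages.payloads)

      # Synchronous commit: false means we lost the partition, so roll back
      # instead of keeping side effects for a partition we no longer own
      raise ActiveRecord::Rollback unless commit_offsets!
    end
  end
end
```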
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.1.
+  version: 2.1.5.beta1
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
   MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
   -----END CERTIFICATE-----
-date: 2023-
+date: 2023-06-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -43,20 +43,20 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.0.
+        version: 2.1.0.beta1
     - - "<"
       - !ruby/object:Gem::Version
-        version:
+        version: 2.2.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.0.
+        version: 2.1.0.beta1
     - - "<"
       - !ruby/object:Gem::Version
-        version:
+        version: 2.2.0
 - !ruby/object:Gem::Dependency
   name: thor
   requirement: !ruby/object:Gem::Requirement
@@ -77,7 +77,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.
+        version: 2.6.1.beta1
     - - "<"
       - !ruby/object:Gem::Version
         version: 3.0.0
@@ -87,7 +87,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.
+        version: 2.6.1.beta1
     - - "<"
       - !ruby/object:Gem::Version
         version: 3.0.0
@@ -214,7 +214,6 @@ files:
 - lib/karafka/messages/parser.rb
 - lib/karafka/messages/seek.rb
 - lib/karafka/patches/rdkafka/bindings.rb
-- lib/karafka/patches/rdkafka/consumer.rb
 - lib/karafka/pro.rb
 - lib/karafka/pro/active_job/consumer.rb
 - lib/karafka/pro/active_job/dispatcher.rb
@@ -415,9 +414,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
     version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - "
+  - - ">"
     - !ruby/object:Gem::Version
-      version:
+      version: 1.3.1
 requirements: []
 rubygems_version: 3.4.10
 signing_key:
metadata.gz.sig
CHANGED
Binary file
data/lib/karafka/patches/rdkafka/consumer.rb
DELETED
@@ -1,22 +0,0 @@
-# frozen_string_literal: true
-
-module Karafka
-  # Patches to external components
-  module Patches
-    # Rdkafka related patches
-    module Rdkafka
-      # Rdkafka::Consumer patches
-      module Consumer
-        # A method that allows us to get the native kafka producer name
-        # @return [String] producer instance name
-        # @note We need this to make sure that we allocate proper dispatched events only to
-        #   callback listeners that should publish them
-        def name
-          @name ||= ::Rdkafka::Bindings.rd_kafka_name(@native_kafka)
-        end
-      end
-    end
-  end
-end
-
-::Rdkafka::Consumer.include ::Karafka::Patches::Rdkafka::Consumer