karafka 2.1.3 → 2.1.5.beta1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 6e65b59d084427bababbe797da6aa39ce76d04ceb235bdc43d5f85649e030c0f
-   data.tar.gz: a0599693bea1a05f2066180492cdb5017ac609b118195c64983990550f8e7e98
+   metadata.gz: 967412d28c31483df6a0c151ec0578367da24e503184608789791774347ffc53
+   data.tar.gz: '0828ba1db27f9f287b3ab9f80928816a81ae45ad79ce092e02cccd33b9bac204'
  SHA512:
-   metadata.gz: 9ef5dfbce059aedad3c24911a02e05068122d78d6e54f183c9b1155f1bb6b9f80cfc49cc7cdcc678ea427f86b1832f161aa7c9a411afbb2e6d83fc87bc67ffbe
-   data.tar.gz: 43f0a4ae3c9247df214d6fccd7ee9735a975b45cb0db49dd9490d675abe5d03ffeddbafc658d3953a39243304fd7754a265d60450d941f5a19dfa3cbe3716ac9
+   metadata.gz: 3774daedd74efe8e2498e97e503226cc01848e7a7be6ca255852bcaf112bc790ff98953584c771de8afa96142e8244e9f1b036b18490305766b428671b136bce
+   data.tar.gz: b575cf2be3d4cdcb73fec9cdfd65cd705408d1efae39ed200ae2d815e7c9241f582d5ebb8b802347289de910ee50589f0551bb4c194a161905e38425d05bd296
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
  # Karafka framework changelog
 
+ ## 2.1.5 (Unreleased)
+ - [Improvement] Drastically improve `#revoked?` response quality by checking the real-time assignment lost state on librdkafka.
+ - [Improvement] Improve eviction of saturated jobs that would run on already revoked assignments.
+ - [Improvement] Expose `#commit_offsets` and `#commit_offsets!` methods in the consumer to provide the ability to commit offsets directly to Kafka without having to mark new messages as consumed.
+ - [Improvement] No longer skip offset commits when no messages are marked as consumed, as `librdkafka` has fixed the crashes there.
+ - [Improvement] Remove no longer needed patches.
+ - [Change] Require `karafka-core` `>= 2.1.0`.
+ - [Change] Require `waterdrop` `>= 2.6.1`.
+
+ ## 2.1.4 (2023-06-06)
+ - [Fix] `processing_lag` and `consumption_lag` on an empty batch fail on shutdown usage (#1475)
+
  ## 2.1.3 (2023-05-29)
  - [Maintenance] Add linter to ensure that all integration specs end with `_spec.rb`.
  - [Fix] Fix `#retrying?` helper result value (Aerdayne).
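
The `#commit_offsets` / `#commit_offsets!` entries above correspond to the new consumer methods visible later in this diff (in `lib/karafka/processing/strategies/default.rb`). A minimal usage sketch — the consumer class name and the `store_event` helper are illustrative, not part of Karafka:

```ruby
# Minimal sketch of the 2.1.5 consumer offset API described above.
class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      store_event(message.payload) # hypothetical domain logic
      mark_as_consumed(message)    # stores the offset locally
    end

    # Flush the stored offsets to Kafka synchronously; false means we lost
    # the assignment in the meantime and no longer own the partition.
    return if commit_offsets!

    Karafka.logger.warn('Assignment lost, offsets were not committed')
  end
end
```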
data/Gemfile.lock CHANGED
@@ -1,19 +1,19 @@
  PATH
    remote: .
    specs:
-     karafka (2.1.3)
-       karafka-core (>= 2.0.13, < 3.0.0)
+     karafka (2.1.5.beta1)
+       karafka-core (>= 2.1.0.beta1, < 2.2.0)
        thor (>= 0.20)
-       waterdrop (>= 2.5.3, < 3.0.0)
+       waterdrop (>= 2.6.1.beta1, < 3.0.0)
        zeitwerk (~> 2.3)
 
  GEM
    remote: https://rubygems.org/
    specs:
-     activejob (7.0.4.3)
-       activesupport (= 7.0.4.3)
+     activejob (7.0.5)
+       activesupport (= 7.0.5)
        globalid (>= 0.3.6)
-     activesupport (7.0.4.3)
+     activesupport (7.0.5)
        concurrent-ruby (~> 1.0, >= 1.0.2)
        i18n (>= 1.6, < 2)
        minitest (>= 5.1)
@@ -28,26 +28,26 @@ GEM
      ffi (1.15.5)
      globalid (1.1.0)
        activesupport (>= 5.0)
-     i18n (1.13.0)
+     i18n (1.14.1)
        concurrent-ruby (~> 1.0)
-     karafka-core (2.0.13)
+     karafka-core (2.1.0.beta1)
        concurrent-ruby (>= 1.1)
-       karafka-rdkafka (>= 0.12.3)
-     karafka-rdkafka (0.12.3)
+       karafka-rdkafka (>= 0.13.0.beta2, < 0.14.0)
+     karafka-rdkafka (0.13.0.beta2)
        ffi (~> 1.15)
        mini_portile2 (~> 2.6)
        rake (> 12)
-     karafka-web (0.5.2)
+     karafka-web (0.6.0)
        erubi (~> 1.4)
-       karafka (>= 2.0.40, < 3.0.0)
-       karafka-core (>= 2.0.12, < 3.0.0)
-       roda (~> 3.63)
+       karafka (>= 2.1.4, < 3.0.0)
+       karafka-core (>= 2.0.13, < 3.0.0)
+       roda (~> 3.68, >= 3.68)
        tilt (~> 2.0)
      mini_portile2 (2.8.2)
-     minitest (5.18.0)
-     rack (3.0.7)
+     minitest (5.18.1)
+     rack (3.0.8)
      rake (13.0.6)
-     roda (3.68.0)
+     roda (3.69.0)
        rack
      rspec (3.12.0)
        rspec-core (~> 3.12.0)
@@ -69,11 +69,11 @@ GEM
      simplecov-html (0.12.3)
      simplecov_json_formatter (0.1.4)
      thor (1.2.2)
-     tilt (2.1.0)
+     tilt (2.2.0)
      tzinfo (2.0.6)
        concurrent-ruby (~> 1.0)
-     waterdrop (2.5.3)
-       karafka-core (>= 2.0.13, < 3.0.0)
+     waterdrop (2.6.1.beta1)
+       karafka-core (>= 2.1.0.beta1, < 3.0.0)
        zeitwerk (~> 2.3)
      zeitwerk (2.6.8)
 
data/karafka.gemspec CHANGED
@@ -21,9 +21,9 @@ Gem::Specification.new do |spec|
      without having to focus on things that are not your business domain.
    DESC
 
-   spec.add_dependency 'karafka-core', '>= 2.0.13', '< 3.0.0'
+   spec.add_dependency 'karafka-core', '>= 2.1.0.beta1', '< 2.2.0'
    spec.add_dependency 'thor', '>= 0.20'
-   spec.add_dependency 'waterdrop', '>= 2.5.3', '< 3.0.0'
+   spec.add_dependency 'waterdrop', '>= 2.6.1.beta1', '< 3.0.0'
    spec.add_dependency 'zeitwerk', '~> 2.3'
 
    if $PROGRAM_NAME.end_with?('gem')
data/lib/karafka/admin.rb CHANGED
@@ -171,7 +171,9 @@ module Karafka
      # @return [Rdkafka::Metadata] cluster metadata info
      def cluster_info
        with_admin do |admin|
-         Rdkafka::Metadata.new(admin.instance_variable_get('@native_kafka'))
+         admin.instance_variable_get('@native_kafka').with_inner do |inner|
+           Rdkafka::Metadata.new(inner)
+         end
        end
      end
 
data/lib/karafka/base_consumer.rb CHANGED
@@ -215,10 +215,13 @@ module Karafka
      end
 
      # @return [Boolean] true if partition was revoked from the current consumer
-     # @note We know that partition got revoked because when we try to mark message as consumed,
-     #   unless if is successful, it will return false
+     # @note There are two "levels" on which we can know that a partition was revoked. The first
+     #   is when we lose the assignment involuntarily and the second is when the coordinator gets
+     #   this info after we poll with the rebalance callbacks. The first check allows us to get
+     #   this notion even before we poll, but it gets reset when polling happens, hence we also
+     #   need to switch the coordinator state after the revocation (but prior to running more
+     #   jobs)
      def revoked?
-       coordinator.revoked?
+       client.assignment_lost? || coordinator.revoked?
      end
 
      # @return [Boolean] are we retrying processing after an error. This can be used to provide a
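
Since the sharper `#revoked?` now also consults librdkafka's live assignment-lost state, long-running batch work can bail out early instead of processing messages it no longer owns. A sketch — the consumer class and `process_slowly` are hypothetical placeholders:

```ruby
# Illustrative sketch: stop a long batch once the assignment is lost,
# rather than processing (and marking) messages we no longer own.
class LongRunningConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      break if revoked?

      process_slowly(message) # hypothetical domain logic
      mark_as_consumed(message)
    end
  end
end
```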
data/lib/karafka/connection/client.rb CHANGED
@@ -41,9 +41,6 @@ module Karafka
        @buffer = RawMessagesBuffer.new
        @rebalance_manager = RebalanceManager.new
        @kafka = build_consumer
-       # Marks if we need to offset. If we did not store offsets, we should not commit the offset
-       # position as it will crash rdkafka
-       @offsetting = false
        # We need to keep track of what we have paused for resuming
        # In case we lose a partition, we still need to resume it, otherwise it won't be fetched
        # again if we get reassigned to it later on. We need to keep them as after revocation we
@@ -109,8 +106,14 @@ module Karafka
          end
        end
 
+       # @return [Boolean] true if our current assignment has been lost involuntarily.
+       def assignment_lost?
+         @mutex.synchronize do
+           @kafka.assignment_lost?
+         end
+       end
+
        # Commits the offset on a current consumer in a non-blocking or blocking way.
-       # Ignoring a case where there would not be an offset (for example when rebalance occurs).
        #
        # @param async [Boolean] should the commit happen async or sync (async by default)
        # @return [Boolean] whether committing was successful. It may not be, when we no longer own
@@ -118,7 +121,7 @@ module Karafka
        #
        # @note This will commit all the offsets for the whole consumer. In order to achieve
        #   granular control over where the offset should be for particular topic partitions, the
-       #   store_offset should be used to only store new offset when we want to to be flushed
+       #   store_offset should be used to only store new offsets when we want them to be flushed
        def commit_offsets(async: true)
          @mutex.lock
 
@@ -256,7 +259,6 @@ module Karafka
 
        @mutex.synchronize do
          @closed = false
-         @offsetting = false
          @paused_tpls.clear
          @kafka = build_consumer
        end
@@ -281,7 +283,6 @@ module Karafka
        # @param message [Karafka::Messages::Message]
        # @return [Boolean] true if we could store the offset (if we still own the partition)
        def internal_store_offset(message)
-         @offsetting = true
          @kafka.store_offset(message)
          true
        rescue Rdkafka::RdkafkaError => e
@@ -294,11 +295,11 @@ module Karafka
        # Non thread-safe message committing method
        # @param async [Boolean] should the commit happen async or sync (async by default)
        # @return [Boolean] true if offset commit worked, false if we've lost the assignment
+       # @note We do **not** consider `no_offset` a problem and we allow committing offsets
+       #   even when none are stored, because with a sync commit it refreshes the ownership
+       #   state of the consumer in a sync way.
        def internal_commit_offsets(async: true)
-         return true unless @offsetting
-
          @kafka.commit(nil, async)
-         @offsetting = false
 
          true
        rescue Rdkafka::RdkafkaError => e
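
The hunk above ends at the `rescue`. For context, a self-contained sketch of the boolean contract around such rdkafka commits — this is not Karafka's verbatim rescue body, and the handled codes here are a simplified assumption:

```ruby
# Sketch: :no_offset is fine per the @note above, while :assignment_lost
# means we no longer own the partitions and must report failure.
require 'rdkafka'

def safe_commit(consumer, async: true)
  consumer.commit(nil, async)
  true
rescue Rdkafka::RdkafkaError => e
  return true if e.code == :no_offset        # nothing stored, not a problem
  return false if e.code == :assignment_lost # lost partitions

  raise
end
```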
data/lib/karafka/connection/rebalance_manager.rb CHANGED
@@ -49,9 +49,8 @@ module Karafka
      # Callback that kicks in inside of rdkafka, when new partitions are assigned.
      #
      # @private
-     # @param _ [Rdkafka::Consumer]
      # @param partitions [Rdkafka::Consumer::TopicPartitionList]
-     def on_partitions_assigned(_, partitions)
+     def on_partitions_assigned(partitions)
        @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
        @changed = true
      end
@@ -59,9 +58,8 @@ module Karafka
      # Callback that kicks in inside of rdkafka, when partitions are revoked.
      #
      # @private
-     # @param _ [Rdkafka::Consumer]
      # @param partitions [Rdkafka::Consumer::TopicPartitionList]
-     def on_partitions_revoked(_, partitions)
+     def on_partitions_revoked(partitions)
        @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
        @changed = true
      end
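
The `transform_values` mapping in these callbacks reduces rdkafka's topic partition list to a plain `topic => [partition numbers]` hash. A standalone illustration with a plain Ruby stand-in for `Rdkafka::Consumer::Partition`:

```ruby
# PartitionStub stands in for Rdkafka::Consumer::Partition.
PartitionStub = Struct.new(:partition, :offset)

tpl_hash = {
  'events'   => [PartitionStub.new(0, nil), PartitionStub.new(1, nil)],
  'payments' => [PartitionStub.new(3, nil)]
}

tpl_hash.transform_values { |parts| parts.map(&:partition) }
# => {"events"=>[0, 1], "payments"=>[3]}
```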
data/lib/karafka/messages/batch_metadata.rb CHANGED
@@ -20,14 +20,21 @@ module Karafka
      ) do
        # This lag describes how long it took for a message to be consumed from the moment it was
        # created
+       #
+       # @return [Integer] number of milliseconds
+       # @note In case of usage in workless flows, this value will be set to -1
        def consumption_lag
-         time_distance_in_ms(processed_at, created_at)
+         processed_at ? time_distance_in_ms(processed_at, created_at) : -1
        end
 
        # This lag describes how long a batch had to wait before it was picked up by one of the
        # workers
+       #
+       # @return [Integer] number of milliseconds
+       # @note In case of usage in workless flows, this value will be set to -1
        def processing_lag
-         time_distance_in_ms(processed_at, scheduled_at)
+         processed_at ? time_distance_in_ms(processed_at, scheduled_at) : -1
        end
 
        private
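
The `processed_at ? ... : -1` guard is the #1475 fix: a batch built but never picked up (for example during shutdown) has no `processed_at`, and previously the subtraction crashed. A standalone illustration of the guarded math, with an assumed `time_distance_in_ms` equivalent:

```ruby
# Illustrative equivalent of the private helper used above.
def time_distance_in_ms(from, to)
  ((from - to) * 1_000).round
end

created_at   = Time.now - 2
processed_at = Time.now

processed_at ? time_distance_in_ms(processed_at, created_at) : -1 # => ~2000

# Workless flow: never processed, so 2.1.4+ yields -1 instead of raising
processed_at = nil
processed_at ? time_distance_in_ms(processed_at, created_at) : -1 # => -1
```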
data/lib/karafka/messages/builders/message.rb CHANGED
@@ -12,9 +12,6 @@ module Karafka
      # @param received_at [Time] moment when we've received the message
      # @return [Karafka::Messages::Message] message object with payload and metadata
      def call(kafka_message, topic, received_at)
-       # @see https://github.com/appsignal/rdkafka-ruby/issues/168
-       kafka_message.headers.transform_keys!(&:to_s)
-
        metadata = Karafka::Messages::Metadata.new(
          timestamp: kafka_message.timestamp,
          headers: kafka_message.headers,
data/lib/karafka/patches/rdkafka/bindings.rb CHANGED
@@ -49,14 +49,13 @@ module Karafka
        #
        # @param code [Integer]
        # @param opaque [Rdkafka::Opaque]
-       # @param consumer [Rdkafka::Consumer]
        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
-       def trigger_callbacks(code, opaque, consumer, tpl)
+       def trigger_callbacks(code, opaque, tpl)
          case code
          when RB::RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS
-           opaque.call_on_partitions_assigned(consumer, tpl)
+           opaque.call_on_partitions_assigned(tpl)
          when RB::RD_KAFKA_RESP_ERR__REVOKE_PARTITIONS
-           opaque.call_on_partitions_revoked(consumer, tpl)
+           opaque.call_on_partitions_revoked(tpl)
          end
        rescue StandardError => e
          Karafka.monitor.instrument(
@@ -92,9 +91,8 @@ module Karafka
          return unless opaque
 
          tpl = ::Rdkafka::Consumer::TopicPartitionList.from_native_tpl(partitions_ptr).freeze
-         consumer = ::Rdkafka::Consumer.new(client_ptr)
 
-         pr.trigger_callbacks(code, opaque, consumer, tpl)
+         pr.trigger_callbacks(code, opaque, tpl)
        end
      end
    end
data/lib/karafka/processing/strategies/default.rb CHANGED
@@ -60,6 +60,24 @@ module Karafka
        true
      end
 
+     # Triggers an async offset commit
+     #
+     # @return [Boolean] true if we still own the partition.
+     # @note Due to its async nature, this may not fully represent the offset state in some
+     #   edge cases (like for example going beyond max.poll.interval)
+     def commit_offsets
+       client.commit_offsets(async: true)
+     end
+
+     # Triggers a synchronous offsets commit to Kafka
+     #
+     # @return [Boolean] true if we still own the partition, false otherwise.
+     # @note This is fully synchronous, hence the result of this can be used in DB
+     #   transactions etc. as a way of making sure that we still own the partition.
+     def commit_offsets!
+       client.commit_offsets(async: false)
+     end
+
      # No actions needed for the standard flow here
      def handle_before_enqueue
        nil
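
The `@note` on `#commit_offsets!` hints at pairing the synchronous commit with a database transaction. A hedged sketch of that pattern — ActiveRecord and the `Order` model are assumptions, not part of Karafka:

```ruby
# Sketch: roll the DB work back if the partition was lost, so the new
# owner of the partition can process the message instead.
class OrdersConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      Order.transaction do
        Order.create!(message.payload)
        mark_as_consumed(message)

        # Fully synchronous commit: false means we no longer own the partition
        raise ActiveRecord::Rollback unless commit_offsets!
      end
    end
  end
end
```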
data/lib/karafka/setup/attributes_map.rb CHANGED
@@ -47,6 +47,7 @@ module Karafka
        fetch.max.bytes
        fetch.message.max.bytes
        fetch.min.bytes
+       fetch.queue.backoff.ms
        fetch.wait.max.ms
        group.id
        group.instance.id
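
`fetch.queue.backoff.ms` is a librdkafka consumer setting newly allow-listed here. A sketch of passing it through Karafka's kafka settings — the broker address, client id, and the 100 ms value are illustrative:

```ruby
# Sketch: forwarding the newly allow-listed librdkafka option.
class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
    config.kafka = {
      'bootstrap.servers': '127.0.0.1:9092',
      # How long librdkafka postpones the next fetch when the local fetch
      # queue thresholds have been exceeded
      'fetch.queue.backoff.ms': 100
    }
  end
end
```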
data/lib/karafka/version.rb CHANGED
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
    # Current Karafka version
-   VERSION = '2.1.3'
+   VERSION = '2.1.5.beta1'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
-   version: 2.1.3
+   version: 2.1.5.beta1
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
    Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
    MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
    -----END CERTIFICATE-----
- date: 2023-05-29 00:00:00.000000000 Z
+ date: 2023-06-18 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: karafka-core
@@ -43,20 +43,20 @@ dependencies:
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
-       version: 2.0.13
+       version: 2.1.0.beta1
    - - "<"
      - !ruby/object:Gem::Version
-       version: 3.0.0
+       version: 2.2.0
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - ">="
        - !ruby/object:Gem::Version
-         version: 2.0.13
+         version: 2.1.0.beta1
      - - "<"
        - !ruby/object:Gem::Version
-         version: 3.0.0
+         version: 2.2.0
  - !ruby/object:Gem::Dependency
    name: thor
    requirement: !ruby/object:Gem::Requirement
@@ -77,7 +77,7 @@ dependencies:
      requirements:
      - - ">="
        - !ruby/object:Gem::Version
-         version: 2.5.3
+         version: 2.6.1.beta1
      - - "<"
        - !ruby/object:Gem::Version
          version: 3.0.0
@@ -87,7 +87,7 @@ dependencies:
      requirements:
      - - ">="
        - !ruby/object:Gem::Version
-         version: 2.5.3
+         version: 2.6.1.beta1
      - - "<"
        - !ruby/object:Gem::Version
          version: 3.0.0
@@ -214,7 +214,6 @@ files:
  - lib/karafka/messages/parser.rb
  - lib/karafka/messages/seek.rb
  - lib/karafka/patches/rdkafka/bindings.rb
- - lib/karafka/patches/rdkafka/consumer.rb
  - lib/karafka/pro.rb
  - lib/karafka/pro/active_job/consumer.rb
  - lib/karafka/pro/active_job/dispatcher.rb
@@ -415,9 +414,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
      version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ">="
+   - - ">"
      - !ruby/object:Gem::Version
-       version: '0'
+       version: 1.3.1
  requirements: []
  rubygems_version: 3.4.10
  signing_key:
metadata.gz.sig CHANGED
Binary file
data/lib/karafka/patches/rdkafka/consumer.rb DELETED
@@ -1,22 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   # Patches to external components
-   module Patches
-     # Rdkafka related patches
-     module Rdkafka
-       # Rdkafka::Consumer patches
-       module Consumer
-         # A method that allows us to get the native kafka consumer name
-         # @return [String] consumer instance name
-         # @note We need this to make sure that we allocate proper dispatched events only to
-         #   callback listeners that should publish them
-         def name
-           @name ||= ::Rdkafka::Bindings.rd_kafka_name(@native_kafka)
-         end
-       end
-     end
-   end
- end
-
- ::Rdkafka::Consumer.include ::Karafka::Patches::Rdkafka::Consumer