karafka 2.0.36 → 2.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +4 -4
- data/README.md +1 -1
- data/lib/karafka/active_job/consumer.rb +22 -7
- data/lib/karafka/admin.rb +50 -15
- data/lib/karafka/cli/topics.rb +3 -0
- data/lib/karafka/instrumentation/notifications.rb +3 -0
- data/lib/karafka/pro/active_job/consumer.rb +2 -6
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1b9653385cf5a3b1e27eae06d53b9761c9a1f265252f721773258459eb3df1e7
+  data.tar.gz: c0af983ab0539e8463bf2612068a6b261de1325078c3e8600b0d6df0f596d100
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d9000a8f71d7fff762db5f567956f6ea68e436b428014c509ae233730c9f75fd6ac311e51b0022999dfdce64362c86dab6912ce549378d9def231e5749961140
+  data.tar.gz: f980261b5ada2f46efbf919aac86ab63da5bccce26639b9e7d98c07c6012cc3c727189a548627687092ee2802aca8df3d5459bcdcc8d9d29b35f2d6da92a64fc
checksums.yaml.gz.sig
CHANGED
Binary file
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,15 @@
 # Karafka framework changelog
 
+## 2.0.38 (2023-03-27)
+- [Improvement] Introduce `Karafka::Admin#read_watermark_offsets` to get low and high watermark offsets values.
+- [Improvement] Track active_job_id in instrumentation (#1372)
+- [Improvement] Improve `#read_topic` reading in case of a compacted partition where the offset is below the low watermark offset. This should optimize reading and should not go beyond the low watermark offset.
+- [Improvement] Allow `#read_topic` to accept instance settings to overwrite any settings needed to customize reading behaviours.
+
+## 2.0.37 (2023-03-20)
+- [Fix] Declarative topics execution on a secondary cluster run topics creation on the primary one (#1365)
+- [Fix] Admin read operations commit offset when not needed (#1369)
+
 ## 2.0.36 (2023-03-17)
 - [Refactor] Rename internal naming of `Structurable` to `Declaratives` for declarative topics feature.
 - [Fix] AJ + DLQ + MOM + LRJ is pausing indefinitely after the first job (#1362)
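The two `Karafka::Admin` additions listed for 2.0.38 can be tried from a loaded Karafka process (for example via `bundle exec karafka console`). A minimal sketch of the new watermark helper; the topic name `events` and partition `0` are placeholders:

```ruby
# Requires a booted Karafka application so that Karafka::App.config.kafka is set.
# 'events' and partition 0 are placeholder values for this sketch.
low, high = Karafka::Admin.read_watermark_offsets('events', 0)

puts "low watermark: #{low}, high watermark: #{high}"
# At most (high - low) messages are currently retrievable; compaction and retention
# may make the real number smaller.
puts "at most #{high - low} messages available"
```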
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    karafka (2.0.
+    karafka (2.0.38)
       karafka-core (>= 2.0.12, < 3.0.0)
       thor (>= 0.20)
       waterdrop (>= 2.4.10, < 3.0.0)
@@ -10,10 +10,10 @@ PATH
 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.0.4.
-      activesupport (= 7.0.4.
+    activejob (7.0.4.3)
+      activesupport (= 7.0.4.3)
       globalid (>= 0.3.6)
-    activesupport (7.0.4.
+    activesupport (7.0.4.3)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
data/README.md
CHANGED
@@ -86,7 +86,7 @@ bundle exec karafka server
 
 I also sell Karafka Pro subscriptions. It includes a commercial-friendly license, priority support, architecture consultations, enhanced Web UI and high throughput data processing-related features (virtual partitions, long-running jobs, and more).
 
-**
+**10%** of the income will be distributed back to other OSS projects that Karafka uses under the hood.
 
 Help me provide high-quality open-source software. Please see the Karafka [homepage](https://karafka.io/#become-pro) for more details.
 
data/lib/karafka/active_job/consumer.rb
CHANGED
@@ -12,16 +12,31 @@ module Karafka
       messages.each do |message|
         break if Karafka::App.stopping?

-          # We technically speaking could set this as deserializer and reference it from the
-          # message instead of using the `#raw_payload`. This is not done on purpose to simplify
-          # the ActiveJob setup here
-          job = ::ActiveSupport::JSON.decode(message.raw_payload)
+          consume_job(message)

-          tags.add(:job_class, job['job_class'])
+          mark_as_consumed(message)
+        end
+      end

-          ::ActiveJob::Base.execute(job)
+      private

-          mark_as_consumed(message)
+      # Consumes a message with the job and runs needed instrumentation
+      #
+      # @param job_message [Karafka::Messages::Message] message with active job
+      def consume_job(job_message)
+        # We technically speaking could set this as deserializer and reference it from the
+        # message instead of using the `#raw_payload`. This is not done on purpose to simplify
+        # the ActiveJob setup here
+        job = ::ActiveSupport::JSON.decode(job_message.raw_payload)
+
+        tags.add(:job_class, job['job_class'])
+
+        payload = { caller: self, job: job, message: job_message }
+
+        # We publish both to make it consistent with `consumer.x` events
+        Karafka.monitor.instrument('active_job.consume', payload)
+        Karafka.monitor.instrument('active_job.consumed', payload) do
+          ::ActiveJob::Base.execute(job)
         end
       end
     end
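Because job execution is now wrapped in `active_job.consume` / `active_job.consumed` instrumentation with a payload of `caller`, `job` and `message`, the monitor can be used to observe jobs. A sketch of a block subscription; the log format is illustrative and it assumes the monitor adds `time` to block-instrumented events, as it does for other `consumer.*` events:

```ruby
# Subscribe to the wrapping event published by Karafka::ActiveJob::Consumer#consume_job
Karafka.monitor.subscribe('active_job.consumed') do |event|
  job = event[:job]

  # 'job_class' and 'job_id' are part of the standard ActiveJob serialization payload
  Karafka.logger.info(
    "ActiveJob #{job['job_class']} (#{job['job_id']}) consumed in #{event[:time]}ms"
  )
end
```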
data/lib/karafka/admin.rb
CHANGED
@@ -28,7 +28,10 @@ module Karafka
       'enable.partition.eof': true,
       'statistics.interval.ms': 0,
       # Fetch at most 5 MBs when using admin
-      'fetch.message.max.bytes': 5 * 1_048_576
+      'fetch.message.max.bytes': 5 * 1_048_576,
+      # Do not commit offset automatically, this prevents offset tracking for operations involving
+      # a consumer instance
+      'enable.auto.commit': false
     }.freeze

     private_constant :Topic, :CONFIG_DEFAULTS, :MAX_WAIT_TIMEOUT, :MAX_ATTEMPTS
@@ -41,17 +44,32 @@ module Karafka
       # @param count [Integer] how many messages we want to get at most
       # @param start_offset [Integer] offset from which we should start. If -1 is provided
       #   (default) we will start from the latest offset
+      # @param settings [Hash] kafka extra settings (optional)
       #
       # @return [Array<Karafka::Messages::Message>] array with messages
-      def read_topic(name, partition, count, start_offset = -1)
+      def read_topic(name, partition, count, start_offset = -1, settings = {})
        messages = []
        tpl = Rdkafka::Consumer::TopicPartitionList.new
+        low_offset, high_offset = nil

-        with_consumer do |consumer|
-
-
+        with_consumer(settings) do |consumer|
+          low_offset, high_offset = consumer.query_watermark_offsets(name, partition)
+
+          # Select offset dynamically if -1 or less
+          start_offset = high_offset - count if start_offset.negative?

-
+          # Build the requested range - since first element is on the start offset we need to
+          # subtract one from requested count to end up with expected number of elements
+          requested_range = (start_offset..start_offset + (count - 1))
+          # Establish theoretical available range. Note, that this does not handle cases related to
+          # log retention or compaction
+          available_range = (low_offset..high_offset)
+          # Select only offset that we can select. This will remove all the potential offsets that
+          # are below the low watermark offset
+          possible_range = requested_range.select { |offset| available_range.include?(offset) }
+
+          start_offset = possible_range.first
+          count = possible_range.count

          tpl.add_topic_and_partitions_with_offsets(name, partition => start_offset)
          consumer.assign(tpl)
@@ -61,11 +79,15 @@ module Karafka
          loop do
            # If we've got as many messages as we've wanted stop
            break if messages.size >= count
-            # If we've reached end of the topic messages, don't process more
-            break if !messages.empty? && end_offset <= messages.last.offset

            message = consumer.poll(200)
-
+
+            next unless message
+
+            # If the message we've got is beyond the requested range, stop
+            break unless possible_range.include?(message.offset)
+
+            messages << message
          rescue Rdkafka::RdkafkaError => e
            # End of partition
            break if e.code == :partition_eof
@@ -74,7 +96,7 @@ module Karafka
          end
        end

-        messages.map do |message|
+        messages.map! do |message|
          Messages::Builders::Message.call(
            message,
            # Use topic from routes if we can match it or create a dummy one
@@ -133,6 +155,17 @@ module Karafka
        end
      end

+      # Fetches the watermark offsets for a given topic partition
+      #
+      # @param name [String, Symbol] topic name
+      # @param partition [Integer] partition
+      # @return [Array<Integer, Integer>] low watermark offset and high watermark offset
+      def read_watermark_offsets(name, partition)
+        with_consumer do |consumer|
+          consumer.query_watermark_offsets(name, partition)
+        end
+      end
+
      # @return [Rdkafka::Metadata] cluster metadata info
      def cluster_info
        with_admin do |admin|
@@ -156,15 +189,16 @@ module Karafka

      # Creates admin instance and yields it. After usage it closes the admin instance
      def with_admin
-        admin = config(:producer).admin
+        admin = config(:producer, {}).admin
        yield(admin)
      ensure
        admin&.close
      end

      # Creates consumer instance and yields it. After usage it closes the consumer instance
-
-
+      # @param settings [Hash] extra settings to customize consumer
+      def with_consumer(settings = {})
+        consumer = config(:consumer, settings).consumer
        yield(consumer)
      ensure
        consumer&.close
@@ -193,11 +227,12 @@ module Karafka
      end

      # @param type [Symbol] type of config we want
+      # @param settings [Hash] extra settings for config (if needed)
      # @return [::Rdkafka::Config] rdkafka config
-      def config(type)
+      def config(type, settings)
        config_hash = Karafka::Setup::AttributesMap.public_send(
          type,
-          Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS)
+          Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS).merge!(settings)
        )

        ::Rdkafka::Config.new(config_hash)
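With the reworked `#read_topic`, reads are clamped to the low/high watermark range, and the new optional `settings` argument lets a single call override any consumer setting on top of the defaults above (which now also set `enable.auto.commit` to `false`). A usage sketch; the topic name, partition and the overridden value are illustrative:

```ruby
# Read the last 10 messages of partition 0 of the 'events' topic (placeholder name),
# raising the fetch size only for this call.
messages = Karafka::Admin.read_topic(
  'events',
  0,
  10,
  -1,
  { 'fetch.message.max.bytes': 10 * 1_048_576 }
)

messages.each do |message|
  puts "#{message.offset}: #{message.raw_payload}"
end
```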
data/lib/karafka/cli/topics.rb
CHANGED
@@ -116,11 +116,14 @@ module Karafka
        return @declaratives_routing_topics if @declaratives_routing_topics

        collected_topics = {}
+        default_servers = Karafka::App.config.kafka[:'bootstrap.servers']

        App.consumer_groups.each do |consumer_group|
          consumer_group.topics.each do |topic|
            # Skip topics that were explicitly disabled from management
            next unless topic.declaratives.active?
+            # If bootstrap servers are different, consider this a different cluster
+            next unless default_servers == topic.kafka[:'bootstrap.servers']

            collected_topics[topic.name] ||= topic
          end
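The new `bootstrap.servers` comparison only matters when routing mixes clusters. A hypothetical routing sketch related to #1365, assuming the per-topic `kafka` override and the declarative `config` DSL; consumer classes and broker addresses are placeholders:

```ruby
# Topics whose 'bootstrap.servers' differ from the default kafka config are treated as
# belonging to a different cluster and are skipped by `karafka topics` commands run
# against the primary one.
class OrdersConsumer < Karafka::BaseConsumer
  def consume; end
end

class AuditConsumer < Karafka::BaseConsumer
  def consume; end
end

class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': 'primary-broker:9092' }
  end

  routes.draw do
    topic :orders do
      consumer OrdersConsumer
      config(partitions: 6) # managed: same cluster as the default kafka config
    end

    topic :audit_logs do
      # Different bootstrap.servers, so declarative topics management skips it here
      kafka('bootstrap.servers': 'secondary-broker:9092')
      consumer AuditConsumer
    end
  end
end
```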
data/lib/karafka/instrumentation/notifications.rb
CHANGED
@@ -17,6 +17,9 @@ module Karafka
    # complete list of all the events. Please use the #available_events on fully loaded
    # Karafka system to determine all of the events you can use.
    EVENTS = %w[
+      active_job.consume
+      active_job.consumed
+
      app.initialized
      app.running
      app.quieting
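With the events registered in the notifications bus, listener objects can also hook into them. A small sketch; the class name is illustrative and the method names follow the usual Karafka convention of `on_` plus the event name with dots replaced by underscores:

```ruby
# Listener object subscription - only events with matching on_* methods are delivered
class ActiveJobListener
  def on_active_job_consume(event)
    Karafka.logger.debug("Starting ActiveJob #{event[:job]['job_class']}")
  end

  def on_active_job_consumed(event)
    Karafka.logger.debug("Finished ActiveJob #{event[:job]['job_class']}")
  end
end

Karafka.monitor.subscribe(ActiveJobListener.new)
```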
data/lib/karafka/pro/active_job/consumer.rb
CHANGED
@@ -22,7 +22,7 @@ module Karafka
      #
      # It contains slightly better revocation warranties than the regular blocking consumer as
      # it can stop processing batch of jobs in the middle after the revocation.
-      class Consumer < Karafka::
+      class Consumer < ::Karafka::ActiveJob::Consumer
        # Runs ActiveJob jobs processing and handles lrj if needed
        def consume
          messages.each do |message|
@@ -31,11 +31,7 @@ module Karafka
            break if revoked?
            break if Karafka::App.stopping?

-
-
-            tags.add(:job_class, job['job_class'])
-
-            ::ActiveJob::Base.execute(job)
+            consume_job(message)

            # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
            # this could create random markings.
data/lib/karafka/version.rb
CHANGED
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.
+  version: 2.0.38
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
   MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
   -----END CERTIFICATE-----
-date: 2023-03-
+date: 2023-03-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
metadata.gz.sig
CHANGED
Binary file