karafka 2.0.0.beta3 → 2.0.0.beta4
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +9 -23
- data/CHANGELOG.md +14 -0
- data/Gemfile.lock +5 -5
- data/bin/wait_for_kafka +20 -0
- data/docker-compose.yml +10 -0
- data/karafka.gemspec +1 -1
- data/lib/karafka/base_consumer.rb +50 -42
- data/lib/karafka/connection/client.rb +28 -5
- data/lib/karafka/instrumentation/logger_listener.rb +0 -3
- data/lib/karafka/instrumentation/monitor.rb +0 -1
- data/lib/karafka/pro/active_job/consumer.rb +4 -3
- data/lib/karafka/pro/base_consumer.rb +76 -0
- data/lib/karafka/pro/loader.rb +1 -2
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +1 -1
- data/lib/karafka/processing/executor.rb +22 -5
- data/lib/karafka/processing/jobs/base.rb +8 -3
- data/lib/karafka/processing/jobs/consume.rb +8 -3
- data/lib/karafka/processing/result.rb +34 -0
- data/lib/karafka/processing/worker.rb +2 -2
- data/lib/karafka/routing/topic.rb +5 -0
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +7 -5
- metadata.gz.sig +0 -0
- data/lib/karafka/pro/base_consumer_extensions.rb +0 -66
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e4e9430d2278617cbed38f5696011603d9c0d8c53813dfc180499dc6e4b97563
+  data.tar.gz: f082a95aa9841912f819dc0598591c4b96d7ef1199eff324e65ca0c601008dae
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7252c5503234ab4d35fa02d2bb0a18dd8239584fdddc5b451cfdf028a61f37d59a269bac804913d0abf46e2d3273188560e48aa9de40fbb319c766624c1a3b95
+  data.tar.gz: a4cc5d7c18d2a45483ee26acbacf62c9c13f8824697af96a3f2bf5bccb232d5b07097ed49cfb84a9b46e09f31405813d50b1564d6668f0a483023f449427428b
checksums.yaml.gz.sig
CHANGED
Binary file
data/.github/workflows/ci.yml
CHANGED
@@ -21,8 +21,7 @@ jobs:
         uses: ruby/setup-ruby@v1
         with:
           ruby-version: 3.1
-
-        run: gem install bundler --no-document
+          bundler-cache: true
       - name: Install Diffend plugin
         run: bundle plugin install diffend
       - name: Bundle Secure
@@ -57,25 +56,19 @@ jobs:
       - name: Install package dependencies
         run: "[ -e $APT_DEPS ] || sudo apt-get install -y --no-install-recommends $APT_DEPS"

-      - name:
+      - name: Start Kafka with docker-compose
         run: |
           docker-compose up -d
-          sleep 10

       - name: Set up Ruby
         uses: ruby/setup-ruby@v1
         with:
           ruby-version: ${{matrix.ruby}}
+          bundler-cache: true

-      - name:
+      - name: Ensure all needed Kafka topics are created and wait if not
         run: |
-
-          bundle config set without 'tools benchmarks docs'
-
-      - name: Bundle install
-        run: |
-          bundle config set without development
-          bundle install --jobs 4 --retry 3
+          bin/wait_for_kafka

       - name: Run all specs
         env:
@@ -100,26 +93,19 @@ jobs:
       - name: Install package dependencies
         run: "[ -e $APT_DEPS ] || sudo apt-get install -y --no-install-recommends $APT_DEPS"

-      - name:
+      - name: Start Kafka with docker-compose
         run: |
           docker-compose up -d
-          sleep 5

       - name: Set up Ruby
         uses: ruby/setup-ruby@v1
         with:
           ruby-version: ${{matrix.ruby}}
+          bundler-cache: true

-      - name:
-        run: |
-          gem install bundler --no-document
-          gem update --system --no-document
-          bundle config set without 'tools benchmarks docs'
-
-      - name: Bundle install
+      - name: Ensure all needed Kafka topics are created and wait if not
         run: |
-
-          bundle install --jobs 4 --retry 3
+          bin/wait_for_kafka

       - name: Run integration tests
         env:
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,19 @@
 # Karafka framework changelog

+## 2.0.0-beta4 (2022-06-20)
+- Rename job internal api methods from `#prepare` to `#before_call` and from `#teardown` to `#after_call` to abstract away jobs execution from any type of executors and consumers logic
+- Remove ability of running `before_consume` and `after_consume` completely. Those should be for internal usage only.
+- Reorganize how Pro consumer and Pro AJ consumers inherit.
+- Require WaterDrop `2.3.1`.
+- Add more integration specs for rebalancing and max poll exceeded.
+- Move `revoked?` state from PRO to regular Karafka.
+- Use return value of `mark_as_consumed!` and `mark_as_consumed` as indicator of partition ownership + use it to switch the ownership state.
+- Do not remove rebalance manager upon client reset and recovery. This will allow us to keep the notion of lost partitions, so we can run revocation jobs for blocking jobs that exceeded the max poll interval.
+- Run revocation jobs upon reaching max poll interval for blocking jobs.
+- Early exit `poll` operation upon partition lost or max poll exceeded event.
+- Always reset consumer instances on timeout exceeded.
+- Wait for Kafka to create all the needed topics before running specs in CI.
+
 ## 2.0.0-beta3 (2022-06-14)
 - Jobs building responsibility extracted out of the listener code base.
 - Fix a case where specs supervisor would try to kill no longer running process (#868)
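The ownership-related entries above combine into one consumer-facing contract: `mark_as_consumed` and `mark_as_consumed!` now return a boolean, and a false result also flips the consumer's `revoked?` state. A minimal sketch of how user code could lean on this (the class name and the `process` helper are hypothetical):

    class EventsConsumer < Karafka::BaseConsumer
      def consume
        messages.each do |message|
          process(message) # hypothetical business logic

          # Since beta4 a false return value means the partition was lost,
          # so there is no point in processing the rest of the batch
          break unless mark_as_consumed(message)
        end
      end

      private

      def process(message)
        puts message.raw_payload
      end
    end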
data/Gemfile.lock
CHANGED
@@ -1,13 +1,13 @@
 PATH
   remote: .
   specs:
-    karafka (2.0.0.beta3)
+    karafka (2.0.0.beta4)
       dry-configurable (~> 0.13)
       dry-monitor (~> 0.5)
       dry-validation (~> 1.7)
       rdkafka (>= 0.10)
       thor (>= 0.20)
-      waterdrop (>= 2.3.0, < 3.0.0)
+      waterdrop (>= 2.3.1, < 3.0.0)
       zeitwerk (~> 2.3)

 GEM
@@ -74,7 +74,7 @@ GEM
     mini_portile2 (2.8.0)
     minitest (5.15.0)
     rake (13.0.6)
-    rdkafka (0.
+    rdkafka (0.12.0)
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
@@ -100,14 +100,14 @@ GEM
     thor (1.2.1)
     tzinfo (2.0.4)
       concurrent-ruby (~> 1.0)
-    waterdrop (2.3.0)
+    waterdrop (2.3.1)
       concurrent-ruby (>= 1.1)
       dry-configurable (~> 0.13)
       dry-monitor (~> 0.5)
       dry-validation (~> 1.7)
       rdkafka (>= 0.10)
       zeitwerk (~> 2.3)
-    zeitwerk (2.
+    zeitwerk (2.6.0)

 PLATFORMS
   x86_64-linux
data/bin/wait_for_kafka
ADDED
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# This script allows us to wait for Kafka docker to fully be ready
+# We consider it fully ready when all our topics that need to be created are created as expected
+
+KAFKA_NAME='karafka_20_kafka'
+ZOOKEEPER='zookeeper:2181'
+LIST_CMD="kafka-topics.sh --list --zookeeper $ZOOKEEPER"
+
+# Take the number of topics that we need to create prior to running anything
+TOPICS_COUNT=`cat docker-compose.yml | grep -E -i 'integrations_|benchmarks_' | wc -l`
+
+# And wait until all of them are created
+until (((`docker exec $KAFKA_NAME $LIST_CMD | wc -l`) >= $TOPICS_COUNT));
+do
+  echo "Waiting for Kafka to create all the needed topics..."
+  sleep 1
+done
+
+echo "All the needed topics created."
data/docker-compose.yml
CHANGED
@@ -1,10 +1,12 @@
 version: '2'
 services:
   zookeeper:
+    container_name: karafka_20_zookeeper
     image: wurstmeister/zookeeper
     ports:
       - '2181:2181'
   kafka:
+    container_name: karafka_20_kafka
     image: wurstmeister/kafka
     ports:
       - '9092:9092'
@@ -19,6 +21,14 @@ services:
       integrations_2_02:2:1,\
       integrations_3_02:2:1,\
       integrations_4_02:2:1,\
+      integrations_5_02:2:1,\
+      integrations_6_02:2:1,\
+      integrations_7_02:2:1,\
+      integrations_8_02:2:1,\
+      integrations_9_02:2:1,\
+      integrations_10_02:2:1,\
+      integrations_11_02:2:1,\
+      integrations_12_02:2:1,\
       integrations_0_03:3:1,\
       integrations_1_03:3:1,\
       integrations_2_03:3:1,\
data/karafka.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
   spec.add_dependency 'dry-validation', '~> 1.7'
   spec.add_dependency 'rdkafka', '>= 0.10'
   spec.add_dependency 'thor', '>= 0.20'
-  spec.add_dependency 'waterdrop', '>= 2.3.0', '< 3.0.0'
+  spec.add_dependency 'waterdrop', '>= 2.3.1', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'

   spec.required_ruby_version = '>= 2.6.0'
data/lib/karafka/base_consumer.rb
CHANGED
@@ -15,28 +15,19 @@ module Karafka
     # @return [Waterdrop::Producer] producer instance
     attr_accessor :producer

+    def initialize
+      # We re-use one to save on object allocation
+      # It also allows us to transfer the consumption notion to another batch
+      @consumption = Processing::Result.new
+    end
+
     # Can be used to run preparation code
     #
     # @private
     # @note This should not be used by the end users as it is part of the lifecycle of things but
     #   not as part of the public api. This can act as a hook when creating non-blocking
     #   consumers and doing other advanced stuff
-    def on_prepare
-      Karafka.monitor.instrument('consumer.prepared', caller: self) do
-        prepare
-      end
-
-      true
-    rescue StandardError => e
-      Karafka.monitor.instrument(
-        'error.occurred',
-        error: e,
-        caller: self,
-        type: 'consumer.prepare.error'
-      )
-
-      false
-    end
+    def on_before_consume; end

     # Executes the default consumer flow.
     #
@@ -48,29 +39,36 @@ module Karafka
     def on_consume
       Karafka.monitor.instrument('consumer.consumed', caller: self) do
         consume
-
-        pause_tracker.reset
-
-        # Mark as consumed only if manual offset management is not on
-        next if topic.manual_offset_management
-
-        # We use the non-blocking one here. If someone needs the blocking one, can implement it
-        # with manual offset management
-        mark_as_consumed(messages.last)
       end
-
-      true
+
+      @consumption.success!
     rescue StandardError => e
+      @consumption.failure!
+
       Karafka.monitor.instrument(
         'error.occurred',
        error: e,
        caller: self,
        type: 'consumer.consume.error'
       )
+    end

-      pause(@seek_offset || messages.first.offset)
+    # @private
+    # @note This should not be used by the end users as it is part of the lifecycle of things but
+    #   not as part of the public api.
+    def on_after_consume
+      if @consumption.success?
+        pause_tracker.reset
+
+        # Mark as consumed only if manual offset management is not on
+        return if topic.manual_offset_management?

-      false
+        # We use the non-blocking one here. If someone needs the blocking one, can implement it
+        # with manual offset management
+        mark_as_consumed(messages.last)
+      else
+        pause(@seek_offset || messages.first.offset)
+      end
     end

     # Trigger method for running on shutdown.
@@ -80,8 +78,6 @@ module Karafka
       Karafka.monitor.instrument('consumer.revoked', caller: self) do
         revoked
       end
-
-      true
     rescue StandardError => e
       Karafka.monitor.instrument(
         'error.occurred',
@@ -89,8 +85,6 @@ module Karafka
         caller: self,
         type: 'consumer.revoked.error'
       )
-
-      false
     end

     # Trigger method for running on shutdown.
@@ -100,8 +94,6 @@ module Karafka
       Karafka.monitor.instrument('consumer.shutdown', caller: self) do
         shutdown
       end
-
-      true
     rescue StandardError => e
       Karafka.monitor.instrument(
         'error.occurred',
@@ -109,16 +101,10 @@ module Karafka
         caller: self,
         type: 'consumer.shutdown.error'
       )
-
-      false
     end

     private

-    # Method that gets called in the blocking flow allowing to setup any type of resources or to
-    # send additional commands to Kafka before the proper execution starts.
-    def prepare; end
-
     # Method that will perform business logic and on data received from Kafka (it will consume
     #   the data)
     # @note This method needs bo be implemented in a subclass. We stub it here as a failover if
@@ -138,21 +124,36 @@ module Karafka
     # Marks message as consumed in an async way.
     #
     # @param message [Messages::Message] last successfully processed message.
+    # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
+    #   that we were not able and that we have lost the partition.
+    #
     # @note We keep track of this offset in case we would mark as consumed and got error when
     #   processing another message. In case like this we do not pause on the message we've already
     #   processed but rather at the next one. This applies to both sync and async versions of this
     #   method.
     def mark_as_consumed(message)
-      client.mark_as_consumed(message)
+      @revoked = !client.mark_as_consumed(message)
+
+      return false if revoked?
+
       @seek_offset = message.offset + 1
+
+      true
     end

     # Marks message as consumed in a sync way.
     #
     # @param message [Messages::Message] last successfully processed message.
+    # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
+    #   that we were not able and that we have lost the partition.
     def mark_as_consumed!(message)
-      client.mark_as_consumed!(message)
+      @revoked = !client.mark_as_consumed!(message)
+
+      return false if revoked?
+
       @seek_offset = message.offset + 1
+
+      true
     end

     # Pauses processing on a given offset for the current topic partition
@@ -190,5 +191,12 @@ module Karafka
         )
       )
     end
+
+    # @return [Boolean] true if partition was revoked from the current consumer
+    # @note We know that partition got revoked because when we try to mark message as consumed,
+    #   unless if is successful, it will return false
+    def revoked?
+      @revoked || false
+    end
   end
 end
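Taken together, the changes above split the consumption lifecycle into three externally driven phases, with the shared `Processing::Result` instance carrying the outcome between them. A simplified sketch of the per-batch call order (illustrative only, not the exact executor internals):

    # built by the executor from raw Kafka data
    consumer.messages = batch
    consumer.on_before_consume  # no-op here; Pro lrj consumers pause the partition
    consumer.on_consume         # runs #consume, records success!/failure! on @consumption
    consumer.on_after_consume   # resets the pause tracker and marks offsets, or pauses on failure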
data/lib/karafka/connection/client.rb
CHANGED
@@ -190,6 +190,7 @@ module Karafka
     # Marks given message as consumed.
     #
     # @param [Karafka::Messages::Message] message that we want to mark as processed
+    # @return [Boolean] true if successful. False if we no longer own given partition
     # @note This method won't trigger automatic offsets commits, rather relying on the offset
     #   check-pointing trigger that happens with each batch processed
     def mark_as_consumed(message)
@@ -199,8 +200,10 @@ module Karafka
     # Marks a given message as consumed and commits the offsets in a blocking way.
     #
     # @param [Karafka::Messages::Message] message that we want to mark as processed
+    # @return [Boolean] true if successful. False if we no longer own given partition
     def mark_as_consumed!(message)
-      mark_as_consumed(message)
+      return false unless mark_as_consumed(message)
+
       commit_offsets!
     end
@@ -217,22 +220,35 @@ module Karafka

     private

+    # When we cannot store an offset, it means we no longer own the partition
+    #
     # Non thread-safe offset storing method
     # @param message [Karafka::Messages::Message]
+    # @return [Boolean] true if we could store the offset (if we still own the partition)
     def internal_store_offset(message)
       @offsetting = true
       @kafka.store_offset(message)
+      true
+    rescue Rdkafka::RdkafkaError => e
+      return false if e.code == :assignment_lost
+      return false if e.code == :state
+
+      raise e
     end

     # Non thread-safe message committing method
     # @param async [Boolean] should the commit happen async or sync (async by default)
+    # @return [Boolean] true if offset commit worked, false if we've lost the assignment
     def internal_commit_offsets(async: true)
-      return unless @offsetting
+      return true unless @offsetting

       @kafka.commit(nil, async)
       @offsetting = false
+
+      true
     rescue Rdkafka::RdkafkaError => e
-      return if e.code == :no_offset
+      return false if e.code == :assignment_lost
+      return false if e.code == :no_offset

       raise e
     end
@@ -250,7 +266,8 @@ module Karafka

       @kafka.close
       @buffer.clear
-      @rebalance_manager.clear
+      # @note We do not clear rebalance manager here as we may still have revocation info here
+      #   that we want to consider valid prior to running another reconnection
     end
   end
@@ -303,7 +320,13 @@ module Karafka

       time_poll.backoff

-      retry
+      # We return nil, so we do not restart until running the whole loop
+      # This allows us to run revocation jobs and other things and we will pick up new work
+      # next time after dispatching all the things that are needed
+      #
+      # If we would retry here, the client reset would become transparent and we would not have
+      # a chance to take any actions
+      nil
     end

     # Builds a new rdkafka consumer instance based on the subscription group configuration
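The pattern in `internal_store_offset` and `internal_commit_offsets` generalizes well: rdkafka raises `Rdkafka::RdkafkaError` with a symbolic `code` when the assignment was lost, and translating that into a boolean keeps the callers simple. A standalone sketch of the same idea against a plain rdkafka consumer (the method name is illustrative, not part of Karafka):

    def store_offset_safely(kafka_consumer, message)
      kafka_consumer.store_offset(message)
      true
    rescue Rdkafka::RdkafkaError => e
      # :assignment_lost is raised when a rebalance took the partition away
      return false if e.code == :assignment_lost

      raise
    end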
data/lib/karafka/instrumentation/logger_listener.rb
CHANGED
@@ -98,9 +98,6 @@ module Karafka
       details = (error.backtrace || []).join("\n")

       case type
-      when 'consumer.prepared.error'
-        error "Consumer prepared error: #{error}"
-        error details
       when 'consumer.consume.error'
         error "Consumer consuming error: #{error}"
         error details
data/lib/karafka/pro/active_job/consumer.rb
CHANGED
@@ -20,7 +20,7 @@ module Karafka
       #
       # It contains slightly better revocation warranties than the regular blocking consumer as
       # it can stop processing batch of jobs in the middle after the revocation.
-      class Consumer < Karafka::ActiveJob::Consumer
+      class Consumer < Karafka::Pro::BaseConsumer
         # Runs ActiveJob jobs processing and handles lrj if needed
         def consume
           messages.each do |message|
@@ -33,11 +33,12 @@ module Karafka
             ::ActiveSupport::JSON.decode(message.raw_payload)
           )

+          mark_as_consumed(message)
+
           # We check it twice as the job may be long running
+          # If marking fails, it also means it got revoked and we can stop consuming
           return if revoked?

-          mark_as_consumed(message)
-
           # Do not process more if we are shutting down
           break if Karafka::App.stopping?
         end
data/lib/karafka/pro/base_consumer.rb
ADDED
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component.
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    # Karafka PRO consumer.
+    #
+    # If you use PRO, all your consumers should inherit (indirectly) from it.
+    #
+    # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
+    #   after each batch is processed.
+    class BaseConsumer < Karafka::BaseConsumer
+      # Pause for tops 31 years
+      MAX_PAUSE_TIME = 1_000_000_000_000
+
+      private_constant :MAX_PAUSE_TIME
+
+      # Pauses processing of a given partition until we're done with the processing
+      # This ensures, that we can easily poll not reaching the `max.poll.interval`
+      def on_before_consume
+        # Pause at the first message in a batch. That way in case of a crash, we will not loose
+        # any messages
+        return unless topic.long_running_job?
+
+        pause(messages.first.offset, MAX_PAUSE_TIME)
+      end
+
+      # Runs extra logic after consumption that is related to handling long running jobs
+      # @note This overwrites the `#on_after_consume` from the base consumer
+      def on_after_consume
+        # Nothing to do if we lost the partition
+        return if revoked?
+
+        if @consumption.success?
+          pause_tracker.reset
+
+          # We use the non-blocking one here. If someone needs the blocking one, can implement it
+          # with manual offset management
+          # Mark as consumed only if manual offset management is not on
+          mark_as_consumed(messages.last) unless topic.manual_offset_management?
+
+          # If this is not a long running job there is nothing for us to do here
+          return unless topic.long_running_job?
+
+          # Once processing is done, we move to the new offset based on commits
+          # Here, in case manual offset management is off, we have the new proper offset of a
+          # first message from another batch from `@seek_offset`. If manual offset management
+          # is on, we move to place where the user indicated it was finished.
+          seek(@seek_offset || messages.first.offset)
+          resume
+        else
+          # If processing failed, we need to pause
+          pause(@seek_offset || messages.first.offset)
+        end
+      end
+
+      # Marks this consumer revoked state as true
+      # This allows us for things like lrj to finish early as this state may change during lrj
+      # execution
+      def on_revoked
+        # @note This may already be set to true if we tried to commit offsets and failed. In case
+        #   like this it will automatically be marked as revoked.
+        @revoked = true
+        super
+      end
+    end
+  end
+end
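For context, a long running job consumer built on top of this class might look roughly as follows. This is a hedged sketch: the class and helper names are made up, and the routing side that makes `topic.long_running_job?` return true is not shown here:

    class ReportsConsumer < Karafka::Pro::BaseConsumer
      def consume
        messages.each do |message|
          build_report(message) # may take longer than max.poll.interval.ms

          # Stop early if a rebalance revoked this partition mid-batch
          return if revoked?
        end
      end

      private

      def build_report(message)
        # hypothetical long running work
      end
    end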
data/lib/karafka/pro/loader.rb
CHANGED
@@ -15,9 +15,9 @@ module Karafka
     class Loader
       # All the pro components that need to be loaded
       COMPONENTS = %w[
+        base_consumer
         performance_tracker
         scheduler
-        base_consumer_extensions
         processing/jobs/consume_non_blocking
         processing/jobs_builder
         routing/extensions
@@ -42,7 +42,6 @@ module Karafka
         config.internal.active_job.job_options_contract = ActiveJob::JobOptionsContract.new

         ::Karafka::Routing::Topic.include(Routing::Extensions)
-        ::Karafka::BaseConsumer.prepend(BaseConsumerExtensions)

         config.monitor.subscribe(PerformanceTracker.instance)
       end
data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb
CHANGED
@@ -26,7 +26,7 @@ module Karafka
        #   management. This layer of the framework knows nothing about Kafka messages consumption.
        class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
          # Releases the blocking lock after it is done with the preparation phase for this job
-          def prepare
+          def before_call
            super
            @non_blocking = true
          end
data/lib/karafka/processing/executor.rb
CHANGED
@@ -45,12 +45,20 @@ module Karafka
      # @param messages [Array<Karafka::Messages::Message>]
      # @param received_at [Time] the moment we've received the batch (actually the moment we've)
      #   enqueued it, but good enough
-      def prepare(messages, received_at)
+      def before_consume(messages, received_at)
        # Recreate consumer with each batch if persistence is not enabled
        # We reload the consumers with each batch instead of relying on some external signals
        # when needed for consistency. That way devs may have it on or off and not in this
        # middle state, where re-creation of a consumer instance would occur only sometimes
-        @consumer = nil unless ::Karafka::App.config.consumer_persistence
+        @recreate = true unless ::Karafka::App.config.consumer_persistence
+
+        # If @recreate was set to true (aside from non persistent), it means, that revocation or
+        # a shutdown happened and we need to have a new instance for running another consume for
+        # this topic partition
+        if @recreate
+          @consumer = nil
+          @recreate = false
+        end

        # First we build messages batch...
        consumer.messages = Messages::Builders::Messages.call(
@@ -59,7 +67,7 @@ module Karafka
          received_at
        )

-        consumer.on_prepare
+        consumer.on_before_consume
      end

      # Runs consumer data processing against given batch and handles failures and errors.
@@ -68,6 +76,11 @@ module Karafka
        consumer.on_consume
      end

+      # Runs consumer after consumption code
+      def after_consume
+        consumer.on_after_consume if @consumer
+      end
+
      # Runs the controller `#revoked` method that should be triggered when a given consumer is
      # no longer needed due to partitions reassignment.
      #
@@ -76,9 +89,13 @@ module Karafka
      #
      # @note We run it only when consumer was present, because presence indicates, that at least
      #   a single message has been consumed.
+      #
+      # @note We do not reset the consumer but we indicate need for recreation instead, because
+      #   after the revocation, there still may be `#after_consume` running that needs a given
+      #   consumer instance.
      def revoked
        consumer.on_revoked if @consumer
-        @consumer = nil
+        @recreate = true
      end

      # Runs the controller `#shutdown` method that should be triggered when a given consumer is
@@ -90,7 +107,7 @@ module Karafka
      # There is a case, where the consumer no longer exists because it was revoked, in case like
      #   that we do not build a new instance and shutdown should not be triggered.
      consumer.on_shutdown if @consumer
-      @consumer = nil
+      @recreate = true
    end

    private
data/lib/karafka/processing/jobs/base.rb
CHANGED
@@ -5,7 +5,7 @@ module Karafka
    # Namespace for all the jobs that are suppose to run in workers.
    module Jobs
      # Base class for all the jobs types that are suppose to run in workers threads.
-      # Each job can have 3 main entry-points: `#prepare`, `#call` and `#teardown`
+      # Each job can have 3 main entry-points: `#before_call`, `#call` and `#after_call`
      # Only `#call` is required.
      class Base
        extend Forwardable
@@ -23,10 +23,15 @@ module Karafka
        end

        # When redefined can run any code that should run before executing the proper code
-        def prepare; end
+        def before_call; end
+
+        # The main entry-point of a job
+        def call
+          raise NotImplementedError, 'Please implement in a subclass'
+        end

        # When redefined can run any code that should run after executing the proper code
-        def teardown; end
+        def after_call; end

        # @return [Boolean] is this a non-blocking job
        #
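A hypothetical subclass illustrating the renamed three-phase contract (sketch only; the framework's real jobs wrap an executor, as the `Consume` job below shows):

    class TimedJob < Karafka::Processing::Jobs::Base
      def before_call
        @started_at = Time.now
      end

      def call
        # the proper work goes here
      end

      def after_call
        puts "job took #{Time.now - @started_at}s"
      end
    end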
data/lib/karafka/processing/jobs/consume.rb
CHANGED
@@ -20,15 +20,20 @@ module Karafka
          super()
        end

-        # Runs the preparations on the executor
-        def prepare
-          executor.prepare(@messages, @created_at)
+        # Runs the before consumption preparations on the executor
+        def before_call
+          executor.before_consume(@messages, @created_at)
        end

        # Runs the given executor
        def call
          executor.consume
        end
+
+        # Runs any error handling and other post-consumption stuff on the executor
+        def after_call
+          executor.after_consume
+        end
      end
    end
  end
data/lib/karafka/processing/result.rb
ADDED
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Processing
+    # A simple object that allows us to keep track of processing state.
+    # It allows to indicate if given thing moved from success to a failure or the other way around
+    # Useful for tracking consumption state
+    class Result
+      def initialize
+        @success = true
+      end
+
+      # @return [Boolean]
+      def failure?
+        !success?
+      end
+
+      # @return [Boolean]
+      def success?
+        @success
+      end
+
+      # Marks state as successful
+      def success!
+        @success = true
+      end
+
+      # Marks state as failure
+      def failure!
+        @success = false
+      end
+    end
+  end
+end
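The object is intentionally tiny; a quick sketch of the state transitions the base consumer relies on:

    result = Karafka::Processing::Result.new
    result.success? # => true, the initial state
    result.failure!
    result.failure? # => true
    result.success!
    result.success? # => true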
data/lib/karafka/processing/worker.rb
CHANGED
@@ -50,7 +50,7 @@ module Karafka
          Karafka.monitor.instrument('worker.process', caller: self, job: job)

          Karafka.monitor.instrument('worker.processed', caller: self, job: job) do
-            job.prepare
+            job.before_call

            # If a job is marked as non blocking, we can run a tick in the job queue and if there
            # are no other blocking factors, the job queue will be unlocked.
@@ -60,7 +60,7 @@ module Karafka

            job.call

-            job.teardown
+            job.after_call

            true
          end
data/lib/karafka/routing/topic.rb
CHANGED
@@ -66,6 +66,11 @@ module Karafka
      end
    end

+    # @return [Boolean] true if this topic offset is handled by the end user
+    def manual_offset_management?
+      manual_offset_management
+    end
+
    # @return [Hash] hash with all the topic attributes
    # @note This is being used when we validate the consumer_group and its topics
    def to_h
data/lib/karafka/version.rb
CHANGED
-  VERSION = '2.0.0.beta3'
+  VERSION = '2.0.0.beta4'
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.0.beta3
+  version: 2.0.0.beta4
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
   R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
   pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
   -----END CERTIFICATE-----
-date: 2022-06-14 00:00:00.000000000 Z
+date: 2022-06-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dry-configurable
@@ -112,7 +112,7 @@ dependencies:
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.3.0
+      version: 2.3.1
   - - "<"
     - !ruby/object:Gem::Version
       version: 3.0.0
@@ -122,7 +122,7 @@ dependencies:
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.3.0
+      version: 2.3.1
   - - "<"
     - !ruby/object:Gem::Version
       version: 3.0.0
@@ -173,6 +173,7 @@ files:
 - bin/karafka
 - bin/scenario
 - bin/stress
+- bin/wait_for_kafka
 - certs/karafka-pro.pem
 - certs/mensfeld.pem
 - config/errors.yml
@@ -232,7 +233,7 @@ files:
 - lib/karafka/pro/active_job/consumer.rb
 - lib/karafka/pro/active_job/dispatcher.rb
 - lib/karafka/pro/active_job/job_options_contract.rb
-- lib/karafka/pro/base_consumer_extensions.rb
+- lib/karafka/pro/base_consumer.rb
 - lib/karafka/pro/loader.rb
 - lib/karafka/pro/performance_tracker.rb
 - lib/karafka/pro/processing/jobs/consume_non_blocking.rb
@@ -248,6 +249,7 @@ files:
 - lib/karafka/processing/jobs/shutdown.rb
 - lib/karafka/processing/jobs_builder.rb
 - lib/karafka/processing/jobs_queue.rb
+- lib/karafka/processing/result.rb
 - lib/karafka/processing/worker.rb
 - lib/karafka/processing/workers_batch.rb
 - lib/karafka/railtie.rb
metadata.gz.sig
CHANGED
Binary file
data/lib/karafka/pro/base_consumer_extensions.rb
DELETED
@@ -1,66 +0,0 @@
-# frozen_string_literal: true
-
-# This Karafka component is a Pro component.
-# All of the commercial components are present in the lib/karafka/pro directory of this
-# repository and their usage requires commercial license agreement.
-#
-# Karafka has also commercial-friendly license, commercial support and commercial components.
-#
-# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
-# your code to Maciej Mensfeld.
-
-module Karafka
-  module Pro
-    # Extensions to the base consumer that make it more pro and fancy
-    #
-    # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
-    #   after each batch is processed.
-    #
-    # They need to be added to the consumer via `#prepend`
-    module BaseConsumerExtensions
-      # Pause for tops 31 years
-      MAX_PAUSE_TIME = 1_000_000_000_000
-
-      private_constant :MAX_PAUSE_TIME
-
-      # Pauses processing of a given partition until we're done with the processing
-      # This ensures, that we can easily poll not reaching the `max.poll.interval`
-      def on_prepare
-        # Pause at the first message in a batch. That way in case of a crash, we will not loose
-        # any messages
-        pause(messages.first.offset, MAX_PAUSE_TIME) if topic.long_running_job?
-
-        super
-      end
-
-      # After user code, we seek and un-pause our partition
-      def on_consume
-        # If anything went wrong here, we should not run any partition management as it's Karafka
-        # core that will handle the backoff
-        return unless super
-
-        return unless topic.long_running_job?
-
-        # Nothing to resume if it was revoked
-        return if revoked?
-
-        # Once processing is done, we move to the new offset based on commits
-        seek(@seek_offset || messages.first.offset)
-        resume
-      end
-
-      # Marks this consumer revoked state as true
-      # This allows us for things like lrj to finish early as this state may change during lrj
-      # execution
-      def on_revoked
-        @revoked = true
-        super
-      end
-
-      # @return [Boolean] true if partition was revoked from the current consumer
-      def revoked?
-        @revoked || false
-      end
-    end
-  end
-end