karafka 2.0.0.beta1 → 2.0.0.beta4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +9 -23
- data/CHANGELOG.md +47 -0
- data/Gemfile.lock +8 -8
- data/bin/integrations +36 -14
- data/bin/scenario +29 -0
- data/bin/wait_for_kafka +20 -0
- data/config/errors.yml +1 -0
- data/docker-compose.yml +12 -0
- data/karafka.gemspec +2 -2
- data/lib/active_job/karafka.rb +2 -2
- data/lib/karafka/active_job/routing/extensions.rb +31 -0
- data/lib/karafka/base_consumer.rb +65 -42
- data/lib/karafka/connection/client.rb +65 -19
- data/lib/karafka/connection/listener.rb +99 -34
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +50 -54
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/contracts/config.rb +9 -1
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/instrumentation/logger_listener.rb +34 -10
- data/lib/karafka/instrumentation/monitor.rb +3 -1
- data/lib/karafka/licenser.rb +26 -7
- data/lib/karafka/messages/batch_metadata.rb +26 -3
- data/lib/karafka/messages/builders/batch_metadata.rb +17 -29
- data/lib/karafka/messages/builders/message.rb +1 -0
- data/lib/karafka/messages/builders/messages.rb +4 -12
- data/lib/karafka/pro/active_job/consumer.rb +49 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +10 -10
- data/lib/karafka/pro/active_job/job_options_contract.rb +9 -9
- data/lib/karafka/pro/base_consumer.rb +76 -0
- data/lib/karafka/pro/loader.rb +30 -13
- data/lib/karafka/pro/performance_tracker.rb +9 -9
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +31 -0
- data/lib/karafka/pro/routing/extensions.rb +32 -0
- data/lib/karafka/pro/scheduler.rb +54 -0
- data/lib/karafka/processing/executor.rb +34 -7
- data/lib/karafka/processing/executors_buffer.rb +15 -7
- data/lib/karafka/processing/jobs/base.rb +21 -4
- data/lib/karafka/processing/jobs/consume.rb +12 -5
- data/lib/karafka/processing/jobs_builder.rb +28 -0
- data/lib/karafka/processing/jobs_queue.rb +15 -12
- data/lib/karafka/processing/result.rb +34 -0
- data/lib/karafka/processing/worker.rb +23 -17
- data/lib/karafka/processing/workers_batch.rb +5 -0
- data/lib/karafka/routing/consumer_group.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +2 -2
- data/lib/karafka/routing/subscription_groups_builder.rb +3 -2
- data/lib/karafka/routing/topic.rb +5 -0
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +19 -27
- data/lib/karafka/scheduler.rb +10 -11
- data/lib/karafka/server.rb +24 -23
- data/lib/karafka/setup/config.rb +4 -1
- data/lib/karafka/status.rb +1 -3
- data/lib/karafka/version.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +20 -5
- metadata.gz.sig +0 -0
- data/lib/karafka/active_job/routing_extensions.rb +0 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e4e9430d2278617cbed38f5696011603d9c0d8c53813dfc180499dc6e4b97563
|
4
|
+
data.tar.gz: f082a95aa9841912f819dc0598591c4b96d7ef1199eff324e65ca0c601008dae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7252c5503234ab4d35fa02d2bb0a18dd8239584fdddc5b451cfdf028a61f37d59a269bac804913d0abf46e2d3273188560e48aa9de40fbb319c766624c1a3b95
|
7
|
+
data.tar.gz: a4cc5d7c18d2a45483ee26acbacf62c9c13f8824697af96a3f2bf5bccb232d5b07097ed49cfb84a9b46e09f31405813d50b1564d6668f0a483023f449427428b
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/.github/workflows/ci.yml
CHANGED
@@ -21,8 +21,7 @@ jobs:
|
|
21
21
|
uses: ruby/setup-ruby@v1
|
22
22
|
with:
|
23
23
|
ruby-version: 3.1
|
24
|
-
|
25
|
-
run: gem install bundler --no-document
|
24
|
+
bundler-cache: true
|
26
25
|
- name: Install Diffend plugin
|
27
26
|
run: bundle plugin install diffend
|
28
27
|
- name: Bundle Secure
|
@@ -57,25 +56,19 @@ jobs:
|
|
57
56
|
- name: Install package dependencies
|
58
57
|
run: "[ -e $APT_DEPS ] || sudo apt-get install -y --no-install-recommends $APT_DEPS"
|
59
58
|
|
60
|
-
- name:
|
59
|
+
- name: Start Kafka with docker-compose
|
61
60
|
run: |
|
62
61
|
docker-compose up -d
|
63
|
-
sleep 10
|
64
62
|
|
65
63
|
- name: Set up Ruby
|
66
64
|
uses: ruby/setup-ruby@v1
|
67
65
|
with:
|
68
66
|
ruby-version: ${{matrix.ruby}}
|
67
|
+
bundler-cache: true
|
69
68
|
|
70
|
-
- name:
|
69
|
+
- name: Ensure all needed Kafka topics are created and wait if not
|
71
70
|
run: |
|
72
|
-
|
73
|
-
bundle config set without 'tools benchmarks docs'
|
74
|
-
|
75
|
-
- name: Bundle install
|
76
|
-
run: |
|
77
|
-
bundle config set without development
|
78
|
-
bundle install --jobs 4 --retry 3
|
71
|
+
bin/wait_for_kafka
|
79
72
|
|
80
73
|
- name: Run all specs
|
81
74
|
env:
|
@@ -100,26 +93,19 @@ jobs:
|
|
100
93
|
- name: Install package dependencies
|
101
94
|
run: "[ -e $APT_DEPS ] || sudo apt-get install -y --no-install-recommends $APT_DEPS"
|
102
95
|
|
103
|
-
- name:
|
96
|
+
- name: Start Kafka with docker-compose
|
104
97
|
run: |
|
105
98
|
docker-compose up -d
|
106
|
-
sleep 5
|
107
99
|
|
108
100
|
- name: Set up Ruby
|
109
101
|
uses: ruby/setup-ruby@v1
|
110
102
|
with:
|
111
103
|
ruby-version: ${{matrix.ruby}}
|
104
|
+
bundler-cache: true
|
112
105
|
|
113
|
-
- name:
|
114
|
-
run: |
|
115
|
-
gem install bundler --no-document
|
116
|
-
gem update --system --no-document
|
117
|
-
bundle config set without 'tools benchmarks docs'
|
118
|
-
|
119
|
-
- name: Bundle install
|
106
|
+
- name: Ensure all needed Kafka topics are created and wait if not
|
120
107
|
run: |
|
121
|
-
|
122
|
-
bundle install --jobs 4 --retry 3
|
108
|
+
bin/wait_for_kafka
|
123
109
|
|
124
110
|
- name: Run integration tests
|
125
111
|
env:
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,52 @@
|
|
1
1
|
# Karafka framework changelog
|
2
2
|
|
3
|
+
## 2.0.0-beta4 (2022-06-20)
|
4
|
+
- Rename job internal api methods from `#prepare` to `#before_call` and from `#teardown` to `#after_call` to abstract away jobs execution from any type of executors and consumers logic
|
5
|
+
- Remove ability of running `before_consume` and `after_consume` completely. Those should be for internal usage only.
|
6
|
+
- Reorganize how Pro consumer and Pro AJ consumers inherit.
|
7
|
+
- Require WaterDrop `2.3.1`.
|
8
|
+
- Add more integration specs for rebalancing and max poll exceeded.
|
9
|
+
- Move `revoked?` state from PRO to regular Karafka.
|
10
|
+
- Use return value of `mark_as_consumed!` and `mark_as_consumed` as indicator of partition ownership + use it to switch the ownership state.
|
11
|
+
- Do not remove rebalance manager upon client reset and recovery. This will allow us to keep the notion of lost partitions, so we can run revocation jobs for blocking jobs that exceeded the max poll interval.
|
12
|
+
- Run revocation jobs upon reaching max poll interval for blocking jobs.
|
13
|
+
- Early exit `poll` operation upon partition lost or max poll exceeded event.
|
14
|
+
- Always reset consumer instances on timeout exceeded.
|
15
|
+
- Wait for Kafka to create all the needed topics before running specs in CI.
|
16
|
+
|
17
|
+
## 2.0.0-beta3 (2022-06-14)
|
18
|
+
- Jobs building responsibility extracted out of the listener code base.
|
19
|
+
- Fix a case where specs supervisor would try to kill no longer running process (#868)
|
20
|
+
- Fix an unstable integration spec that could misbehave under load
|
21
|
+
- Commit offsets prior to pausing partitions to ensure that the latest offset is always committed
|
22
|
+
- Fix a case where consecutive CTRL+C (non-stop) would cause an exception during forced shutdown
|
23
|
+
- Add missing `consumer.prepared.error` into `LoggerListener`
|
24
|
+
- Delegate partition resuming from the consumers to listeners threads.
|
25
|
+
- Add support for Long Running Jobs (LRJ) for ActiveJob [PRO]
|
26
|
+
- Add support for Long Running Jobs for consumers [PRO]
|
27
|
+
- Allow `active_job_topic` to accept a block for extra topic related settings
|
28
|
+
- Remove no longer needed logger threads
|
29
|
+
- Auto-adapt number of processes for integration specs based on the number of CPUs
|
30
|
+
- Introduce an integration spec runner that prints everything to stdout (better for development)
|
31
|
+
- Introduce extra integration specs for various ActiveJob usage scenarios
|
32
|
+
- Rename consumer method `#prepared` to `#prepare` to reflect better its use-case
|
33
|
+
- For test and dev raise an error when expired license key is used (never for non dev)
|
34
|
+
- Add worker related monitor events (`worker.process` and `worker.processed`)
|
35
|
+
- Update `LoggerListener` to include more useful information about processing and polling messages
|
36
|
+
|
37
|
+
## 2.0.0-beta2 (2022-06-07)
|
38
|
+
- Abstract away notion of topics groups (until now it was just an array)
|
39
|
+
- Optimize how jobs queue is closed. Since we enqueue jobs only from the listeners, we can safely close jobs queue once listeners are done. By extracting this responsibility from listeners, we remove corner cases and race conditions. Note here: for non-blocking jobs we do wait for them to finish while running the `poll`. This ensures, that for async jobs that are long-living, we do not reach `max.poll.interval`.
|
40
|
+
- `Shutdown` jobs are executed in workers to align all the jobs behaviours.
|
41
|
+
- `Shutdown` jobs are always blocking.
|
42
|
+
- Notion of `ListenersBatch` was introduced similar to `WorkersBatch` to abstract this concept.
|
43
|
+
- Change default `shutdown_timeout` to be more than `max_wait_time` not to cause forced shutdown when no messages are being received from Kafka.
|
44
|
+
- Abstract away scheduling of revocation and shutdown jobs for both default and pro schedulers
|
45
|
+
- Introduce a second (internal) messages buffer to distinguish between raw messages buffer and karafka messages buffer
|
46
|
+
- Move messages and their metadata remap process to the listener thread to allow for their inline usage
|
47
|
+
- Change how we wait in the shutdown phase, so shutdown jobs can still use Kafka connection even if they run for a longer period of time. This will prevent us from being kicked out from the group early.
|
48
|
+
- Introduce validation that ensures, that `shutdown_timeout` is more than `max_wait_time`. This will prevent users from ending up with a config that could lead to frequent forceful shutdowns.
|
49
|
+
|
3
50
|
## 2.0.0-beta1 (2022-05-22)
|
4
51
|
- Update the jobs queue blocking engine and allow for non-blocking jobs execution
|
5
52
|
- Provide `#prepared` hook that always runs before the fetching loop is unblocked
|
data/Gemfile.lock
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
karafka (2.0.0.
|
4
|
+
karafka (2.0.0.beta4)
|
5
5
|
dry-configurable (~> 0.13)
|
6
6
|
dry-monitor (~> 0.5)
|
7
7
|
dry-validation (~> 1.7)
|
8
8
|
rdkafka (>= 0.10)
|
9
9
|
thor (>= 0.20)
|
10
|
-
waterdrop (>= 2.3.
|
10
|
+
waterdrop (>= 2.3.1, < 3.0.0)
|
11
11
|
zeitwerk (~> 2.3)
|
12
12
|
|
13
13
|
GEM
|
@@ -45,7 +45,7 @@ GEM
|
|
45
45
|
dry-configurable (~> 0.13, >= 0.13.0)
|
46
46
|
dry-core (~> 0.5, >= 0.5)
|
47
47
|
dry-events (~> 0.2)
|
48
|
-
dry-schema (1.9.
|
48
|
+
dry-schema (1.9.2)
|
49
49
|
concurrent-ruby (~> 1.0)
|
50
50
|
dry-configurable (~> 0.13, >= 0.13.0)
|
51
51
|
dry-core (~> 0.5, >= 0.5)
|
@@ -58,12 +58,12 @@ GEM
|
|
58
58
|
dry-core (~> 0.5, >= 0.5)
|
59
59
|
dry-inflector (~> 0.1, >= 0.1.2)
|
60
60
|
dry-logic (~> 1.0, >= 1.0.2)
|
61
|
-
dry-validation (1.8.
|
61
|
+
dry-validation (1.8.1)
|
62
62
|
concurrent-ruby (~> 1.0)
|
63
63
|
dry-container (~> 0.7, >= 0.7.1)
|
64
64
|
dry-core (~> 0.5, >= 0.5)
|
65
65
|
dry-initializer (~> 3.0)
|
66
|
-
dry-schema (~> 1.
|
66
|
+
dry-schema (~> 1.8, >= 1.8.0)
|
67
67
|
factory_bot (6.2.1)
|
68
68
|
activesupport (>= 5.0.0)
|
69
69
|
ffi (1.15.5)
|
@@ -74,7 +74,7 @@ GEM
|
|
74
74
|
mini_portile2 (2.8.0)
|
75
75
|
minitest (5.15.0)
|
76
76
|
rake (13.0.6)
|
77
|
-
rdkafka (0.
|
77
|
+
rdkafka (0.12.0)
|
78
78
|
ffi (~> 1.15)
|
79
79
|
mini_portile2 (~> 2.6)
|
80
80
|
rake (> 12)
|
@@ -100,14 +100,14 @@ GEM
|
|
100
100
|
thor (1.2.1)
|
101
101
|
tzinfo (2.0.4)
|
102
102
|
concurrent-ruby (~> 1.0)
|
103
|
-
waterdrop (2.3.
|
103
|
+
waterdrop (2.3.1)
|
104
104
|
concurrent-ruby (>= 1.1)
|
105
105
|
dry-configurable (~> 0.13)
|
106
106
|
dry-monitor (~> 0.5)
|
107
107
|
dry-validation (~> 1.7)
|
108
108
|
rdkafka (>= 0.10)
|
109
109
|
zeitwerk (~> 2.3)
|
110
|
-
zeitwerk (2.
|
110
|
+
zeitwerk (2.6.0)
|
111
111
|
|
112
112
|
PLATFORMS
|
113
113
|
x86_64-linux
|
data/bin/integrations
CHANGED
@@ -11,21 +11,21 @@ require 'open3'
|
|
11
11
|
require 'fileutils'
|
12
12
|
require 'pathname'
|
13
13
|
require 'tmpdir'
|
14
|
+
require 'etc'
|
14
15
|
|
15
16
|
ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../')))
|
16
17
|
|
17
|
-
# Raised from the parent process if any of the integration tests fails
|
18
|
-
IntegrationTestError = Class.new(StandardError)
|
19
|
-
|
20
18
|
# How many child processes with integration specs do we want to run in parallel
|
21
|
-
# When the value is high, there's a problem with thread allocation on Github
|
22
|
-
|
19
|
+
# When the value is high, there's a problem with thread allocation on Github CI, that is why
|
20
|
+
# we limit it. Locally we can run a lot of those, as many of them have sleeps and do not use a lot
|
21
|
+
# of CPU
|
22
|
+
CONCURRENCY = ENV.key?('CI') ? 5 : Etc.nprocessors * 2
|
23
23
|
|
24
24
|
# Abstraction around a single test scenario execution process
|
25
25
|
class Scenario
|
26
26
|
# How long a scenario can run before we kill it
|
27
27
|
# This is a fail-safe just in case something would hang
|
28
|
-
MAX_RUN_TIME =
|
28
|
+
MAX_RUN_TIME = 3 * 60 # 3 minutes tops
|
29
29
|
|
30
30
|
# There are rare cases where Karafka may force shutdown for some of the integration cases
|
31
31
|
# This includes exactly those
|
@@ -73,7 +73,12 @@ class Scenario
|
|
73
73
|
# If the thread is running too long, kill it
|
74
74
|
if current_time - @started_at > MAX_RUN_TIME
|
75
75
|
@wait_thr.kill
|
76
|
-
|
76
|
+
|
77
|
+
begin
|
78
|
+
Process.kill('TERM', pid)
|
79
|
+
# It may finish right after we want to kill it, that's why we ignore this
|
80
|
+
rescue Errno::ESRCH
|
81
|
+
end
|
77
82
|
end
|
78
83
|
|
79
84
|
# We read it so it won't grow as we use our default logger that prints to both test.log and
|
@@ -106,14 +111,15 @@ class Scenario
|
|
106
111
|
|
107
112
|
# Prints a status report when scenario is finished and stdout if it failed
|
108
113
|
def report
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
+
if success?
|
115
|
+
print "\e[#{32}m#{'.'}\e[0m"
|
116
|
+
else
|
117
|
+
puts
|
118
|
+
puts "\e[#{31}m#{'[FAILED]'}\e[0m #{name}"
|
114
119
|
puts "Exit code: #{exit_code}"
|
115
120
|
puts @stdout_tail
|
116
121
|
puts @stderr.read
|
122
|
+
puts
|
117
123
|
end
|
118
124
|
end
|
119
125
|
|
@@ -204,5 +210,21 @@ while finished_scenarios.size < scenarios.size
|
|
204
210
|
sleep(0.1)
|
205
211
|
end
|
206
212
|
|
207
|
-
|
208
|
-
|
213
|
+
failed_scenarios = finished_scenarios.reject(&:success?)
|
214
|
+
|
215
|
+
# Report once more on the failed jobs
|
216
|
+
# This will only list scenarios that failed without printing their stdout here.
|
217
|
+
if failed_scenarios.empty?
|
218
|
+
puts
|
219
|
+
else
|
220
|
+
puts "\nFailed scenarios:\n\n"
|
221
|
+
|
222
|
+
failed_scenarios.each do |scenario|
|
223
|
+
puts "\e[#{31}m#{'[FAILED]'}\e[0m #{scenario.name}"
|
224
|
+
end
|
225
|
+
|
226
|
+
puts
|
227
|
+
|
228
|
+
# Exit with 1 if not all scenarios were successful
|
229
|
+
exit 1
|
230
|
+
end
|
data/bin/scenario
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Runner for non-parallel execution of a single scenario.
|
4
|
+
# It prints all the info stdout, etc and basically replaces itself with the scenario execution.
|
5
|
+
# It is useful when we work with a single spec and we need all the debug info
|
6
|
+
|
7
|
+
raise 'This code needs to be executed WITHOUT bundle exec' if Kernel.const_defined?(:Bundler)
|
8
|
+
|
9
|
+
require 'open3'
|
10
|
+
require 'fileutils'
|
11
|
+
require 'pathname'
|
12
|
+
require 'tmpdir'
|
13
|
+
require 'etc'
|
14
|
+
|
15
|
+
ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../')))
|
16
|
+
|
17
|
+
# Load all the specs
|
18
|
+
specs = Dir[ROOT_PATH.join('spec/integrations/**/*.rb')]
|
19
|
+
|
20
|
+
# If filters are provided, apply them
|
21
|
+
# Allows to provide several filters one after another and applies all of them
|
22
|
+
ARGV.each do |filter|
|
23
|
+
specs.delete_if { |name| !name.include?(filter) }
|
24
|
+
end
|
25
|
+
|
26
|
+
raise ArgumentError, "No integration specs with filters: #{ARGV.join(', ')}" if specs.empty?
|
27
|
+
raise ArgumentError, "Many specs found with filters: #{ARGV.join(', ')}" if specs.size != 1
|
28
|
+
|
29
|
+
exec("bundle exec ruby -r #{ROOT_PATH}/spec/integrations_helper.rb #{specs[0]}")
|
data/bin/wait_for_kafka
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
# This script allows us to wait for Kafka docker to fully be ready
|
4
|
+
# We consider it fully ready when all our topics that need to be created are created as expected
|
5
|
+
|
6
|
+
KAFKA_NAME='karafka_20_kafka'
|
7
|
+
ZOOKEEPER='zookeeper:2181'
|
8
|
+
LIST_CMD="kafka-topics.sh --list --zookeeper $ZOOKEEPER"
|
9
|
+
|
10
|
+
# Take the number of topics that we need to create prior to running anything
|
11
|
+
TOPICS_COUNT=`cat docker-compose.yml | grep -E -i 'integrations_|benchmarks_' | wc -l`
|
12
|
+
|
13
|
+
# And wait until all of them are created
|
14
|
+
until (((`docker exec $KAFKA_NAME $LIST_CMD | wc -l`) >= $TOPICS_COUNT));
|
15
|
+
do
|
16
|
+
echo "Waiting for Kafka to create all the needed topics..."
|
17
|
+
sleep 1
|
18
|
+
done
|
19
|
+
|
20
|
+
echo "All the needed topics created."
|
data/config/errors.yml
CHANGED
@@ -2,6 +2,7 @@ en:
|
|
2
2
|
dry_validation:
|
3
3
|
errors:
|
4
4
|
max_timeout_vs_pause_max_timeout: pause_timeout must be less or equal to pause_max_timeout
|
5
|
+
shutdown_timeout_vs_max_wait_time: shutdown_timeout must be more than max_wait_time
|
5
6
|
topics_names_not_unique: all topic names within a single consumer group must be unique
|
6
7
|
required_usage_count: Given topic must be used at least once
|
7
8
|
consumer_groups_inclusion: Unknown consumer group
|
data/docker-compose.yml
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
version: '2'
|
2
2
|
services:
|
3
3
|
zookeeper:
|
4
|
+
container_name: karafka_20_zookeeper
|
4
5
|
image: wurstmeister/zookeeper
|
5
6
|
ports:
|
6
7
|
- '2181:2181'
|
7
8
|
kafka:
|
9
|
+
container_name: karafka_20_kafka
|
8
10
|
image: wurstmeister/kafka
|
9
11
|
ports:
|
10
12
|
- '9092:9092'
|
@@ -17,6 +19,16 @@ services:
|
|
17
19
|
"integrations_0_02:2:1,\
|
18
20
|
integrations_1_02:2:1,\
|
19
21
|
integrations_2_02:2:1,\
|
22
|
+
integrations_3_02:2:1,\
|
23
|
+
integrations_4_02:2:1,\
|
24
|
+
integrations_5_02:2:1,\
|
25
|
+
integrations_6_02:2:1,\
|
26
|
+
integrations_7_02:2:1,\
|
27
|
+
integrations_8_02:2:1,\
|
28
|
+
integrations_9_02:2:1,\
|
29
|
+
integrations_10_02:2:1,\
|
30
|
+
integrations_11_02:2:1,\
|
31
|
+
integrations_12_02:2:1,\
|
20
32
|
integrations_0_03:3:1,\
|
21
33
|
integrations_1_03:3:1,\
|
22
34
|
integrations_2_03:3:1,\
|
data/karafka.gemspec
CHANGED
@@ -14,14 +14,14 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.homepage = 'https://karafka.io'
|
15
15
|
spec.summary = 'Ruby based framework for working with Apache Kafka'
|
16
16
|
spec.description = 'Framework used to simplify Apache Kafka based Ruby applications development'
|
17
|
-
spec.
|
17
|
+
spec.licenses = ['LGPL-3.0', 'Commercial']
|
18
18
|
|
19
19
|
spec.add_dependency 'dry-configurable', '~> 0.13'
|
20
20
|
spec.add_dependency 'dry-monitor', '~> 0.5'
|
21
21
|
spec.add_dependency 'dry-validation', '~> 1.7'
|
22
22
|
spec.add_dependency 'rdkafka', '>= 0.10'
|
23
23
|
spec.add_dependency 'thor', '>= 0.20'
|
24
|
-
spec.add_dependency 'waterdrop', '>= 2.3.
|
24
|
+
spec.add_dependency 'waterdrop', '>= 2.3.1', '< 3.0.0'
|
25
25
|
spec.add_dependency 'zeitwerk', '~> 2.3'
|
26
26
|
|
27
27
|
spec.required_ruby_version = '>= 2.6.0'
|
data/lib/active_job/karafka.rb
CHANGED
@@ -14,8 +14,8 @@ begin
|
|
14
14
|
# We extend routing builder by adding a simple wrapper for easier jobs topics defining
|
15
15
|
# This needs to be extended here as it is going to be used in karafka routes, hence doing that in
|
16
16
|
# the railtie initializer would be too late
|
17
|
-
::Karafka::Routing::Builder.include ::Karafka::ActiveJob::
|
18
|
-
::Karafka::Routing::Proxy.include ::Karafka::ActiveJob::
|
17
|
+
::Karafka::Routing::Builder.include ::Karafka::ActiveJob::Routing::Extensions
|
18
|
+
::Karafka::Routing::Proxy.include ::Karafka::ActiveJob::Routing::Extensions
|
19
19
|
rescue LoadError
|
20
20
|
# We extend ActiveJob stuff in the railtie
|
21
21
|
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
# ActiveJob related Karafka stuff
|
5
|
+
module ActiveJob
|
6
|
+
# Karafka routing ActiveJob related components
|
7
|
+
module Routing
|
8
|
+
# Routing extensions for ActiveJob
|
9
|
+
module Extensions
|
10
|
+
# This method simplifies routes definition for ActiveJob topics / queues by auto-injecting
|
11
|
+
# the consumer class
|
12
|
+
# @param name [String, Symbol] name of the topic where ActiveJobs jobs should go
|
13
|
+
# @param block [Proc] block that we can use for some extra configuration
|
14
|
+
def active_job_topic(name, &block)
|
15
|
+
topic(name) do
|
16
|
+
consumer App.config.internal.active_job.consumer
|
17
|
+
|
18
|
+
next unless block
|
19
|
+
|
20
|
+
instance_eval(&block)
|
21
|
+
|
22
|
+
# This is handled by our custom ActiveJob consumer
|
23
|
+
# Without this, default behaviour would cause messages to skip upon shutdown as the
|
24
|
+
# offset would be committed for the last message
|
25
|
+
manual_offset_management true
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -15,33 +15,60 @@ module Karafka
|
|
15
15
|
# @return [Waterdrop::Producer] producer instance
|
16
16
|
attr_accessor :producer
|
17
17
|
|
18
|
+
def initialize
|
19
|
+
# We re-use one to save on object allocation
|
20
|
+
# It also allows us to transfer the consumption notion to another batch
|
21
|
+
@consumption = Processing::Result.new
|
22
|
+
end
|
23
|
+
|
24
|
+
# Can be used to run preparation code
|
25
|
+
#
|
26
|
+
# @private
|
27
|
+
# @note This should not be used by the end users as it is part of the lifecycle of things but
|
28
|
+
# not as part of the public api. This can act as a hook when creating non-blocking
|
29
|
+
# consumers and doing other advanced stuff
|
30
|
+
def on_before_consume; end
|
31
|
+
|
18
32
|
# Executes the default consumer flow.
|
19
33
|
#
|
34
|
+
# @return [Boolean] true if there was no exception, otherwise false.
|
35
|
+
#
|
20
36
|
# @note We keep the seek offset tracking, and use it to compensate for async offset flushing
|
21
37
|
# that may not yet kick in when error occurs. That way we pause always on the last processed
|
22
38
|
# message.
|
23
39
|
def on_consume
|
24
40
|
Karafka.monitor.instrument('consumer.consumed', caller: self) do
|
25
41
|
consume
|
26
|
-
|
27
|
-
pause_tracker.reset
|
28
|
-
|
29
|
-
# Mark as consumed only if manual offset management is not on
|
30
|
-
return if topic.manual_offset_management
|
31
|
-
|
32
|
-
# We use the non-blocking one here. If someone needs the blocking one, can implement it
|
33
|
-
# with manual offset management
|
34
|
-
mark_as_consumed(messages.last)
|
35
42
|
end
|
43
|
+
|
44
|
+
@consumption.success!
|
36
45
|
rescue StandardError => e
|
46
|
+
@consumption.failure!
|
47
|
+
|
37
48
|
Karafka.monitor.instrument(
|
38
49
|
'error.occurred',
|
39
50
|
error: e,
|
40
51
|
caller: self,
|
41
52
|
type: 'consumer.consume.error'
|
42
53
|
)
|
54
|
+
end
|
55
|
+
|
56
|
+
# @private
|
57
|
+
# @note This should not be used by the end users as it is part of the lifecycle of things but
|
58
|
+
# not as part of the public api.
|
59
|
+
def on_after_consume
|
60
|
+
if @consumption.success?
|
61
|
+
pause_tracker.reset
|
62
|
+
|
63
|
+
# Mark as consumed only if manual offset management is not on
|
64
|
+
return if topic.manual_offset_management?
|
43
65
|
|
44
|
-
|
66
|
+
# We use the non-blocking one here. If someone needs the blocking one, can implement it
|
67
|
+
# with manual offset management
|
68
|
+
mark_as_consumed(messages.last)
|
69
|
+
else
|
70
|
+
pause(@seek_offset || messages.first.offset)
|
71
|
+
end
|
45
72
|
end
|
46
73
|
|
47
74
|
# Trigger method for running on shutdown.
|
@@ -76,31 +103,8 @@ module Karafka
|
|
76
103
|
)
|
77
104
|
end
|
78
105
|
|
79
|
-
# Can be used to run preparation code
|
80
|
-
#
|
81
|
-
# @private
|
82
|
-
# @note This should not be used by the end users as it is part of the lifecycle of things but
|
83
|
-
# not as part of the public api. This can act as a hook when creating non-blocking
|
84
|
-
# consumers and doing other advanced stuff
|
85
|
-
def on_prepared
|
86
|
-
Karafka.monitor.instrument('consumer.prepared', caller: self) do
|
87
|
-
prepared
|
88
|
-
end
|
89
|
-
rescue StandardError => e
|
90
|
-
Karafka.monitor.instrument(
|
91
|
-
'error.occurred',
|
92
|
-
error: e,
|
93
|
-
caller: self,
|
94
|
-
type: 'consumer.prepared.error'
|
95
|
-
)
|
96
|
-
end
|
97
|
-
|
98
106
|
private
|
99
107
|
|
100
|
-
# Method that gets called in the blocking flow allowing to setup any type of resources or to
|
101
|
-
# send additional commands to Kafka before the proper execution starts.
|
102
|
-
def prepared; end
|
103
|
-
|
104
108
|
# Method that will perform business logic on data received from Kafka (it will consume
|
105
109
|
# the data)
|
106
110
|
# @note This method needs to be implemented in a subclass. We stub it here as a failover if
|
@@ -120,21 +124,36 @@ module Karafka
|
|
120
124
|
# Marks message as consumed in an async way.
|
121
125
|
#
|
122
126
|
# @param message [Messages::Message] last successfully processed message.
|
127
|
+
# @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
|
128
|
+
# that we were not able and that we have lost the partition.
|
129
|
+
#
|
123
130
|
# @note We keep track of this offset in case we would mark as consumed and got error when
|
124
131
|
# processing another message. In case like this we do not pause on the message we've already
|
125
132
|
# processed but rather at the next one. This applies to both sync and async versions of this
|
126
133
|
# method.
|
127
134
|
def mark_as_consumed(message)
|
128
|
-
client.mark_as_consumed(message)
|
135
|
+
@revoked = !client.mark_as_consumed(message)
|
136
|
+
|
137
|
+
return false if revoked?
|
138
|
+
|
129
139
|
@seek_offset = message.offset + 1
|
140
|
+
|
141
|
+
true
|
130
142
|
end
|
131
143
|
|
132
144
|
# Marks message as consumed in a sync way.
|
133
145
|
#
|
134
146
|
# @param message [Messages::Message] last successfully processed message.
|
147
|
+
# @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
|
148
|
+
# that we were not able and that we have lost the partition.
|
135
149
|
def mark_as_consumed!(message)
|
136
|
-
client.mark_as_consumed!(message)
|
150
|
+
@revoked = !client.mark_as_consumed!(message)
|
151
|
+
|
152
|
+
return false if revoked?
|
153
|
+
|
137
154
|
@seek_offset = message.offset + 1
|
155
|
+
|
156
|
+
true
|
138
157
|
end
|
139
158
|
|
140
159
|
# Pauses processing on a given offset for the current topic partition
|
@@ -144,22 +163,19 @@ module Karafka
|
|
144
163
|
# @param timeout [Integer, nil] how long in milliseconds do we want to pause or nil to use the
|
145
164
|
# default exponential pausing strategy defined for retries
|
146
165
|
def pause(offset, timeout = nil)
|
166
|
+
timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
|
167
|
+
|
147
168
|
client.pause(
|
148
169
|
messages.metadata.topic,
|
149
170
|
messages.metadata.partition,
|
150
171
|
offset
|
151
172
|
)
|
152
|
-
|
153
|
-
timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
|
154
173
|
end
|
155
174
|
|
156
175
|
# Resumes processing of the current topic partition
|
157
176
|
def resume
|
158
|
-
|
159
|
-
|
160
|
-
messages.metadata.partition
|
161
|
-
)
|
162
|
-
|
177
|
+
# This is sufficient to expire a partition pause, as with it the partition will be resumed by the listener
|
178
|
+
# thread before the next poll.
|
163
179
|
pause_tracker.expire
|
164
180
|
end
|
165
181
|
|
@@ -175,5 +191,12 @@ module Karafka
|
|
175
191
|
)
|
176
192
|
)
|
177
193
|
end
|
194
|
+
|
195
|
+
# @return [Boolean] true if partition was revoked from the current consumer
|
196
|
+
# @note We know that partition got revoked because when we try to mark message as consumed,
|
197
|
+
# unless it is successful, it will return false
|
198
|
+
def revoked?
|
199
|
+
@revoked || false
|
200
|
+
end
|
178
201
|
end
|
179
202
|
end
|