karafka 2.0.0.beta1 → 2.0.0.beta4

Files changed (62)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +9 -23
  4. data/CHANGELOG.md +47 -0
  5. data/Gemfile.lock +8 -8
  6. data/bin/integrations +36 -14
  7. data/bin/scenario +29 -0
  8. data/bin/wait_for_kafka +20 -0
  9. data/config/errors.yml +1 -0
  10. data/docker-compose.yml +12 -0
  11. data/karafka.gemspec +2 -2
  12. data/lib/active_job/karafka.rb +2 -2
  13. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  14. data/lib/karafka/base_consumer.rb +65 -42
  15. data/lib/karafka/connection/client.rb +65 -19
  16. data/lib/karafka/connection/listener.rb +99 -34
  17. data/lib/karafka/connection/listeners_batch.rb +24 -0
  18. data/lib/karafka/connection/messages_buffer.rb +50 -54
  19. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  20. data/lib/karafka/contracts/config.rb +9 -1
  21. data/lib/karafka/helpers/async.rb +33 -0
  22. data/lib/karafka/instrumentation/logger_listener.rb +34 -10
  23. data/lib/karafka/instrumentation/monitor.rb +3 -1
  24. data/lib/karafka/licenser.rb +26 -7
  25. data/lib/karafka/messages/batch_metadata.rb +26 -3
  26. data/lib/karafka/messages/builders/batch_metadata.rb +17 -29
  27. data/lib/karafka/messages/builders/message.rb +1 -0
  28. data/lib/karafka/messages/builders/messages.rb +4 -12
  29. data/lib/karafka/pro/active_job/consumer.rb +49 -0
  30. data/lib/karafka/pro/active_job/dispatcher.rb +10 -10
  31. data/lib/karafka/pro/active_job/job_options_contract.rb +9 -9
  32. data/lib/karafka/pro/base_consumer.rb +76 -0
  33. data/lib/karafka/pro/loader.rb +30 -13
  34. data/lib/karafka/pro/performance_tracker.rb +9 -9
  35. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  36. data/lib/karafka/pro/processing/jobs_builder.rb +31 -0
  37. data/lib/karafka/pro/routing/extensions.rb +32 -0
  38. data/lib/karafka/pro/scheduler.rb +54 -0
  39. data/lib/karafka/processing/executor.rb +34 -7
  40. data/lib/karafka/processing/executors_buffer.rb +15 -7
  41. data/lib/karafka/processing/jobs/base.rb +21 -4
  42. data/lib/karafka/processing/jobs/consume.rb +12 -5
  43. data/lib/karafka/processing/jobs_builder.rb +28 -0
  44. data/lib/karafka/processing/jobs_queue.rb +15 -12
  45. data/lib/karafka/processing/result.rb +34 -0
  46. data/lib/karafka/processing/worker.rb +23 -17
  47. data/lib/karafka/processing/workers_batch.rb +5 -0
  48. data/lib/karafka/routing/consumer_group.rb +1 -1
  49. data/lib/karafka/routing/subscription_group.rb +2 -2
  50. data/lib/karafka/routing/subscription_groups_builder.rb +3 -2
  51. data/lib/karafka/routing/topic.rb +5 -0
  52. data/lib/karafka/routing/topics.rb +38 -0
  53. data/lib/karafka/runner.rb +19 -27
  54. data/lib/karafka/scheduler.rb +10 -11
  55. data/lib/karafka/server.rb +24 -23
  56. data/lib/karafka/setup/config.rb +4 -1
  57. data/lib/karafka/status.rb +1 -3
  58. data/lib/karafka/version.rb +1 -1
  59. data.tar.gz.sig +0 -0
  60. metadata +20 -5
  61. metadata.gz.sig +0 -0
  62. data/lib/karafka/active_job/routing_extensions.rb +0 -18
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: f108cb4288d0ed0510381f51c77d49e052b947f6180c9b9c0b06e0ac2b599894
- data.tar.gz: 3d79066d0107c08f450ca9f4c3b5c4a39aae497836c80bf8380c65f1406b82c0
+ metadata.gz: e4e9430d2278617cbed38f5696011603d9c0d8c53813dfc180499dc6e4b97563
+ data.tar.gz: f082a95aa9841912f819dc0598591c4b96d7ef1199eff324e65ca0c601008dae
  SHA512:
- metadata.gz: 4aae257010c992c59ce4b01ead54ff2cfd4e8ccd8cbe6b52214b3cedf8f879690e0d577f2b41f44b1ab6888d7e27bbc92f3ba4a69e8b127687fb4c43bff51fbc
- data.tar.gz: f65e425cb84152d20a055bdb9a94fd98280597cdf5e431337cb8604040534cacbfdd03efd6dc23b86c9ecf25721c860bd55ca75ad3f98e4c66136a88c1efc4e7
+ metadata.gz: 7252c5503234ab4d35fa02d2bb0a18dd8239584fdddc5b451cfdf028a61f37d59a269bac804913d0abf46e2d3273188560e48aa9de40fbb319c766624c1a3b95
+ data.tar.gz: a4cc5d7c18d2a45483ee26acbacf62c9c13f8824697af96a3f2bf5bccb232d5b07097ed49cfb84a9b46e09f31405813d50b1564d6668f0a483023f449427428b
checksums.yaml.gz.sig CHANGED
Binary file
data/.github/workflows/ci.yml CHANGED
@@ -21,8 +21,7 @@ jobs:
  uses: ruby/setup-ruby@v1
  with:
  ruby-version: 3.1
- - name: Install latest bundler
- run: gem install bundler --no-document
+ bundler-cache: true
  - name: Install Diffend plugin
  run: bundle plugin install diffend
  - name: Bundle Secure
@@ -57,25 +56,19 @@ jobs:
  - name: Install package dependencies
  run: "[ -e $APT_DEPS ] || sudo apt-get install -y --no-install-recommends $APT_DEPS"

- - name: Run Kafka with docker-compose
+ - name: Start Kafka with docker-compose
  run: |
  docker-compose up -d
- sleep 10

  - name: Set up Ruby
  uses: ruby/setup-ruby@v1
  with:
  ruby-version: ${{matrix.ruby}}
+ bundler-cache: true

- - name: Install latest Bundler
+ - name: Ensure all needed Kafka topics are created and wait if not
  run: |
- gem install bundler --no-document
- bundle config set without 'tools benchmarks docs'
-
- - name: Bundle install
- run: |
- bundle config set without development
- bundle install --jobs 4 --retry 3
+ bin/wait_for_kafka

  - name: Run all specs
  env:
@@ -100,26 +93,19 @@ jobs:
  - name: Install package dependencies
  run: "[ -e $APT_DEPS ] || sudo apt-get install -y --no-install-recommends $APT_DEPS"

- - name: Run Kafka with docker-compose
+ - name: Start Kafka with docker-compose
  run: |
  docker-compose up -d
- sleep 5

  - name: Set up Ruby
  uses: ruby/setup-ruby@v1
  with:
  ruby-version: ${{matrix.ruby}}
+ bundler-cache: true

- - name: Install latest Bundler
- run: |
- gem install bundler --no-document
- gem update --system --no-document
- bundle config set without 'tools benchmarks docs'
-
- - name: Bundle install
+ - name: Ensure all needed Kafka topics are created and wait if not
  run: |
- bundle config set without development
- bundle install --jobs 4 --retry 3
+ bin/wait_for_kafka

  - name: Run integration tests
  env:
data/CHANGELOG.md CHANGED
@@ -1,5 +1,52 @@
  # Karafka framework changelog

+ ## 2.0.0-beta4 (2022-06-20)
+ - Rename job internal API methods from `#prepare` to `#before_call` and from `#teardown` to `#after_call` to abstract jobs execution away from any type of executors and consumers logic
+ - Completely remove the ability to run `before_consume` and `after_consume`. Those should be for internal usage only.
+ - Reorganize how the Pro consumer and Pro AJ consumers inherit.
+ - Require WaterDrop `2.3.1`.
+ - Add more integration specs for rebalancing and max poll exceeded.
+ - Move the `revoked?` state from PRO to regular Karafka.
+ - Use the return value of `mark_as_consumed!` and `mark_as_consumed` as an indicator of partition ownership and use it to switch the ownership state.
+ - Do not remove the rebalance manager upon client reset and recovery. This allows us to keep the notion of lost partitions, so we can run revocation jobs for blocking jobs that exceeded the max poll interval.
+ - Run revocation jobs upon reaching the max poll interval for blocking jobs.
+ - Exit the `poll` operation early upon a partition lost or max poll exceeded event.
+ - Always reset consumer instances on timeout exceeded.
+ - Wait for Kafka to create all the needed topics before running specs in CI.
+
+ ## 2.0.0-beta3 (2022-06-14)
+ - Jobs building responsibility extracted out of the listener code base.
+ - Fix a case where the specs supervisor would try to kill a no longer running process (#868)
+ - Fix an unstable integration spec that could misbehave under load
+ - Commit offsets prior to pausing partitions to ensure that the latest offset is always committed
+ - Fix a case where consecutive CTRL+C (non-stop) would cause an exception during forced shutdown
+ - Add missing `consumer.prepared.error` to `LoggerListener`
+ - Delegate partition resuming from the consumers to the listener threads.
+ - Add support for Long Running Jobs (LRJ) for ActiveJob [PRO]
+ - Add support for Long Running Jobs for consumers [PRO]
+ - Allow `active_job_topic` to accept a block for extra topic-related settings
+ - Remove no longer needed logger threads
+ - Auto-adapt the number of processes for integration specs based on the number of CPUs
+ - Introduce an integration spec runner that prints everything to stdout (better for development)
+ - Introduce extra integration specs for various ActiveJob usage scenarios
+ - Rename consumer method `#prepared` to `#prepare` to better reflect its use case
+ - Raise an error in test and dev when an expired license key is used (never in non-dev environments)
+ - Add worker-related monitor events (`worker.process` and `worker.processed`), as shown in the sketch below
+ - Update `LoggerListener` to include more useful information about processing and polling messages
+
+ ## 2.0.0-beta2 (2022-06-07)
+ - Abstract away the notion of topics groups (until now it was just an array)
+ - Optimize how the jobs queue is closed. Since we enqueue jobs only from the listeners, we can safely close the jobs queue once the listeners are done. By extracting this responsibility from the listeners, we remove corner cases and race conditions. Note: for non-blocking jobs we do wait for them to finish while running `poll`. This ensures that long-living async jobs do not reach `max.poll.interval`.
+ - `Shutdown` jobs are executed in workers to align all the jobs behaviours.
+ - `Shutdown` jobs are always blocking.
+ - The notion of a `ListenersBatch` was introduced, similar to `WorkersBatch`, to abstract this concept.
+ - Change the default `shutdown_timeout` to be more than `max_wait_time`, so as not to cause a forced shutdown when no messages are being received from Kafka.
+ - Abstract away scheduling of revocation and shutdown jobs for both the default and pro schedulers
+ - Introduce a second (internal) messages buffer to distinguish between the raw messages buffer and the Karafka messages buffer
+ - Move the messages and metadata remapping process to the listener thread to allow for their inline usage
+ - Change how we wait in the shutdown phase, so shutdown jobs can still use the Kafka connection even if they run for a longer period of time. This prevents us from being kicked out of the group early.
+ - Introduce a validation that ensures `shutdown_timeout` is more than `max_wait_time`. This prevents users from ending up with a config that could lead to frequent forceful shutdowns.
+
  ## 2.0.0-beta1 (2022-05-22)
  - Update the jobs queue blocking engine and allow for non-blocking jobs execution
  - Provide `#prepared` hook that always runs before the fetching loop is unblocked
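The beta3 list above mentions the new `worker.process` and `worker.processed` monitor events. A minimal sketch of subscribing to one of them from an app's `karafka.rb`; the event names come from the changelog entry, while the payload keys (`job`, `time`) are assumptions, so inspect `event.payload` before relying on them:

```ruby
# A minimal sketch, not part of this diff. Payload keys are assumptions -
# inspect event.payload in your own setup before relying on them.
Karafka.monitor.subscribe('worker.processed') do |event|
  # dry-monitor adds a measured execution time to instrumented block events
  job = event.payload[:job]
  puts "Worker finished #{job.class} in #{event.payload[:time]}ms"
end
```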
data/Gemfile.lock CHANGED
@@ -1,13 +1,13 @@
  PATH
  remote: .
  specs:
- karafka (2.0.0.beta1)
+ karafka (2.0.0.beta4)
  dry-configurable (~> 0.13)
  dry-monitor (~> 0.5)
  dry-validation (~> 1.7)
  rdkafka (>= 0.10)
  thor (>= 0.20)
- waterdrop (>= 2.3.0, < 3.0.0)
+ waterdrop (>= 2.3.1, < 3.0.0)
  zeitwerk (~> 2.3)

  GEM
@@ -45,7 +45,7 @@ GEM
  dry-configurable (~> 0.13, >= 0.13.0)
  dry-core (~> 0.5, >= 0.5)
  dry-events (~> 0.2)
- dry-schema (1.9.1)
+ dry-schema (1.9.2)
  concurrent-ruby (~> 1.0)
  dry-configurable (~> 0.13, >= 0.13.0)
  dry-core (~> 0.5, >= 0.5)
@@ -58,12 +58,12 @@ GEM
  dry-core (~> 0.5, >= 0.5)
  dry-inflector (~> 0.1, >= 0.1.2)
  dry-logic (~> 1.0, >= 1.0.2)
- dry-validation (1.8.0)
+ dry-validation (1.8.1)
  concurrent-ruby (~> 1.0)
  dry-container (~> 0.7, >= 0.7.1)
  dry-core (~> 0.5, >= 0.5)
  dry-initializer (~> 3.0)
- dry-schema (~> 1.9, >= 1.9.1)
+ dry-schema (~> 1.8, >= 1.8.0)
  factory_bot (6.2.1)
  activesupport (>= 5.0.0)
  ffi (1.15.5)
@@ -74,7 +74,7 @@ GEM
  mini_portile2 (2.8.0)
  minitest (5.15.0)
  rake (13.0.6)
- rdkafka (0.11.1)
+ rdkafka (0.12.0)
  ffi (~> 1.15)
  mini_portile2 (~> 2.6)
  rake (> 12)
@@ -100,14 +100,14 @@ GEM
  thor (1.2.1)
  tzinfo (2.0.4)
  concurrent-ruby (~> 1.0)
- waterdrop (2.3.0)
+ waterdrop (2.3.1)
  concurrent-ruby (>= 1.1)
  dry-configurable (~> 0.13)
  dry-monitor (~> 0.5)
  dry-validation (~> 1.7)
  rdkafka (>= 0.10)
  zeitwerk (~> 2.3)
- zeitwerk (2.5.4)
+ zeitwerk (2.6.0)

  PLATFORMS
  x86_64-linux
data/bin/integrations CHANGED
@@ -11,21 +11,21 @@ require 'open3'
  require 'fileutils'
  require 'pathname'
  require 'tmpdir'
+ require 'etc'

  ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../')))

- # Raised from the parent process if any of the integration tests fails
- IntegrationTestError = Class.new(StandardError)
-
  # How many child processes with integration specs do we want to run in parallel
- # When the value is high, there's a problem with thread allocation on Github
- CONCURRENCY = 4
+ # When the value is high, there's a problem with thread allocation on GitHub CI, that is why
+ # we limit it. Locally we can run a lot of them, as many have sleeps and do not use a lot
+ # of CPU
+ CONCURRENCY = ENV.key?('CI') ? 5 : Etc.nprocessors * 2

  # Abstraction around a single test scenario execution process
  class Scenario
  # How long a scenario can run before we kill it
  # This is a fail-safe just in case something would hang
- MAX_RUN_TIME = 60 * 2
+ MAX_RUN_TIME = 3 * 60 # 3 minutes tops

  # There are rare cases where Karafka may force shutdown for some of the integration cases
  # This includes exactly those
@@ -73,7 +73,12 @@ class Scenario
  # If the thread is running too long, kill it
  if current_time - @started_at > MAX_RUN_TIME
  @wait_thr.kill
- Process.kill('TERM', pid)
+
+ begin
+ Process.kill('TERM', pid)
+ # It may finish right after we want to kill it, that's why we ignore this
+ rescue Errno::ESRCH
+ end
  end

  # We read it so it won't grow as we use our default logger that prints to both test.log and
@@ -106,14 +111,15 @@ class Scenario

  # Prints a status report when the scenario is finished, and its stdout if it failed
  def report
- result = success? ? "\e[#{32}m#{'OK'}\e[0m" : "\e[#{31}m#{'FAILED'}\e[0m"
-
- puts "#{result} #{name}"
-
- unless success?
+ if success?
+ print "\e[#{32}m#{'.'}\e[0m"
+ else
+ puts
+ puts "\e[#{31}m#{'[FAILED]'}\e[0m #{name}"
  puts "Exit code: #{exit_code}"
  puts @stdout_tail
  puts @stderr.read
+ puts
  end
  end

@@ -204,5 +210,21 @@ while finished_scenarios.size < scenarios.size
  sleep(0.1)
  end

- # Fail all if any of the tests does not have expected exit code
- raise IntegrationTestError unless finished_scenarios.all?(&:success?)
+ failed_scenarios = finished_scenarios.reject(&:success?)
+
+ # Report once more on the failed jobs
+ # This will only list scenarios that failed without printing their stdout here.
+ if failed_scenarios.empty?
+ puts
+ else
+ puts "\nFailed scenarios:\n\n"
+
+ failed_scenarios.each do |scenario|
+ puts "\e[#{31}m#{'[FAILED]'}\e[0m #{scenario.name}"
+ end
+
+ puts
+
+ # Exit with 1 if not all scenarios were successful
+ exit 1
+ end
data/bin/scenario ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Runner for non-parallel execution of a single scenario.
4
+ # It prints all the info stdout, etc and basically replaces itself with the scenario execution.
5
+ # It is useful when we work with a single spec and we need all the debug info
6
+
7
+ raise 'This code needs to be executed WITHOUT bundle exec' if Kernel.const_defined?(:Bundler)
8
+
9
+ require 'open3'
10
+ require 'fileutils'
11
+ require 'pathname'
12
+ require 'tmpdir'
13
+ require 'etc'
14
+
15
+ ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../')))
16
+
17
+ # Load all the specs
18
+ specs = Dir[ROOT_PATH.join('spec/integrations/**/*.rb')]
19
+
20
+ # If filters is provided, apply
21
+ # Allows to provide several filters one after another and applies all of them
22
+ ARGV.each do |filter|
23
+ specs.delete_if { |name| !name.include?(filter) }
24
+ end
25
+
26
+ raise ArgumentError, "No integration specs with filters: #{ARGV.join(', ')}" if specs.empty?
27
+ raise ArgumentError, "Many specs found with filters: #{ARGV.join(', ')}" if specs.size != 1
28
+
29
+ exec("bundle exec ruby -r #{ROOT_PATH}/spec/integrations_helper.rb #{specs[0]}")
data/bin/wait_for_kafka ADDED
@@ -0,0 +1,20 @@
+ #!/bin/bash
+
+ # This script allows us to wait for the Kafka docker container to be fully ready
+ # We consider it fully ready when all the topics that need to be created are created as expected
+
+ KAFKA_NAME='karafka_20_kafka'
+ ZOOKEEPER='zookeeper:2181'
+ LIST_CMD="kafka-topics.sh --list --zookeeper $ZOOKEEPER"
+
+ # Take the number of topics that we need to create prior to running anything
+ TOPICS_COUNT=`cat docker-compose.yml | grep -E -i 'integrations_|benchmarks_' | wc -l`
+
+ # And wait until all of them are created
+ until (((`docker exec $KAFKA_NAME $LIST_CMD | wc -l`) >= $TOPICS_COUNT));
+ do
+ echo "Waiting for Kafka to create all the needed topics..."
+ sleep 1
+ done
+
+ echo "All the needed topics have been created."
data/config/errors.yml CHANGED
@@ -2,6 +2,7 @@ en:
  dry_validation:
  errors:
  max_timeout_vs_pause_max_timeout: pause_timeout must be less or equal to pause_max_timeout
+ shutdown_timeout_vs_max_wait_time: shutdown_timeout must be more than max_wait_time
  topics_names_not_unique: all topic names within a single consumer group must be unique
  required_usage_count: Given topic must be used at least once
  consumer_groups_inclusion: Unknown consumer group
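This new message backs the beta2 contract rule that `shutdown_timeout` must be greater than `max_wait_time`, so a single poll cycle cannot eat the whole shutdown budget and trigger a forced shutdown. A minimal sketch of a setup that satisfies the contract (values are illustrative only, both in milliseconds):

```ruby
# karafka.rb - a minimal sketch with illustrative values
class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': 'localhost:9092' }
    # The config contract now rejects setups where shutdown_timeout is not
    # bigger than max_wait_time, to avoid frequent forceful shutdowns
    config.max_wait_time = 1_000
    config.shutdown_timeout = 30_000
  end
end
```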
data/docker-compose.yml CHANGED
@@ -1,10 +1,12 @@
  version: '2'
  services:
  zookeeper:
+ container_name: karafka_20_zookeeper
  image: wurstmeister/zookeeper
  ports:
  - '2181:2181'
  kafka:
+ container_name: karafka_20_kafka
  image: wurstmeister/kafka
  ports:
  - '9092:9092'
@@ -17,6 +19,16 @@ services:
  "integrations_0_02:2:1,\
  integrations_1_02:2:1,\
  integrations_2_02:2:1,\
+ integrations_3_02:2:1,\
+ integrations_4_02:2:1,\
+ integrations_5_02:2:1,\
+ integrations_6_02:2:1,\
+ integrations_7_02:2:1,\
+ integrations_8_02:2:1,\
+ integrations_9_02:2:1,\
+ integrations_10_02:2:1,\
+ integrations_11_02:2:1,\
+ integrations_12_02:2:1,\
  integrations_0_03:3:1,\
  integrations_1_03:3:1,\
  integrations_2_03:3:1,\
data/karafka.gemspec CHANGED
@@ -14,14 +14,14 @@ Gem::Specification.new do |spec|
  spec.homepage = 'https://karafka.io'
  spec.summary = 'Ruby based framework for working with Apache Kafka'
  spec.description = 'Framework used to simplify Apache Kafka based Ruby applications development'
- spec.license = 'LGPL-3.0'
+ spec.licenses = ['LGPL-3.0', 'Commercial']

  spec.add_dependency 'dry-configurable', '~> 0.13'
  spec.add_dependency 'dry-monitor', '~> 0.5'
  spec.add_dependency 'dry-validation', '~> 1.7'
  spec.add_dependency 'rdkafka', '>= 0.10'
  spec.add_dependency 'thor', '>= 0.20'
- spec.add_dependency 'waterdrop', '>= 2.3.0', '< 3.0.0'
+ spec.add_dependency 'waterdrop', '>= 2.3.1', '< 3.0.0'
  spec.add_dependency 'zeitwerk', '~> 2.3'

  spec.required_ruby_version = '>= 2.6.0'
data/lib/active_job/karafka.rb CHANGED
@@ -14,8 +14,8 @@ begin
  # We extend the routing builder by adding a simple wrapper for easier defining of jobs topics
  # This needs to be extended here as it is going to be used in karafka routes, hence doing that in
  # the railtie initializer would be too late
- ::Karafka::Routing::Builder.include ::Karafka::ActiveJob::RoutingExtensions
- ::Karafka::Routing::Proxy.include ::Karafka::ActiveJob::RoutingExtensions
+ ::Karafka::Routing::Builder.include ::Karafka::ActiveJob::Routing::Extensions
+ ::Karafka::Routing::Proxy.include ::Karafka::ActiveJob::Routing::Extensions
  rescue LoadError
  # We extend ActiveJob stuff in the railtie
  end
data/lib/karafka/active_job/routing/extensions.rb ADDED
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ module Karafka
+ # ActiveJob related Karafka stuff
+ module ActiveJob
+ # Karafka routing ActiveJob related components
+ module Routing
+ # Routing extensions for ActiveJob
+ module Extensions
+ # This method simplifies routes definition for ActiveJob topics / queues by auto-injecting
+ # the consumer class
+ # @param name [String, Symbol] name of the topic where ActiveJob jobs should go
+ # @param block [Proc] block that we can use for some extra configuration
+ def active_job_topic(name, &block)
+ topic(name) do
+ consumer App.config.internal.active_job.consumer
+
+ next unless block
+
+ instance_eval(&block)
+
+ # This is handled by our custom ActiveJob consumer
+ # Without this, the default behaviour would cause messages to be skipped upon shutdown
+ # as the offset would be committed for the last message
+ manual_offset_management true
+ end
+ end
+ end
+ end
+ end
+ end
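For context, a minimal sketch of how the block form of `active_job_topic` can be used in routing. The `long_running_job` flag is an assumption based on the Pro routing extensions added in this release (`data/lib/karafka/pro/routing/extensions.rb`); non-Pro setups would place other topic-level settings in the block:

```ruby
# karafka.rb - a minimal sketch, not taken from the gem itself
class KarafkaApp < Karafka::App
  routes.draw do
    # The consumer class and manual_offset_management are injected by the
    # extension itself (see the module above)
    active_job_topic :default do
      # Assumed Pro-only setting enabling Long Running Jobs for this topic
      long_running_job true
    end
  end
end
```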
data/lib/karafka/base_consumer.rb CHANGED
@@ -15,33 +15,60 @@ module Karafka
  # @return [Waterdrop::Producer] producer instance
  attr_accessor :producer

+ def initialize
+ # We re-use one result instance to save on object allocation
+ # It also allows us to transfer the consumption notion to another batch
+ @consumption = Processing::Result.new
+ end
+
+ # Can be used to run preparation code
+ #
+ # @private
+ # @note This should not be used by the end users as it is part of the lifecycle of things but
+ # not as part of the public API. This can act as a hook when creating non-blocking
+ # consumers and doing other advanced stuff
+ def on_before_consume; end
+
  # Executes the default consumer flow.
  #
+ # @return [Boolean] true if there was no exception, otherwise false.
+ #
  # @note We keep the seek offset tracking, and use it to compensate for async offset flushing
  # that may not yet kick in when an error occurs. That way we always pause on the last
  # processed message.
  def on_consume
  Karafka.monitor.instrument('consumer.consumed', caller: self) do
  consume
-
- pause_tracker.reset
-
- # Mark as consumed only if manual offset management is not on
- return if topic.manual_offset_management
-
- # We use the non-blocking one here. If someone needs the blocking one, can implement it
- # with manual offset management
- mark_as_consumed(messages.last)
  end
+
+ @consumption.success!
  rescue StandardError => e
+ @consumption.failure!
+
  Karafka.monitor.instrument(
  'error.occurred',
  error: e,
  caller: self,
  type: 'consumer.consume.error'
  )
+ end
+
+ # @private
+ # @note This should not be used by the end users as it is part of the lifecycle of things but
+ # not as part of the public API.
+ def on_after_consume
+ if @consumption.success?
+ pause_tracker.reset
+
+ # Mark as consumed only if manual offset management is not on
+ return if topic.manual_offset_management?

- pause(@seek_offset || messages.first.offset)
+ # We use the non-blocking one here. If someone needs the blocking one, they can implement
+ # it with manual offset management
+ mark_as_consumed(messages.last)
+ else
+ pause(@seek_offset || messages.first.offset)
+ end
  end

  # Trigger method for running on shutdown.
@@ -76,31 +103,8 @@ module Karafka
  )
  end

- # Can be used to run preparation code
- #
- # @private
- # @note This should not be used by the end users as it is part of the lifecycle of things but
- # not as part of the public api. This can act as a hook when creating non-blocking
- # consumers and doing other advanced stuff
- def on_prepared
- Karafka.monitor.instrument('consumer.prepared', caller: self) do
- prepared
- end
- rescue StandardError => e
- Karafka.monitor.instrument(
- 'error.occurred',
- error: e,
- caller: self,
- type: 'consumer.prepared.error'
- )
- end
-
  private

- # Method that gets called in the blocking flow allowing to setup any type of resources or to
- # send additional commands to Kafka before the proper execution starts.
- def prepared; end
-
  # Method that will perform business logic on data received from Kafka (it will consume
  # the data)
  # @note This method needs to be implemented in a subclass. We stub it here as a failover if
@@ -120,21 +124,36 @@ module Karafka
  # Marks message as consumed in an async way.
  #
  # @param message [Messages::Message] last successfully processed message.
+ # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
+ # that we were not able to do so and that we have lost the partition.
+ #
  # @note We keep track of this offset in case we mark as consumed and get an error when
  # processing another message. In a case like this we do not pause on the message we've
  # already processed but rather on the next one. This applies to both sync and async versions
  # of this method.
  def mark_as_consumed(message)
- client.mark_as_consumed(message)
+ @revoked = !client.mark_as_consumed(message)
+
+ return false if revoked?
+
  @seek_offset = message.offset + 1
+
+ true
  end

  # Marks message as consumed in a sync way.
  #
  # @param message [Messages::Message] last successfully processed message.
+ # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
+ # that we were not able to do so and that we have lost the partition.
  def mark_as_consumed!(message)
- client.mark_as_consumed!(message)
+ @revoked = !client.mark_as_consumed!(message)
+
+ return false if revoked?
+
  @seek_offset = message.offset + 1
+
+ true
  end

  # Pauses processing on a given offset for the current topic partition
@@ -144,22 +163,19 @@ module Karafka
  # @param timeout [Integer, nil] how long in milliseconds do we want to pause or nil to use the
  # default exponential pausing strategy defined for retries
  def pause(offset, timeout = nil)
+ timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
+
  client.pause(
  messages.metadata.topic,
  messages.metadata.partition,
  offset
  )
-
- timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
  end

  # Resumes processing of the current topic partition
  def resume
- client.resume(
- messages.metadata.topic,
- messages.metadata.partition
- )
-
+ # This is sufficient to expire a partition pause, as with it, the partition will be resumed
+ # by the listener thread before the next poll.
  pause_tracker.expire
  end

@@ -175,5 +191,12 @@ module Karafka
  )
  )
  end
+
+ # @return [Boolean] true if the partition was revoked from the current consumer
+ # @note We know that the partition got revoked because when we try to mark a message as
+ # consumed, it will return false unless it is successful
+ def revoked?
+ @revoked || false
+ end
  end
  end
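To close, a minimal sketch of how a consumer could lean on the new `#mark_as_consumed` return value and the `#revoked?` flag from the diff above; the `persist` method is a placeholder for real business logic:

```ruby
class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      persist(message.payload)

      # Since beta4, marking returns false once the partition has been lost
      # (and #revoked? flips to true), so we can stop working on messages
      # from a partition we no longer own
      break unless mark_as_consumed(message)
    end
  end

  private

  # Placeholder for real business logic
  def persist(payload); end
end
```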