karafka 2.0.0.beta3 → 2.0.0.rc1

Files changed (55)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +18 -15
  4. data/CHANGELOG.md +37 -0
  5. data/CONTRIBUTING.md +0 -5
  6. data/Gemfile.lock +6 -6
  7. data/README.md +2 -10
  8. data/bin/benchmarks +2 -2
  9. data/bin/integrations +10 -3
  10. data/bin/{stress → stress_many} +1 -1
  11. data/bin/stress_one +13 -0
  12. data/bin/wait_for_kafka +20 -0
  13. data/docker-compose.yml +32 -13
  14. data/karafka.gemspec +1 -1
  15. data/lib/karafka/active_job/routing/extensions.rb +1 -1
  16. data/lib/karafka/app.rb +2 -1
  17. data/lib/karafka/base_consumer.rb +59 -46
  18. data/lib/karafka/connection/client.rb +60 -14
  19. data/lib/karafka/connection/listener.rb +37 -11
  20. data/lib/karafka/connection/rebalance_manager.rb +20 -19
  21. data/lib/karafka/contracts/config.rb +18 -4
  22. data/lib/karafka/contracts/server_cli_options.rb +1 -1
  23. data/lib/karafka/errors.rb +3 -0
  24. data/lib/karafka/instrumentation/logger_listener.rb +0 -3
  25. data/lib/karafka/instrumentation/monitor.rb +0 -1
  26. data/lib/karafka/pro/active_job/consumer.rb +2 -8
  27. data/lib/karafka/pro/base_consumer.rb +82 -0
  28. data/lib/karafka/pro/loader.rb +14 -8
  29. data/lib/karafka/pro/processing/coordinator.rb +63 -0
  30. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +1 -1
  31. data/lib/karafka/pro/processing/jobs_builder.rb +3 -2
  32. data/lib/karafka/pro/processing/partitioner.rb +41 -0
  33. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  34. data/lib/karafka/pro/routing/extensions.rb +6 -0
  35. data/lib/karafka/processing/coordinator.rb +88 -0
  36. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  37. data/lib/karafka/processing/executor.rb +16 -9
  38. data/lib/karafka/processing/executors_buffer.rb +46 -15
  39. data/lib/karafka/processing/jobs/base.rb +8 -3
  40. data/lib/karafka/processing/jobs/consume.rb +11 -4
  41. data/lib/karafka/processing/jobs_builder.rb +3 -2
  42. data/lib/karafka/processing/partitioner.rb +22 -0
  43. data/lib/karafka/processing/result.rb +29 -0
  44. data/lib/karafka/processing/scheduler.rb +22 -0
  45. data/lib/karafka/processing/worker.rb +2 -2
  46. data/lib/karafka/routing/consumer_group.rb +1 -1
  47. data/lib/karafka/routing/topic.rb +14 -0
  48. data/lib/karafka/setup/config.rb +20 -10
  49. data/lib/karafka/version.rb +1 -1
  50. data.tar.gz.sig +0 -0
  51. metadata +16 -8
  52. metadata.gz.sig +0 -0
  53. data/lib/karafka/pro/base_consumer_extensions.rb +0 -66
  54. data/lib/karafka/pro/scheduler.rb +0 -54
  55. data/lib/karafka/scheduler.rb +0 -20
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 86b352cc7737bde8484567662d546165793496adb6a923e33aa7e4823e4a456f
- data.tar.gz: 2a180ba7b177db9f4c67de387262d47ac3eb2580860a548992912cd89a049da7
+ metadata.gz: 506ffb9aef3309eae2ee26e3283b7bc83859b26f4fe41995dca9d8f5e7bf0533
+ data.tar.gz: 14b39f0597676207bf9f2bf10b06c51ba539c3aa01959dfb2378c2e23941d240
  SHA512:
- metadata.gz: 207700c3e1fab4d3370de7eddae72453fc180c65d817f4d7c021929327e5464fcd8a9bb1610c641080f82a60bc038e4517dc7bc86c0aa4c83852dd9467441e75
- data.tar.gz: ab94239255ff841e0728c6e6585d4145a9c4f42dfd02d8df062975bd28ad3caf2cef50f5488d30a1356ad49e2529f772379bc947ba5804a15f12e56989cf9a7a
+ metadata.gz: 4b2ad5ef4eff629abfc0088be0c400bd0f9420d24c05c1414541a4c167e7e6bf2b9735bf0e596358d8e3f3c2392bf2c2b4ba345cdb8f0226b54877ce111fd749
+ data.tar.gz: 93adbc64906ff4a03e67dee646a5fe696d825357753bb804c690036800c947edaa186b67e5da8db115a0cc4a9efa46e6b1e061a608e496255bbfdc6ddeb60c14
checksums.yaml.gz.sig CHANGED
Binary file
data/.github/workflows/ci.yml CHANGED
@@ -8,6 +8,10 @@ on:
  schedule:
  - cron: '0 1 * * *'

+ env:
+ BUNDLE_RETRY: 6
+ BUNDLE_JOBS: 4
+
  jobs:
  diffend:
  runs-on: ubuntu-latest
@@ -17,14 +21,16 @@ jobs:
  - uses: actions/checkout@v2
  with:
  fetch-depth: 0
+
  - name: Set up Ruby
  uses: ruby/setup-ruby@v1
  with:
  ruby-version: 3.1
- - name: Install latest bundler
- run: gem install bundler --no-document
+ bundler-cache: true
+
  - name: Install Diffend plugin
  run: bundle plugin install diffend
+
  - name: Bundle Secure
  run: bundle secure

@@ -57,25 +63,19 @@ jobs:
  - name: Install package dependencies
  run: "[ -e $APT_DEPS ] || sudo apt-get install -y --no-install-recommends $APT_DEPS"

- - name: Run Kafka with docker-compose
+ - name: Start Kafka with docker-compose
  run: |
  docker-compose up -d
- sleep 10

  - name: Set up Ruby
  uses: ruby/setup-ruby@v1
  with:
  ruby-version: ${{matrix.ruby}}
+ bundler-cache: true

- - name: Install latest Bundler
+ - name: Ensure all needed Kafka topics are created and wait if not
  run: |
- gem install bundler --no-document
- bundle config set without 'tools benchmarks docs'
-
- - name: Bundle install
- run: |
- bundle config set without development
- bundle install --jobs 4 --retry 3
+ bin/wait_for_kafka

  - name: Run all specs
  env:
@@ -100,10 +100,9 @@ jobs:
  - name: Install package dependencies
  run: "[ -e $APT_DEPS ] || sudo apt-get install -y --no-install-recommends $APT_DEPS"

- - name: Run Kafka with docker-compose
+ - name: Start Kafka with docker-compose
  run: |
  docker-compose up -d
- sleep 5

  - name: Set up Ruby
  uses: ruby/setup-ruby@v1
@@ -119,7 +118,11 @@ jobs:
  - name: Bundle install
  run: |
  bundle config set without development
- bundle install --jobs 4 --retry 3
+ bundle install
+
+ - name: Ensure all needed Kafka topics are created and wait if not
+ run: |
+ bin/wait_for_kafka

  - name: Run integration tests
  env:
data/CHANGELOG.md CHANGED
@@ -1,5 +1,42 @@
  # Karafka framework changelog

+ ## 2.0.0-rc1 (2022-07-08)
+ - Extract consumption partitioner out of listener inline code.
+ - Introduce virtual partitioner concept for parallel processing of data from a single topic partition.
+ - Improve stability when Kafka internal errors occur while polling.
+ - Fix a case where we would resume a LRJ partition upon rebalance, reclaiming the partition while the job was still running.
+ - Do not revoke pauses for lost partitions. This allows us to un-pause reclaimed partitions when LRJ jobs are done.
+ - Fail integrations by default (unless configured otherwise) if any errors occur during Karafka server execution.
+
+ ## 2.0.0-beta5 (2022-07-05)
+ - Always resume processing of a revoked partition upon assignment.
+ - Improve specs stability.
+ - Fix a case where a revocation job would be executed on a partition for which we never did any work.
+ - Introduce a jobs group coordinator for easier jobs management.
+ - Improve stability of resuming paused partitions that were revoked and re-assigned.
+ - Optimize reaction time on partition ownership changes.
+ - Fix a bug where, despite setting a long max wait time, we would return messages before reaching it without hitting the desired max messages count.
+ - Add more integration specs related to polling limits.
+ - Remove auto-detection of re-assigned partitions upon rebalance, as for too-fast rebalances it could not be accurate enough. It would also misbehave in case of rebalances happening right after a `#seek` was issued for a partition.
+ - Optimize the removal of pre-buffered lost partitions data.
+ - Always run `#revoked` when a rebalance with revocation happens.
+ - Evict executors upon rebalance to prevent race conditions.
+ - Align topics names for integration specs.
+
+ ## 2.0.0-beta4 (2022-06-20)
+ - Rename job internal API methods from `#prepare` to `#before_call` and from `#teardown` to `#after_call` to abstract jobs execution away from any type of executors and consumers logic.
+ - Remove the ability to run `before_consume` and `after_consume` completely. Those should be for internal usage only.
+ - Reorganize how Pro consumer and Pro AJ consumers inherit.
+ - Require WaterDrop `2.3.1`.
+ - Add more integration specs for rebalancing and max poll exceeded.
+ - Move `revoked?` state from Pro to regular Karafka.
+ - Use the return value of `mark_as_consumed!` and `mark_as_consumed` as an indicator of partition ownership and use it to switch the ownership state.
+ - Do not remove the rebalance manager upon client reset and recovery. This allows us to keep the notion of lost partitions, so we can run revocation jobs for blocking jobs that exceeded the max poll interval.
+ - Run revocation jobs upon reaching max poll interval for blocking jobs.
+ - Early exit the `poll` operation upon partition lost or max poll exceeded event.
+ - Always reset consumer instances on timeout exceeded.
+ - Wait for Kafka to create all the needed topics before running specs in CI.
+
  ## 2.0.0-beta3 (2022-06-14)
  - Jobs building responsibility extracted out of the listener code base.
  - Fix a case where specs supervisor would try to kill no longer running process (#868)
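For orientation, the virtual partitioner mentioned in the 2.0.0-rc1 entry above is a Karafka Pro routing-level setting that splits work from one Kafka partition across worker threads. A minimal, hedged sketch of how such a partitioner is typically declared (illustrative only; `OrdersConsumer`, the topic name and the grouping key are assumptions, and the exact Pro DSL may differ from this sketch):

```ruby
# karafka.rb - illustrative routing sketch, not part of this diff
class KarafkaApp < Karafka::App
  routes.draw do
    topic :orders_states do
      consumer OrdersConsumer

      # Split one Kafka partition's batch into parallel "virtual" partitions,
      # grouping messages by an application-level key so related messages
      # keep their relative order within a group
      virtual_partitioner ->(message) { message.headers['order_id'] }
    end
  end
end
```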
data/CONTRIBUTING.md CHANGED
@@ -34,8 +34,3 @@ By sending a pull request to the pro components, you are agreeing to transfer th

  If you have any questions, create an [issue](issue) (protip: do a quick search first to see if someone else didn't ask the same question before!).
  You can also reach us at hello@karafka.opencollective.com.
-
- ## Credits
-
- Thank you to all the people who have already contributed to karafka!
- <a href="graphs/contributors"><img src="https://opencollective.com/karafka/contributors.svg?width=890" /></a>
data/Gemfile.lock CHANGED
@@ -1,13 +1,13 @@
  PATH
  remote: .
  specs:
- karafka (2.0.0.beta3)
+ karafka (2.0.0.rc1)
  dry-configurable (~> 0.13)
  dry-monitor (~> 0.5)
  dry-validation (~> 1.7)
  rdkafka (>= 0.10)
  thor (>= 0.20)
- waterdrop (>= 2.3.0, < 3.0.0)
+ waterdrop (>= 2.3.1, < 3.0.0)
  zeitwerk (~> 2.3)

  GEM
@@ -45,7 +45,7 @@ GEM
  dry-configurable (~> 0.13, >= 0.13.0)
  dry-core (~> 0.5, >= 0.5)
  dry-events (~> 0.2)
- dry-schema (1.9.2)
+ dry-schema (1.9.3)
  concurrent-ruby (~> 1.0)
  dry-configurable (~> 0.13, >= 0.13.0)
  dry-core (~> 0.5, >= 0.5)
@@ -74,7 +74,7 @@ GEM
  mini_portile2 (2.8.0)
  minitest (5.15.0)
  rake (13.0.6)
- rdkafka (0.11.1)
+ rdkafka (0.12.0)
  ffi (~> 1.15)
  mini_portile2 (~> 2.6)
  rake (> 12)
@@ -100,14 +100,14 @@ GEM
  thor (1.2.1)
  tzinfo (2.0.4)
  concurrent-ruby (~> 1.0)
- waterdrop (2.3.0)
+ waterdrop (2.3.1)
  concurrent-ruby (>= 1.1)
  dry-configurable (~> 0.13)
  dry-monitor (~> 0.5)
  dry-validation (~> 1.7)
  rdkafka (>= 0.10)
  zeitwerk (~> 2.3)
- zeitwerk (2.5.4)
+ zeitwerk (2.6.0)

  PLATFORMS
  x86_64-linux
data/README.md CHANGED
@@ -8,7 +8,7 @@

  ## About Karafka

- Karafka is a framework used to simplify Apache Kafka based Ruby and Ruby on Rails applications development.
+ Karafka is a multi-threaded framework used to simplify Apache Kafka based Ruby and Ruby on Rails applications development.

  ```ruby
  # Define what topics you want to consume with which consumers in karafka.rb
@@ -45,7 +45,7 @@ We also maintain many [integration specs](https://github.com/karafka/karafka/tre

  ## Want to Upgrade? LGPL is not for you? Want to help?

- I also sell Karafka Pro subscription. It includes commercial-friendly license, priority support, architecture consultations and high throughput data processing-related features (under development).
+ I also sell Karafka Pro subscription. It includes commercial-friendly license, priority support, architecture consultations and high throughput data processing-related features (virtual partitions, long running jobs and more).

  **20%** of the income will be distributed back to other OSS projects that Karafka uses under the hood.

@@ -56,11 +56,3 @@ Help me provide high-quality open-source software. Please see the Karafka [homep
  Karafka has [Wiki pages](https://github.com/karafka/karafka/wiki) for almost everything and a pretty decent [FAQ](https://github.com/karafka/karafka/wiki/FAQ). It covers the whole installation, setup and deployment along with other useful details on how to run Karafka.

  If you have any questions about using Karafka, feel free to join our [Slack](https://slack.karafka.io) channel.
-
- ## Note on contributions
-
- First, thank you for considering contributing to the Karafka ecosystem! It's people like you that make the open source community such a great community!
-
- Each pull request must pass all the RSpec specs, integration tests and meet our quality requirements.
-
- Fork it, update and wait for the Github Actions results.
data/bin/benchmarks CHANGED
@@ -39,8 +39,8 @@ if ENV['SEED']

  # We do not populate data of benchmarks_0_10 as we use it with life-stream data only
  %w[
- benchmarks_0_01
- benchmarks_0_05
+ benchmarks_00_01
+ benchmarks_00_05
  ].each do |topic_name|
  partitions_count = topic_name.split('_').last.to_i

data/bin/integrations CHANGED
@@ -21,6 +21,9 @@ ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../
  # of CPU
  CONCURRENCY = ENV.key?('CI') ? 5 : Etc.nprocessors * 2

+ # How many bytes do we want to keep from the stdout in the buffer for when we need to print it
+ MAX_BUFFER_OUTPUT = 10_240
+
  # Abstraction around a single test scenario execution process
  class Scenario
  # How long a scenario can run before we kill it
@@ -84,9 +87,9 @@ class Scenario
  # We read it so it won't grow as we use our default logger that prints to both test.log and
  # to stdout. Otherwise after reaching the buffer size, it would hang
  buffer = ''
- @stdout.read_nonblock(10_240, buffer, exception: false)
+ @stdout.read_nonblock(MAX_BUFFER_OUTPUT, buffer, exception: false)
  @stdout_tail << buffer
- @stdout_tail = @stdout_tail[-10_024..-1] || @stdout_tail
+ @stdout_tail = @stdout_tail[-MAX_BUFFER_OUTPUT..-1] || @stdout_tail

  !@wait_thr.alive?
  end
@@ -114,11 +117,15 @@ class Scenario
  if success?
  print "\e[#{32}m#{'.'}\e[0m"
  else
+ buffer = ''
+
+ @stderr.read_nonblock(MAX_BUFFER_OUTPUT, buffer, exception: false)
+
  puts
  puts "\e[#{31}m#{'[FAILED]'}\e[0m #{name}"
  puts "Exit code: #{exit_code}"
  puts @stdout_tail
- puts @stderr.read
+ puts buffer
  puts
  end
  end
data/bin/{stress → stress_many} RENAMED
@@ -8,6 +8,6 @@ set -e

  while :
  do
- reset
+ clear
  bin/integrations $1
  done
data/bin/stress_one ADDED
@@ -0,0 +1,13 @@
+ #!/bin/bash
+
+ # Runs a single integration spec in an endless loop
+ # This allows us to ensure (after long enough time) that the integration spec is stable and
+ # that there are no anomalies when running it for a long period of time
+
+ set -e
+
+ while :
+ do
+ clear
+ bin/scenario $1
+ done
data/bin/wait_for_kafka ADDED
@@ -0,0 +1,20 @@
+ #!/bin/bash
+
+ # This script allows us to wait for Kafka docker to fully be ready
+ # We consider it fully ready when all our topics that need to be created are created as expected
+
+ KAFKA_NAME='karafka_20_kafka'
+ ZOOKEEPER='zookeeper:2181'
+ LIST_CMD="kafka-topics.sh --list --zookeeper $ZOOKEEPER"
+
+ # Take the number of topics that we need to create prior to running anything
+ TOPICS_COUNT=`cat docker-compose.yml | grep -E -i 'integrations_|benchmarks_' | wc -l`
+
+ # And wait until all of them are created
+ until (((`docker exec $KAFKA_NAME $LIST_CMD | wc -l`) >= $TOPICS_COUNT));
+ do
+ echo "Waiting for Kafka to create all the needed topics..."
+ sleep 1
+ done
+
+ echo "All the needed topics created."
data/docker-compose.yml CHANGED
@@ -1,10 +1,12 @@
  version: '2'
  services:
  zookeeper:
+ container_name: karafka_20_zookeeper
  image: wurstmeister/zookeeper
  ports:
  - '2181:2181'
  kafka:
+ container_name: karafka_20_kafka
  image: wurstmeister/kafka
  ports:
  - '9092:9092'
@@ -14,18 +16,35 @@ services:
  KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
  KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
  KAFKA_CREATE_TOPICS:
- "integrations_0_02:2:1,\
- integrations_1_02:2:1,\
- integrations_2_02:2:1,\
- integrations_3_02:2:1,\
- integrations_4_02:2:1,\
- integrations_0_03:3:1,\
- integrations_1_03:3:1,\
- integrations_2_03:3:1,\
- integrations_0_10:10:1,\
- integrations_1_10:10:1,\
- benchmarks_0_01:1:1,\
- benchmarks_0_05:5:1,\
- benchmarks_0_10:10:1"
+ "integrations_00_02:2:1,\
+ integrations_01_02:2:1,\
+ integrations_02_02:2:1,\
+ integrations_03_02:2:1,\
+ integrations_04_02:2:1,\
+ integrations_05_02:2:1,\
+ integrations_06_02:2:1,\
+ integrations_07_02:2:1,\
+ integrations_08_02:2:1,\
+ integrations_09_02:2:1,\
+ integrations_10_02:2:1,\
+ integrations_11_02:2:1,\
+ integrations_12_02:2:1,\
+ integrations_13_02:2:1,\
+ integrations_14_02:2:1,\
+ integrations_15_02:2:1,\
+ integrations_16_02:2:1,\
+ integrations_17_02:2:1,\
+ integrations_18_02:2:1,\
+ integrations_19_02:2:1,\
+ integrations_00_03:3:1,\
+ integrations_01_03:3:1,\
+ integrations_02_03:3:1,\
+ integrations_03_03:3:1,\
+ integrations_00_10:10:1,\
+ integrations_01_10:10:1,\
+ benchmarks_00_01:1:1,\
+ benchmarks_00_05:5:1,\
+ benchmarks_01_05:5:1,\
+ benchmarks_00_10:10:1"
  volumes:
  - /var/run/docker.sock:/var/run/docker.sock
data/karafka.gemspec CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
  spec.add_dependency 'dry-validation', '~> 1.7'
  spec.add_dependency 'rdkafka', '>= 0.10'
  spec.add_dependency 'thor', '>= 0.20'
- spec.add_dependency 'waterdrop', '>= 2.3.0', '< 3.0.0'
+ spec.add_dependency 'waterdrop', '>= 2.3.1', '< 3.0.0'
  spec.add_dependency 'zeitwerk', '~> 2.3'

  spec.required_ruby_version = '>= 2.6.0'
data/lib/karafka/active_job/routing/extensions.rb CHANGED
@@ -13,7 +13,7 @@ module Karafka
  # @param block [Proc] block that we can use for some extra configuration
  def active_job_topic(name, &block)
  topic(name) do
- consumer App.config.internal.active_job.consumer
+ consumer App.config.internal.active_job.consumer_class
 
  next unless block
 
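For context, `active_job_topic` shown in this hunk is the routing DSL helper that now reads the renamed `consumer_class` setting. A minimal sketch of how it is typically used in an application's `karafka.rb` (illustrative only; the app class name, client id and topic name are assumptions, not part of this diff):

```ruby
# karafka.rb - illustrative sketch, not part of this diff
class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app' # assumed example value
  end

  routes.draw do
    # Registers a topic handled by the internally configured ActiveJob consumer,
    # so ActiveJob jobs dispatched to this queue are consumed by Karafka
    active_job_topic :default
  end
end
```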
data/lib/karafka/app.rb CHANGED
@@ -10,7 +10,8 @@ module Karafka
  def consumer_groups
  config
  .internal
- .routing_builder
+ .routing
+ .builder
  end

  # @return [Array<Karafka::Routing::SubscriptionGroup>] active subscription groups
data/lib/karafka/base_consumer.rb CHANGED
@@ -10,8 +10,8 @@ module Karafka
  attr_accessor :messages
  # @return [Karafka::Connection::Client] kafka connection client
  attr_accessor :client
- # @return [Karafka::TimeTrackers::Pause] current topic partition pause tracker
- attr_accessor :pause_tracker
+ # @return [Karafka::Processing::Coordinator] coordinator
+ attr_accessor :coordinator
  # @return [Waterdrop::Producer] producer instance
  attr_accessor :producer

@@ -21,22 +21,7 @@ module Karafka
  # @note This should not be used by the end users as it is part of the lifecycle of things but
  # not as part of the public api. This can act as a hook when creating non-blocking
  # consumers and doing other advanced stuff
- def on_prepare
- Karafka.monitor.instrument('consumer.prepared', caller: self) do
- prepare
- end
-
- true
- rescue StandardError => e
- Karafka.monitor.instrument(
- 'error.occurred',
- error: e,
- caller: self,
- type: 'consumer.prepare.error'
- )
-
- false
- end
+ def on_before_consume; end

  # Executes the default consumer flow.
  #
@@ -48,40 +33,52 @@ module Karafka
  def on_consume
  Karafka.monitor.instrument('consumer.consumed', caller: self) do
  consume
-
- pause_tracker.reset
-
- # Mark as consumed only if manual offset management is not on
- next if topic.manual_offset_management
-
- # We use the non-blocking one here. If someone needs the blocking one, can implement it
- # with manual offset management
- mark_as_consumed(messages.last)
  end

- true
+ coordinator.consumption(self).success!
  rescue StandardError => e
+ coordinator.consumption(self).failure!
+
  Karafka.monitor.instrument(
  'error.occurred',
  error: e,
  caller: self,
  type: 'consumer.consume.error'
  )
+ ensure
+ # We need to decrease number of jobs that this coordinator coordinates as it has finished
+ coordinator.decrement
+ end
+
+ # @private
+ # @note This should not be used by the end users as it is part of the lifecycle of things but
+ # not as part of the public api.
+ def on_after_consume
+ return if revoked?
+
+ if coordinator.success?
+ coordinator.pause_tracker.reset

- pause(@seek_offset || messages.first.offset)
+ # Mark as consumed only if manual offset management is not on
+ return if topic.manual_offset_management?

- false
+ # We use the non-blocking one here. If someone needs the blocking one, can implement it
+ # with manual offset management
+ mark_as_consumed(messages.last)
+ else
+ pause(@seek_offset || messages.first.offset)
+ end
  end

  # Trigger method for running on shutdown.
  #
  # @private
  def on_revoked
+ coordinator.revoke
+
  Karafka.monitor.instrument('consumer.revoked', caller: self) do
  revoked
  end
-
- true
  rescue StandardError => e
  Karafka.monitor.instrument(
  'error.occurred',
@@ -89,8 +86,6 @@ module Karafka
  caller: self,
  type: 'consumer.revoked.error'
  )
-
- false
  end

  # Trigger method for running on shutdown.
@@ -100,8 +95,6 @@ module Karafka
  Karafka.monitor.instrument('consumer.shutdown', caller: self) do
  shutdown
  end
-
- true
  rescue StandardError => e
  Karafka.monitor.instrument(
  'error.occurred',
@@ -109,16 +102,10 @@ module Karafka
  caller: self,
  type: 'consumer.shutdown.error'
  )
-
- false
  end

  private

- # Method that gets called in the blocking flow allowing to setup any type of resources or to
- # send additional commands to Kafka before the proper execution starts.
- def prepare; end
-
  # Method that will perform business logic and on data received from Kafka (it will consume
  # the data)
  # @note This method needs to be implemented in a subclass. We stub it here as a failover if
@@ -138,21 +125,40 @@ module Karafka
  # Marks message as consumed in an async way.
  #
  # @param message [Messages::Message] last successfully processed message.
+ # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
+ # that we were not able and that we have lost the partition.
+ #
  # @note We keep track of this offset in case we would mark as consumed and got error when
  # processing another message. In case like this we do not pause on the message we've already
  # processed but rather at the next one. This applies to both sync and async versions of this
  # method.
  def mark_as_consumed(message)
- client.mark_as_consumed(message)
+ unless client.mark_as_consumed(message)
+ coordinator.revoke
+
+ return false
+ end
+
  @seek_offset = message.offset + 1
+
+ true
  end

  # Marks message as consumed in a sync way.
  #
  # @param message [Messages::Message] last successfully processed message.
+ # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
+ # that we were not able and that we have lost the partition.
  def mark_as_consumed!(message)
- client.mark_as_consumed!(message)
+ unless client.mark_as_consumed!(message)
+ coordinator.revoke
+
+ return false
+ end
+
  @seek_offset = message.offset + 1
+
+ true
  end

  # Pauses processing on a given offset for the current topic partition
@@ -162,7 +168,7 @@ module Karafka
  # @param timeout [Integer, nil] how long in milliseconds do we want to pause or nil to use the
  # default exponential pausing strategy defined for retries
  def pause(offset, timeout = nil)
- timeout ? pause_tracker.pause(timeout) : pause_tracker.pause
+ timeout ? coordinator.pause_tracker.pause(timeout) : coordinator.pause_tracker.pause

  client.pause(
  messages.metadata.topic,
@@ -175,7 +181,7 @@ module Karafka
  def resume
  # This is sufficient to expire a partition pause, as with it will be resumed by the listener
  # thread before the next poll.
- pause_tracker.expire
+ coordinator.pause_tracker.expire
  end

  # Seeks in the context of current topic and partition
@@ -190,5 +196,12 @@ module Karafka
  )
  )
  end
+
+ # @return [Boolean] true if partition was revoked from the current consumer
+ # @note We know that partition got revoked because when we try to mark message as consumed,
+ # unless it is successful, it will return false
+ def revoked?
+ coordinator.revoked?
+ end
  end
  end
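To make the consequence of these base consumer changes concrete: `mark_as_consumed`/`mark_as_consumed!` now return a boolean and flip the coordinator into a revoked state on failure, which an end-user consumer can use to stop working on a lost partition. A hedged sketch (the consumer class, base class alias and `persist` call are assumptions, not part of this diff):

```ruby
# Illustrative consumer sketch, not part of this diff
class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      persist(message.payload) # hypothetical business logic

      # Since rc1, marking returns false once the partition has been lost to
      # another process; stop processing instead of doing work we cannot commit
      return unless mark_as_consumed(message)
    end
  end

  private

  # Placeholder for real persistence logic
  def persist(_payload)
    true
  end
end
```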