karafka 2.0.37 → 2.0.39

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +1 -1
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +34 -0
  6. data/Gemfile.lock +7 -7
  7. data/README.md +1 -1
  8. data/bin/integrations +1 -1
  9. data/config/locales/errors.yml +0 -7
  10. data/config/locales/pro_errors.yml +18 -0
  11. data/lib/karafka/active_job/consumer.rb +22 -7
  12. data/lib/karafka/admin.rb +46 -14
  13. data/lib/karafka/base_consumer.rb +35 -55
  14. data/lib/karafka/connection/listener.rb +15 -10
  15. data/lib/karafka/errors.rb +0 -3
  16. data/lib/karafka/instrumentation/logger_listener.rb +44 -3
  17. data/lib/karafka/instrumentation/notifications.rb +7 -0
  18. data/lib/karafka/pro/active_job/consumer.rb +10 -5
  19. data/lib/karafka/pro/processing/coordinator.rb +13 -4
  20. data/lib/karafka/pro/processing/filters/base.rb +61 -0
  21. data/lib/karafka/pro/processing/filters/delayer.rb +70 -0
  22. data/lib/karafka/pro/processing/filters/expirer.rb +51 -0
  23. data/lib/karafka/pro/processing/filters/throttler.rb +84 -0
  24. data/lib/karafka/pro/processing/filters_applier.rb +100 -0
  25. data/lib/karafka/pro/processing/jobs_builder.rb +7 -3
  26. data/lib/karafka/pro/processing/scheduler.rb +24 -7
  27. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +68 -0
  28. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +74 -0
  29. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +72 -0
  30. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +76 -0
  31. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +62 -0
  32. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +68 -0
  33. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +64 -0
  34. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +69 -0
  35. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom.rb +38 -0
  36. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +64 -0
  37. data/lib/karafka/pro/processing/strategies/aj/ftr_mom.rb +38 -0
  38. data/lib/karafka/pro/processing/strategies/aj/ftr_mom_vp.rb +58 -0
  39. data/lib/karafka/pro/processing/strategies/{dlq_lrj_vp.rb → aj/lrj_mom.rb} +14 -13
  40. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +77 -0
  41. data/lib/karafka/pro/processing/strategies/aj/mom.rb +36 -0
  42. data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +52 -0
  43. data/lib/karafka/pro/processing/strategies/dlq/default.rb +131 -0
  44. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +61 -0
  45. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +75 -0
  46. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +74 -0
  47. data/lib/karafka/pro/processing/strategies/{mom.rb → dlq/ftr_lrj_vp.rb} +16 -19
  48. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +73 -0
  49. data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +39 -0
  50. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +63 -0
  51. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +66 -0
  52. data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +38 -0
  53. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +67 -0
  54. data/lib/karafka/pro/processing/strategies/dlq/vp.rb +39 -0
  55. data/lib/karafka/pro/processing/strategies/ftr/default.rb +104 -0
  56. data/lib/karafka/pro/processing/strategies/ftr/vp.rb +40 -0
  57. data/lib/karafka/pro/processing/strategies/lrj/default.rb +85 -0
  58. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +69 -0
  59. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +67 -0
  60. data/lib/karafka/pro/processing/strategies/{vp.rb → lrj/ftr_vp.rb} +15 -13
  61. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +78 -0
  62. data/lib/karafka/pro/processing/strategies/{aj_lrj_mom.rb → lrj/vp.rb} +13 -12
  63. data/lib/karafka/pro/processing/strategies/mom/default.rb +46 -0
  64. data/lib/karafka/pro/processing/strategies/mom/ftr.rb +53 -0
  65. data/lib/karafka/pro/processing/strategies/vp/default.rb +53 -0
  66. data/lib/karafka/pro/processing/{strategies/lrj_vp.rb → strategies.rb} +1 -13
  67. data/lib/karafka/pro/processing/strategy_selector.rb +44 -18
  68. data/lib/karafka/pro/{processing/strategies/aj_mom.rb → routing/features/delaying/config.rb} +7 -13
  69. data/lib/karafka/pro/routing/features/delaying/contract.rb +38 -0
  70. data/lib/karafka/pro/routing/features/delaying/topic.rb +59 -0
  71. data/lib/karafka/pro/routing/features/delaying.rb +29 -0
  72. data/lib/karafka/pro/routing/features/expiring/config.rb +27 -0
  73. data/lib/karafka/pro/routing/features/expiring/contract.rb +38 -0
  74. data/lib/karafka/pro/routing/features/expiring/topic.rb +59 -0
  75. data/lib/karafka/pro/routing/features/expiring.rb +27 -0
  76. data/lib/karafka/pro/routing/features/filtering/config.rb +40 -0
  77. data/lib/karafka/pro/routing/features/filtering/contract.rb +41 -0
  78. data/lib/karafka/pro/routing/features/filtering/topic.rb +51 -0
  79. data/lib/karafka/pro/routing/features/filtering.rb +27 -0
  80. data/lib/karafka/pro/routing/features/long_running_job/contract.rb +1 -1
  81. data/lib/karafka/pro/routing/features/throttling/config.rb +32 -0
  82. data/lib/karafka/pro/routing/features/throttling/contract.rb +41 -0
  83. data/lib/karafka/pro/routing/features/throttling/topic.rb +69 -0
  84. data/lib/karafka/pro/routing/features/throttling.rb +30 -0
  85. data/lib/karafka/processing/coordinator.rb +60 -30
  86. data/lib/karafka/processing/coordinators_buffer.rb +5 -1
  87. data/lib/karafka/processing/executor.rb +23 -16
  88. data/lib/karafka/processing/executors_buffer.rb +10 -26
  89. data/lib/karafka/processing/jobs/consume.rb +2 -4
  90. data/lib/karafka/processing/jobs/idle.rb +24 -0
  91. data/lib/karafka/processing/jobs_builder.rb +2 -3
  92. data/lib/karafka/processing/result.rb +5 -0
  93. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
  94. data/lib/karafka/processing/strategies/base.rb +5 -0
  95. data/lib/karafka/processing/strategies/default.rb +50 -0
  96. data/lib/karafka/processing/strategies/dlq.rb +13 -4
  97. data/lib/karafka/processing/strategies/dlq_mom.rb +8 -3
  98. data/lib/karafka/processing/strategy_selector.rb +27 -10
  99. data/lib/karafka/version.rb +1 -1
  100. data/renovate.json +6 -0
  101. data.tar.gz.sig +0 -0
  102. metadata +66 -22
  103. metadata.gz.sig +0 -0
  104. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +0 -42
  105. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom_vp.rb +0 -70
  106. data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +0 -62
  107. data/lib/karafka/pro/processing/strategies/aj_dlq_mom_vp.rb +0 -68
  108. data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +0 -75
  109. data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +0 -62
  110. data/lib/karafka/pro/processing/strategies/dlq.rb +0 -120
  111. data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +0 -65
  112. data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +0 -62
  113. data/lib/karafka/pro/processing/strategies/dlq_mom.rb +0 -62
  114. data/lib/karafka/pro/processing/strategies/dlq_vp.rb +0 -37
  115. data/lib/karafka/pro/processing/strategies/lrj.rb +0 -83
  116. data/lib/karafka/pro/processing/strategies/lrj_mom.rb +0 -73
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bab9c1d7bc952b4ecbfc4fad794d7e7c861cd3a332cc5d9058cef6c0bd9b57cb
4
- data.tar.gz: 7662bd8dc5748d9112f3c72b2912619534e45750188f78df2f69a7e6ae1f9c31
3
+ metadata.gz: 12fe8a47dc0ab16b0f7783424cd1aa043c2d2b228b4f4164f1cecefe604269d9
4
+ data.tar.gz: 9fa3bae282770dd67503c41ef4b73a27a38bfcff3bf472ddd63753d14d03614f
5
5
  SHA512:
6
- metadata.gz: 9a99a84d538a74bd27d5a0f585a12dbbe67eb76ab63cc1a0984cbe1562f230070ad482418f85393a3a479e81534a0957a0863c91f3a7f5b6433f74efd317c79e
7
- data.tar.gz: 7da6129cd795f65d821bae897864648e4a5e37c0d07e8745f110f0d03a23d688d7717ab6c42652c4660fe3be3d26ee7051a8c2c66c1fcb62834a1c4159bd4ac4
6
+ metadata.gz: 9e6536c90a411a0b42337f73c00d9f454028366f42eabb1b7f40902181bcbcfd43258741d6fc51c6e29046b9ee1f8598755440d28a00ca96104a61a8095c20c2
7
+ data.tar.gz: be75dd1bfa744187f770f2e1f0deeedfba4f3fb1b824d6bab91f056f96e60a33498429e35ea22841404def0935e584db0df4289d2818631ace2e597d28785960
checksums.yaml.gz.sig CHANGED
Binary file
@@ -62,7 +62,7 @@ jobs:
62
62
  run: \curl -sSL https://api.coditsu.io/run/ci | bash
63
63
 
64
64
  specs:
65
- timeout-minutes: 30
65
+ timeout-minutes: 45
66
66
  runs-on: ubuntu-latest
67
67
  needs: diffend
68
68
  strategy:
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.2.1
1
+ 3.2.2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,39 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.0.39 (2023-04-11)
4
+ - **[Feature]** Provide ability to throttle/limit number of messages processed in a time unit (#1203)
5
+ - **[Feature]** Provide Delayed Topics (#1000)
6
+ - **[Feature]** Provide ability to expire messages (expiring topics)
7
+ - **[Feature]** Provide ability to apply filters after messages are polled and before enqueued. This is a generic filter API for any usage.
8
+ - [Improvement] When using ActiveJob with Virtual Partitions, Karafka will stop if collectively VPs are failing. This minimizes number of jobs that will be collectively re-processed.
9
+ - [Improvement] `#retrying?` method has been added to consumers to provide the ability to check whether we're reprocessing data after a failure. This is useful for branching out processing based on errors.
10
+ - [Improvement] Track active_job_id in instrumentation (#1372)
11
+ - [Improvement] Introduce new housekeeping job type called `Idle` for non-consumption execution flows.
12
+ - [Improvement] Change how a manual offset management works with Long-Running Jobs. Use the last message offset to move forward instead of relying on the last message marked as consumed for a scenario where no message is marked.
13
+ - [Improvement] In Pro, prioritize non-consumption jobs execution over consumption despite LJF. This will ensure that housekeeping as well as other non-consumption events are not saturated when running a lot of work.
14
+ - [Improvement] Normalize the DLQ behaviour with MoM. Always pause on dispatch for all the strategies.
15
+ - [Improvement] Improve the manual offset management and DLQ behaviour when no markings occur for OSS.
16
+ - [Improvement] Do not early stop ActiveJob work running under virtual partitions to prevent extensive reprocessing.
17
+ - [Improvement] Drastically increase number of scenarios covered by integration specs (OSS and Pro).
18
+ - [Improvement] Introduce a `Coordinator#synchronize` lock for cross virtual partitions operations.
19
+ - [Fix] Do not resume partition that is not paused.
20
+ - [Fix] Fix `LoggerListener` cases where logs would not include caller id (when available)
21
+ - [Fix] Fix not working benchmark tests.
22
+ - [Fix] Fix a case where using manual offset management with a user pause would ignore the pause and seek to the next message.
23
+ - [Fix] Fix a case where dead letter queue would go into an infinite loop on message with first ever offset if the first ever offset would not recover.
24
+ - [Fix] Make sure to resume always for all LRJ strategies on revocation.
25
+ - [Refactor] Make sure that coordinator is topic aware. Needed for throttling, delayed processing and expired jobs.
26
+ - [Refactor] Put Pro strategies into namespaces to better organize multiple combinations.
27
+ - [Refactor] Do not rely on messages metadata for internal topic and partition operations like `#seek` so they can run independently from the consumption flow.
28
+ - [Refactor] Hold a single topic/partition reference on a coordinator instead of in executor, coordinator and consumer.
29
+ - [Refactor] Move `#mark_as_consumed` and `#mark_as_consumed!` into `Strategies::Default` to be able to introduce marking for virtual partitions.
30
+
31
+ ## 2.0.38 (2023-03-27)
32
+ - [Improvement] Introduce `Karafka::Admin#read_watermark_offsets` to get low and high watermark offsets values.
33
+ - [Improvement] Track active_job_id in instrumentation (#1372)
34
+ - [Improvement] Improve `#read_topic` reading in case of a compacted partition where the offset is below the low watermark offset. This should optimize reading and should not go beyond the low watermark offset.
35
+ - [Improvement] Allow `#read_topic` to accept instance settings to overwrite any settings needed to customize reading behaviours.
36
+
3
37
  ## 2.0.37 (2023-03-20)
4
38
  - [Fix] Declarative topics execution on a secondary cluster run topics creation on the primary one (#1365)
5
39
  - [Fix] Admin read operations commit offset when not needed (#1369)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.0.37)
4
+ karafka (2.0.39)
5
5
  karafka-core (>= 2.0.12, < 3.0.0)
6
6
  thor (>= 0.20)
7
7
  waterdrop (>= 2.4.10, < 3.0.0)
@@ -10,10 +10,10 @@ PATH
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- activejob (7.0.4.2)
14
- activesupport (= 7.0.4.2)
13
+ activejob (7.0.4.3)
14
+ activesupport (= 7.0.4.3)
15
15
  globalid (>= 0.3.6)
16
- activesupport (7.0.4.2)
16
+ activesupport (7.0.4.3)
17
17
  concurrent-ruby (~> 1.0, >= 1.0.2)
18
18
  i18n (>= 1.6, < 2)
19
19
  minitest (>= 5.1)
@@ -48,7 +48,7 @@ GEM
48
48
  rspec-expectations (3.12.2)
49
49
  diff-lcs (>= 1.2.0, < 2.0)
50
50
  rspec-support (~> 3.12.0)
51
- rspec-mocks (3.12.3)
51
+ rspec-mocks (3.12.5)
52
52
  diff-lcs (>= 1.2.0, < 2.0)
53
53
  rspec-support (~> 3.12.0)
54
54
  rspec-support (3.12.0)
@@ -61,7 +61,7 @@ GEM
61
61
  thor (1.2.1)
62
62
  tzinfo (2.0.6)
63
63
  concurrent-ruby (~> 1.0)
64
- waterdrop (2.5.0)
64
+ waterdrop (2.5.1)
65
65
  karafka-core (>= 2.0.12, < 3.0.0)
66
66
  zeitwerk (~> 2.3)
67
67
  zeitwerk (2.6.7)
@@ -79,4 +79,4 @@ DEPENDENCIES
79
79
  simplecov
80
80
 
81
81
  BUNDLED WITH
82
- 2.4.7
82
+ 2.4.10
data/README.md CHANGED
@@ -86,7 +86,7 @@ bundle exec karafka server
86
86
 
87
87
  I also sell Karafka Pro subscriptions. It includes a commercial-friendly license, priority support, architecture consultations, enhanced Web UI and high throughput data processing-related features (virtual partitions, long-running jobs, and more).
88
88
 
89
- **20%** of the income will be distributed back to other OSS projects that Karafka uses under the hood.
89
+ **10%** of the income will be distributed back to other OSS projects that Karafka uses under the hood.
90
90
 
91
91
  Help me provide high-quality open-source software. Please see the Karafka [homepage](https://karafka.io/#become-pro) for more details.
92
92
 
data/bin/integrations CHANGED
@@ -25,7 +25,7 @@ ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../
25
25
  # we limit it. Locally we can run a lot of those, as many of them have sleeps and do not use a lot
26
26
  # of CPU. Locally we also cannot go beyond certain limit due to how often and how many topics we
27
27
  # create in Kafka. With an overloaded system, we start getting timeouts.
28
- CONCURRENCY = ENV.key?('CI') ? 4 : Etc.nprocessors * 2
28
+ CONCURRENCY = ENV.key?('CI') ? 5 : Etc.nprocessors * 3
29
29
 
30
30
  # How may bytes do we want to keep from the stdout in the buffer for when we need to print it
31
31
  MAX_BUFFER_OUTPUT = 51_200
@@ -72,10 +72,3 @@ en:
72
72
  test:
73
73
  missing: needs to be present
74
74
  id_format: needs to be a String
75
-
76
- pro_topic:
77
- virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
78
- virtual_partitions.max_partitions_format: needs to be equal or more than 1
79
- manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
80
- long_running_job.active_format: needs to be either true or false
81
- dead_letter_queue_not_with_virtual_partitions: cannot be used together with Virtual Partitions
@@ -3,10 +3,28 @@ en:
3
3
  topic:
4
4
  virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
5
5
  virtual_partitions.max_partitions_format: needs to be equal or more than 1
6
+
6
7
  manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
8
+
7
9
  long_running_job.active_format: needs to be either true or false
10
+
8
11
  dead_letter_queue_with_virtual_partitions: when using Dead Letter Queue with Virtual Partitions, at least one retry is required.
9
12
 
13
+ throttling.active_format: needs to be either true or false
14
+ throttling.limit_format: needs to be equal or more than 1
15
+ throttling.interval_format: needs to be equal or more than 1
16
+
17
+ filtering.active_missing: needs to be present
18
+ filtering.factory_format: 'needs to respond to #call'
19
+ filtering.factories_format: 'needs to contain only factories responding to #call'
20
+ filtering.active_format: 'needs to be boolean'
21
+
22
+ expiring.ttl_format: 'needs to be equal or more than 0 and an integer'
23
+ expiring.active_format: 'needs to be boolean'
24
+
25
+ delaying.delay_format: 'needs to be equal or more than 0 and an integer'
26
+ delaying.active_format: 'needs to be boolean'
27
+
10
28
  config:
11
29
  encryption.active_format: 'needs to be either true or false'
12
30
  encryption.public_key_invalid: 'is not a valid public RSA key'
@@ -12,16 +12,31 @@ module Karafka
12
12
  messages.each do |message|
13
13
  break if Karafka::App.stopping?
14
14
 
15
- # We technically speaking could set this as deserializer and reference it from the
16
- # message instead of using the `#raw_payload`. This is not done on purpose to simplify
17
- # the ActiveJob setup here
18
- job = ::ActiveSupport::JSON.decode(message.raw_payload)
15
+ consume_job(message)
19
16
 
20
- tags.add(:job_class, job['job_class'])
17
+ mark_as_consumed(message)
18
+ end
19
+ end
21
20
 
22
- ::ActiveJob::Base.execute(job)
21
+ private
23
22
 
24
- mark_as_consumed(message)
23
+ # Consumes a message with the job and runs needed instrumentation
24
+ #
25
+ # @param job_message [Karafka::Messages::Message] message with active job
26
+ def consume_job(job_message)
27
+ # We technically speaking could set this as deserializer and reference it from the
28
+ # message instead of using the `#raw_payload`. This is not done on purpose to simplify
29
+ # the ActiveJob setup here
30
+ job = ::ActiveSupport::JSON.decode(job_message.raw_payload)
31
+
32
+ tags.add(:job_class, job['job_class'])
33
+
34
+ payload = { caller: self, job: job, message: job_message }
35
+
36
+ # We publish both to make it consistent with `consumer.x` events
37
+ Karafka.monitor.instrument('active_job.consume', payload)
38
+ Karafka.monitor.instrument('active_job.consumed', payload) do
39
+ ::ActiveJob::Base.execute(job)
25
40
  end
26
41
  end
27
42
  end
data/lib/karafka/admin.rb CHANGED
@@ -44,17 +44,32 @@ module Karafka
44
44
  # @param count [Integer] how many messages we want to get at most
45
45
  # @param start_offset [Integer] offset from which we should start. If -1 is provided
46
46
  # (default) we will start from the latest offset
47
+ # @param settings [Hash] kafka extra settings (optional)
47
48
  #
48
49
  # @return [Array<Karafka::Messages::Message>] array with messages
49
- def read_topic(name, partition, count, start_offset = -1)
50
+ def read_topic(name, partition, count, start_offset = -1, settings = {})
50
51
  messages = []
51
52
  tpl = Rdkafka::Consumer::TopicPartitionList.new
53
+ low_offset, high_offset = nil
52
54
 
53
- with_consumer do |consumer|
54
- offsets = consumer.query_watermark_offsets(name, partition)
55
- end_offset = offsets.last
55
+ with_consumer(settings) do |consumer|
56
+ low_offset, high_offset = consumer.query_watermark_offsets(name, partition)
57
+
58
+ # Select offset dynamically if -1 or less
59
+ start_offset = high_offset - count if start_offset.negative?
56
60
 
57
- start_offset = [0, offsets.last - count].max if start_offset.negative?
61
+ # Build the requested range - since first element is on the start offset we need to
62
+ # subtract one from requested count to end up with expected number of elements
63
+ requested_range = (start_offset..start_offset + (count - 1))
64
+ # Establish theoretical available range. Note, that this does not handle cases related to
65
+ # log retention or compaction
66
+ available_range = (low_offset..high_offset)
67
+ # Select only offset that we can select. This will remove all the potential offsets that
68
+ # are below the low watermark offset
69
+ possible_range = requested_range.select { |offset| available_range.include?(offset) }
70
+
71
+ start_offset = possible_range.first
72
+ count = possible_range.count
58
73
 
59
74
  tpl.add_topic_and_partitions_with_offsets(name, partition => start_offset)
60
75
  consumer.assign(tpl)
@@ -64,11 +79,15 @@ module Karafka
64
79
  loop do
65
80
  # If we've got as many messages as we've wanted stop
66
81
  break if messages.size >= count
67
- # If we've reached end of the topic messages, don't process more
68
- break if !messages.empty? && end_offset <= messages.last.offset
69
82
 
70
83
  message = consumer.poll(200)
71
- messages << message if message
84
+
85
+ next unless message
86
+
87
+ # If the message we've got is beyond the requested range, stop
88
+ break unless possible_range.include?(message.offset)
89
+
90
+ messages << message
72
91
  rescue Rdkafka::RdkafkaError => e
73
92
  # End of partition
74
93
  break if e.code == :partition_eof
@@ -77,7 +96,7 @@ module Karafka
77
96
  end
78
97
  end
79
98
 
80
- messages.map do |message|
99
+ messages.map! do |message|
81
100
  Messages::Builders::Message.call(
82
101
  message,
83
102
  # Use topic from routes if we can match it or create a dummy one
@@ -136,6 +155,17 @@ module Karafka
136
155
  end
137
156
  end
138
157
 
158
+ # Fetches the watermark offsets for a given topic partition
159
+ #
160
+ # @param name [String, Symbol] topic name
161
+ # @param partition [Integer] partition
162
+ # @return [Array<Integer, Integer>] low watermark offset and high watermark offset
163
+ def read_watermark_offsets(name, partition)
164
+ with_consumer do |consumer|
165
+ consumer.query_watermark_offsets(name, partition)
166
+ end
167
+ end
168
+
139
169
  # @return [Rdkafka::Metadata] cluster metadata info
140
170
  def cluster_info
141
171
  with_admin do |admin|
@@ -159,15 +189,16 @@ module Karafka
159
189
 
160
190
  # Creates admin instance and yields it. After usage it closes the admin instance
161
191
  def with_admin
162
- admin = config(:producer).admin
192
+ admin = config(:producer, {}).admin
163
193
  yield(admin)
164
194
  ensure
165
195
  admin&.close
166
196
  end
167
197
 
168
198
  # Creates consumer instance and yields it. After usage it closes the consumer instance
169
- def with_consumer
170
- consumer = config(:consumer).consumer
199
+ # @param settings [Hash] extra settings to customize consumer
200
+ def with_consumer(settings = {})
201
+ consumer = config(:consumer, settings).consumer
171
202
  yield(consumer)
172
203
  ensure
173
204
  consumer&.close
@@ -196,11 +227,12 @@ module Karafka
196
227
  end
197
228
 
198
229
  # @param type [Symbol] type of config we want
230
+ # @param settings [Hash] extra settings for config (if needed)
199
231
  # @return [::Rdkafka::Config] rdkafka config
200
- def config(type)
232
+ def config(type, settings)
201
233
  config_hash = Karafka::Setup::AttributesMap.public_send(
202
234
  type,
203
- Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS)
235
+ Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS).merge!(settings)
204
236
  )
205
237
 
206
238
  ::Rdkafka::Config.new(config_hash)
@@ -7,11 +7,13 @@ module Karafka
7
7
  # Allow for consumer instance tagging for instrumentation
8
8
  include ::Karafka::Core::Taggable
9
9
 
10
+ extend Forwardable
11
+
12
+ def_delegators :@coordinator, :topic, :partition
13
+
10
14
  # @return [String] id of the current consumer
11
15
  attr_reader :id
12
16
  # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
13
- attr_accessor :topic
14
- # @return [Karafka::Messages::Messages] current messages batch
15
17
  attr_accessor :messages
16
18
  # @return [Karafka::Connection::Client] kafka connection client
17
19
  attr_accessor :client
@@ -97,6 +99,20 @@ module Karafka
97
99
  )
98
100
  end
99
101
 
102
+ # Trigger method for running on idle runs without messages
103
+ #
104
+ # @private
105
+ def on_idle
106
+ handle_idle
107
+ rescue StandardError => e
108
+ Karafka.monitor.instrument(
109
+ 'error.occurred',
110
+ error: e,
111
+ caller: self,
112
+ type: 'consumer.idle.error'
113
+ )
114
+ end
115
+
100
116
  # Trigger method for running on partition revocation.
101
117
  #
102
118
  # @private
@@ -143,51 +159,6 @@ module Karafka
143
159
  # some teardown procedures (closing file handler, etc).
144
160
  def shutdown; end
145
161
 
146
- # Marks message as consumed in an async way.
147
- #
148
- # @param message [Messages::Message] last successfully processed message.
149
- # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
150
- # that we were not able and that we have lost the partition.
151
- #
152
- # @note We keep track of this offset in case we would mark as consumed and got error when
153
- # processing another message. In case like this we do not pause on the message we've already
154
- # processed but rather at the next one. This applies to both sync and async versions of this
155
- # method.
156
- def mark_as_consumed(message)
157
- # Ignore earlier offsets than the one we alread committed
158
- return true if coordinator.seek_offset > message.offset
159
-
160
- unless client.mark_as_consumed(message)
161
- coordinator.revoke
162
-
163
- return false
164
- end
165
-
166
- coordinator.seek_offset = message.offset + 1
167
-
168
- true
169
- end
170
-
171
- # Marks message as consumed in a sync way.
172
- #
173
- # @param message [Messages::Message] last successfully processed message.
174
- # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
175
- # that we were not able and that we have lost the partition.
176
- def mark_as_consumed!(message)
177
- # Ignore earlier offsets than the one we alread committed
178
- return true if coordinator.seek_offset > message.offset
179
-
180
- unless client.mark_as_consumed!(message)
181
- coordinator.revoke
182
-
183
- return false
184
- end
185
-
186
- coordinator.seek_offset = message.offset + 1
187
-
188
- true
189
- end
190
-
191
162
  # Pauses processing on a given offset for the current topic partition
192
163
  #
193
164
  # After given partition is resumed, it will continue processing from the given offset
@@ -201,8 +172,8 @@ module Karafka
201
172
  timeout ? coordinator.pause_tracker.pause(timeout) : coordinator.pause_tracker.pause
202
173
 
203
174
  client.pause(
204
- messages.metadata.topic,
205
- messages.metadata.partition,
175
+ topic.name,
176
+ partition,
206
177
  offset
207
178
  )
208
179
 
@@ -213,8 +184,8 @@ module Karafka
213
184
  'consumer.consuming.pause',
214
185
  caller: self,
215
186
  manual: manual_pause,
216
- topic: messages.metadata.topic,
217
- partition: messages.metadata.partition,
187
+ topic: topic.name,
188
+ partition: partition,
218
189
  offset: offset,
219
190
  timeout: coordinator.pause_tracker.current_timeout,
220
191
  attempt: coordinator.pause_tracker.attempt
@@ -223,6 +194,8 @@ module Karafka
223
194
 
224
195
  # Resumes processing of the current topic partition
225
196
  def resume
197
+ return unless coordinator.pause_tracker.paused?
198
+
226
199
  # This is sufficient to expire a partition pause, as it will then be resumed by the listener
227
200
  # thread before the next poll.
228
201
  coordinator.pause_tracker.expire
@@ -234,8 +207,8 @@ module Karafka
234
207
  def seek(offset)
235
208
  client.seek(
236
209
  Karafka::Messages::Seek.new(
237
- messages.metadata.topic,
238
- messages.metadata.partition,
210
+ topic.name,
211
+ partition,
239
212
  offset
240
213
  )
241
214
  )
@@ -248,6 +221,13 @@ module Karafka
248
221
  coordinator.revoked?
249
222
  end
250
223
 
224
+ # @return [Boolean] are we retrying processing after an error. This can be used to provide a
225
+ # different flow after there is an error, for example for resources cleanup, small manual
226
+ # backoff or different instrumentation tracking.
227
+ def retrying?
228
+ coordinator.pause_tracker.attempt.positive?
229
+ end
230
+
251
231
  # Pauses the processing from the last offset to retry on given message
252
232
  # @private
253
233
  def retry_after_pause
@@ -258,8 +238,8 @@ module Karafka
258
238
  Karafka.monitor.instrument(
259
239
  'consumer.consuming.retry',
260
240
  caller: self,
261
- topic: messages.metadata.topic,
262
- partition: messages.metadata.partition,
241
+ topic: topic.name,
242
+ partition: partition,
263
243
  offset: coordinator.seek_offset,
264
244
  timeout: coordinator.pause_tracker.current_timeout,
265
245
  attempt: coordinator.pause_tracker.attempt
@@ -25,7 +25,7 @@ module Karafka
25
25
  @consumer_group_coordinator = consumer_group_coordinator
26
26
  @subscription_group = subscription_group
27
27
  @jobs_queue = jobs_queue
28
- @coordinators = Processing::CoordinatorsBuffer.new
28
+ @coordinators = Processing::CoordinatorsBuffer.new(subscription_group.topics)
29
29
  @client = Client.new(@subscription_group)
30
30
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
31
31
  @jobs_builder = proc_config.jobs_builder
@@ -234,7 +234,7 @@ module Karafka
234
234
  def build_and_schedule_shutdown_jobs
235
235
  jobs = []
236
236
 
237
- @executors.each do |_, _, executor|
237
+ @executors.each do |executor|
238
238
  job = @jobs_builder.shutdown(executor)
239
239
  job.before_enqueue
240
240
  jobs << job
@@ -263,20 +263,25 @@ module Karafka
263
263
 
264
264
  @messages_buffer.each do |topic, partition, messages|
265
265
  coordinator = @coordinators.find_or_create(topic, partition)
266
-
267
266
  # Start work coordination for this topic partition
268
267
  coordinator.start(messages)
269
268
 
270
- @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
271
- # Count the job we're going to create here
272
- coordinator.increment
273
- executor = @executors.find_or_create(topic, partition, group_id)
274
- job = @jobs_builder.consume(executor, partition_messages, coordinator)
275
- job.before_enqueue
276
- jobs << job
269
+ # We do not increment coordinator for idle job because it's not a user related one
270
+ # and it will not go through a standard lifecycle. Same applies to revoked and shutdown
271
+ if messages.empty?
272
+ executor = @executors.find_or_create(topic, partition, 0, coordinator)
273
+ jobs << @jobs_builder.idle(executor)
274
+ else
275
+ @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
276
+ executor = @executors.find_or_create(topic, partition, group_id, coordinator)
277
+ coordinator.increment
278
+ jobs << @jobs_builder.consume(executor, partition_messages)
279
+ end
277
280
  end
278
281
  end
279
282
 
283
+ jobs.each(&:before_enqueue)
284
+
280
285
  @scheduler.schedule_consumption(@jobs_queue, jobs)
281
286
  end
282
287
 
@@ -46,8 +46,5 @@ module Karafka
46
46
 
47
47
  # This should never happen. Please open an issue if it does.
48
48
  StrategyNotFoundError = Class.new(BaseError)
49
-
50
- # This should never happen. Please open an issue if it does.
51
- SkipMessageNotFoundError = Class.new(BaseError)
52
49
  end
53
50
  end
@@ -170,13 +170,51 @@ module Karafka
170
170
  #
171
171
  # @param event [Karafka::Core::Monitoring::Event] event details including payload
172
172
  def on_dead_letter_queue_dispatched(event)
173
+ consumer = event[:caller]
174
+ topic = consumer.topic.name
173
175
  message = event[:message]
174
176
  offset = message.offset
175
- topic = event[:caller].topic.name
176
- dlq_topic = event[:caller].topic.dead_letter_queue.topic
177
+ dlq_topic = consumer.topic.dead_letter_queue.topic
177
178
  partition = message.partition
178
179
 
179
- info "Dispatched message #{offset} from #{topic}/#{partition} to DLQ topic: #{dlq_topic}"
180
+ info <<~MSG.tr("\n", ' ').strip!
181
+ [#{consumer.id}] Dispatched message #{offset}
182
+ from #{topic}/#{partition}
183
+ to DLQ topic: #{dlq_topic}
184
+ MSG
185
+ end
186
+
187
+ # Logs info about throttling event
188
+ #
189
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
190
+ def on_filtering_throttled(event)
191
+ consumer = event[:caller]
192
+ topic = consumer.topic.name
193
+ # Here we get last message before throttle
194
+ message = event[:message]
195
+ partition = message.partition
196
+ offset = message.offset
197
+
198
+ info <<~MSG.tr("\n", ' ').strip!
199
+ [#{consumer.id}] Throttled and will resume
200
+ from message #{offset}
201
+ on #{topic}/#{partition}
202
+ MSG
203
+ end
204
+
205
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
206
+ def on_filtering_seek(event)
207
+ consumer = event[:caller]
208
+ topic = consumer.topic.name
209
+ # Message to which we seek
210
+ message = event[:message]
211
+ partition = message.partition
212
+ offset = message.offset
213
+
214
+ info <<~MSG.tr("\n", ' ').strip!
215
+ [#{consumer.id}] Post-filtering seeking to message #{offset}
216
+ on #{topic}/#{partition}
217
+ MSG
180
218
  end
181
219
 
182
220
  # There are many types of errors that can occur in many places, but we provide a single
@@ -203,6 +241,9 @@ module Karafka
203
241
  when 'consumer.after_consume.error'
204
242
  error "Consumer after consume failed due to an error: #{error}"
205
243
  error details
244
+ when 'consumer.idle.error'
245
+ error "Consumer idle failed due to an error: #{error}"
246
+ error details
206
247
  when 'consumer.shutdown.error'
207
248
  error "Consumer on shutdown failed due to an error: #{error}"
208
249
  error details
@@ -17,6 +17,9 @@ module Karafka
17
17
  # complete list of all the events. Please use the #available_events on fully loaded
18
18
  # Karafka system to determine all of the events you can use.
19
19
  EVENTS = %w[
20
+ active_job.consume
21
+ active_job.consumed
22
+
20
23
  app.initialized
21
24
  app.running
22
25
  app.quieting
@@ -36,6 +39,7 @@ module Karafka
36
39
  consumer.consumed
37
40
  consumer.consuming.pause
38
41
  consumer.consuming.retry
42
+ consumer.idle
39
43
  consumer.revoke
40
44
  consumer.revoked
41
45
  consumer.shutting_down
@@ -43,6 +47,9 @@ module Karafka
43
47
 
44
48
  dead_letter_queue.dispatched
45
49
 
50
+ filtering.throttled
51
+ filtering.seek
52
+
46
53
  process.notice_signal
47
54
 
48
55
  statistics.emitted