karafka 2.0.37 → 2.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +1 -1
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +34 -0
  6. data/Gemfile.lock +7 -7
  7. data/README.md +1 -1
  8. data/bin/integrations +1 -1
  9. data/config/locales/errors.yml +0 -7
  10. data/config/locales/pro_errors.yml +18 -0
  11. data/lib/karafka/active_job/consumer.rb +22 -7
  12. data/lib/karafka/admin.rb +46 -14
  13. data/lib/karafka/base_consumer.rb +35 -55
  14. data/lib/karafka/connection/listener.rb +15 -10
  15. data/lib/karafka/errors.rb +0 -3
  16. data/lib/karafka/instrumentation/logger_listener.rb +44 -3
  17. data/lib/karafka/instrumentation/notifications.rb +7 -0
  18. data/lib/karafka/pro/active_job/consumer.rb +10 -5
  19. data/lib/karafka/pro/processing/coordinator.rb +13 -4
  20. data/lib/karafka/pro/processing/filters/base.rb +61 -0
  21. data/lib/karafka/pro/processing/filters/delayer.rb +70 -0
  22. data/lib/karafka/pro/processing/filters/expirer.rb +51 -0
  23. data/lib/karafka/pro/processing/filters/throttler.rb +84 -0
  24. data/lib/karafka/pro/processing/filters_applier.rb +100 -0
  25. data/lib/karafka/pro/processing/jobs_builder.rb +7 -3
  26. data/lib/karafka/pro/processing/scheduler.rb +24 -7
  27. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom.rb +68 -0
  28. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_lrj_mom_vp.rb +74 -0
  29. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom.rb +72 -0
  30. data/lib/karafka/pro/processing/strategies/aj/dlq_ftr_mom_vp.rb +76 -0
  31. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom.rb +62 -0
  32. data/lib/karafka/pro/processing/strategies/aj/dlq_lrj_mom_vp.rb +68 -0
  33. data/lib/karafka/pro/processing/strategies/aj/dlq_mom.rb +64 -0
  34. data/lib/karafka/pro/processing/strategies/aj/dlq_mom_vp.rb +69 -0
  35. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom.rb +38 -0
  36. data/lib/karafka/pro/processing/strategies/aj/ftr_lrj_mom_vp.rb +64 -0
  37. data/lib/karafka/pro/processing/strategies/aj/ftr_mom.rb +38 -0
  38. data/lib/karafka/pro/processing/strategies/aj/ftr_mom_vp.rb +58 -0
  39. data/lib/karafka/pro/processing/strategies/{dlq_lrj_vp.rb → aj/lrj_mom.rb} +14 -13
  40. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +77 -0
  41. data/lib/karafka/pro/processing/strategies/aj/mom.rb +36 -0
  42. data/lib/karafka/pro/processing/strategies/aj/mom_vp.rb +52 -0
  43. data/lib/karafka/pro/processing/strategies/dlq/default.rb +131 -0
  44. data/lib/karafka/pro/processing/strategies/dlq/ftr.rb +61 -0
  45. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj.rb +75 -0
  46. data/lib/karafka/pro/processing/strategies/dlq/ftr_lrj_mom.rb +74 -0
  47. data/lib/karafka/pro/processing/strategies/{mom.rb → dlq/ftr_lrj_vp.rb} +16 -19
  48. data/lib/karafka/pro/processing/strategies/dlq/ftr_mom.rb +73 -0
  49. data/lib/karafka/pro/processing/strategies/dlq/ftr_vp.rb +39 -0
  50. data/lib/karafka/pro/processing/strategies/dlq/lrj.rb +63 -0
  51. data/lib/karafka/pro/processing/strategies/dlq/lrj_mom.rb +66 -0
  52. data/lib/karafka/pro/processing/strategies/dlq/lrj_vp.rb +38 -0
  53. data/lib/karafka/pro/processing/strategies/dlq/mom.rb +67 -0
  54. data/lib/karafka/pro/processing/strategies/dlq/vp.rb +39 -0
  55. data/lib/karafka/pro/processing/strategies/ftr/default.rb +104 -0
  56. data/lib/karafka/pro/processing/strategies/ftr/vp.rb +40 -0
  57. data/lib/karafka/pro/processing/strategies/lrj/default.rb +85 -0
  58. data/lib/karafka/pro/processing/strategies/lrj/ftr.rb +69 -0
  59. data/lib/karafka/pro/processing/strategies/lrj/ftr_mom.rb +67 -0
  60. data/lib/karafka/pro/processing/strategies/{vp.rb → lrj/ftr_vp.rb} +15 -13
  61. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +78 -0
  62. data/lib/karafka/pro/processing/strategies/{aj_lrj_mom.rb → lrj/vp.rb} +13 -12
  63. data/lib/karafka/pro/processing/strategies/mom/default.rb +46 -0
  64. data/lib/karafka/pro/processing/strategies/mom/ftr.rb +53 -0
  65. data/lib/karafka/pro/processing/strategies/vp/default.rb +53 -0
  66. data/lib/karafka/pro/processing/{strategies/lrj_vp.rb → strategies.rb} +1 -13
  67. data/lib/karafka/pro/processing/strategy_selector.rb +44 -18
  68. data/lib/karafka/pro/{processing/strategies/aj_mom.rb → routing/features/delaying/config.rb} +7 -13
  69. data/lib/karafka/pro/routing/features/delaying/contract.rb +38 -0
  70. data/lib/karafka/pro/routing/features/delaying/topic.rb +59 -0
  71. data/lib/karafka/pro/routing/features/delaying.rb +29 -0
  72. data/lib/karafka/pro/routing/features/expiring/config.rb +27 -0
  73. data/lib/karafka/pro/routing/features/expiring/contract.rb +38 -0
  74. data/lib/karafka/pro/routing/features/expiring/topic.rb +59 -0
  75. data/lib/karafka/pro/routing/features/expiring.rb +27 -0
  76. data/lib/karafka/pro/routing/features/filtering/config.rb +40 -0
  77. data/lib/karafka/pro/routing/features/filtering/contract.rb +41 -0
  78. data/lib/karafka/pro/routing/features/filtering/topic.rb +51 -0
  79. data/lib/karafka/pro/routing/features/filtering.rb +27 -0
  80. data/lib/karafka/pro/routing/features/long_running_job/contract.rb +1 -1
  81. data/lib/karafka/pro/routing/features/throttling/config.rb +32 -0
  82. data/lib/karafka/pro/routing/features/throttling/contract.rb +41 -0
  83. data/lib/karafka/pro/routing/features/throttling/topic.rb +69 -0
  84. data/lib/karafka/pro/routing/features/throttling.rb +30 -0
  85. data/lib/karafka/processing/coordinator.rb +60 -30
  86. data/lib/karafka/processing/coordinators_buffer.rb +5 -1
  87. data/lib/karafka/processing/executor.rb +23 -16
  88. data/lib/karafka/processing/executors_buffer.rb +10 -26
  89. data/lib/karafka/processing/jobs/consume.rb +2 -4
  90. data/lib/karafka/processing/jobs/idle.rb +24 -0
  91. data/lib/karafka/processing/jobs_builder.rb +2 -3
  92. data/lib/karafka/processing/result.rb +5 -0
  93. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +1 -1
  94. data/lib/karafka/processing/strategies/base.rb +5 -0
  95. data/lib/karafka/processing/strategies/default.rb +50 -0
  96. data/lib/karafka/processing/strategies/dlq.rb +13 -4
  97. data/lib/karafka/processing/strategies/dlq_mom.rb +8 -3
  98. data/lib/karafka/processing/strategy_selector.rb +27 -10
  99. data/lib/karafka/version.rb +1 -1
  100. data/renovate.json +6 -0
  101. data.tar.gz.sig +0 -0
  102. metadata +66 -22
  103. metadata.gz.sig +0 -0
  104. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +0 -42
  105. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom_vp.rb +0 -70
  106. data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +0 -62
  107. data/lib/karafka/pro/processing/strategies/aj_dlq_mom_vp.rb +0 -68
  108. data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +0 -75
  109. data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +0 -62
  110. data/lib/karafka/pro/processing/strategies/dlq.rb +0 -120
  111. data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +0 -65
  112. data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +0 -62
  113. data/lib/karafka/pro/processing/strategies/dlq_mom.rb +0 -62
  114. data/lib/karafka/pro/processing/strategies/dlq_vp.rb +0 -37
  115. data/lib/karafka/pro/processing/strategies/lrj.rb +0 -83
  116. data/lib/karafka/pro/processing/strategies/lrj_mom.rb +0 -73
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bab9c1d7bc952b4ecbfc4fad794d7e7c861cd3a332cc5d9058cef6c0bd9b57cb
4
- data.tar.gz: 7662bd8dc5748d9112f3c72b2912619534e45750188f78df2f69a7e6ae1f9c31
3
+ metadata.gz: 12fe8a47dc0ab16b0f7783424cd1aa043c2d2b228b4f4164f1cecefe604269d9
4
+ data.tar.gz: 9fa3bae282770dd67503c41ef4b73a27a38bfcff3bf472ddd63753d14d03614f
5
5
  SHA512:
6
- metadata.gz: 9a99a84d538a74bd27d5a0f585a12dbbe67eb76ab63cc1a0984cbe1562f230070ad482418f85393a3a479e81534a0957a0863c91f3a7f5b6433f74efd317c79e
7
- data.tar.gz: 7da6129cd795f65d821bae897864648e4a5e37c0d07e8745f110f0d03a23d688d7717ab6c42652c4660fe3be3d26ee7051a8c2c66c1fcb62834a1c4159bd4ac4
6
+ metadata.gz: 9e6536c90a411a0b42337f73c00d9f454028366f42eabb1b7f40902181bcbcfd43258741d6fc51c6e29046b9ee1f8598755440d28a00ca96104a61a8095c20c2
7
+ data.tar.gz: be75dd1bfa744187f770f2e1f0deeedfba4f3fb1b824d6bab91f056f96e60a33498429e35ea22841404def0935e584db0df4289d2818631ace2e597d28785960
checksums.yaml.gz.sig CHANGED
Binary file
@@ -62,7 +62,7 @@ jobs:
62
62
  run: \curl -sSL https://api.coditsu.io/run/ci | bash
63
63
 
64
64
  specs:
65
- timeout-minutes: 30
65
+ timeout-minutes: 45
66
66
  runs-on: ubuntu-latest
67
67
  needs: diffend
68
68
  strategy:
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.2.1
1
+ 3.2.2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,39 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.0.39 (2023-04-11)
4
+ - **[Feature]** Provide ability to throttle/limit number of messages processed in a time unit (#1203)
5
+ - **[Feature]** Provide Delayed Topics (#1000)
6
+ - **[Feature]** Provide ability to expire messages (expiring topics)
7
+ - **[Feature]** Provide ability to apply filters after messages are polled and before enqueued. This is a generic filter API for any usage.
8
+ - [Improvement] When using ActiveJob with Virtual Partitions, Karafka will stop if collectively VPs are failing. This minimizes number of jobs that will be collectively re-processed.
9
+ - [Improvement] `#retrying?` method has been added to consumers to provide ability to check, that we're reprocessing data after a failure. This is useful for branching out processing based on errors.
10
+ - [Improvement] Track active_job_id in instrumentation (#1372)
11
+ - [Improvement] Introduce new housekeeping job type called `Idle` for non-consumption execution flows.
12
+ - [Improvement] Change how a manual offset management works with Long-Running Jobs. Use the last message offset to move forward instead of relying on the last message marked as consumed for a scenario where no message is marked.
13
+ - [Improvement] Prioritize in Pro non-consumption jobs execution over consumption despite LJF. This will ensure, that housekeeping as well as other non-consumption events are not saturated when running a lot of work.
14
+ - [Improvement] Normalize the DLQ behaviour with MoM. Always pause on dispatch for all the strategies.
15
+ - [Improvement] Improve the manual offset management and DLQ behaviour when no markings occur for OSS.
16
+ - [Improvement] Do not early stop ActiveJob work running under virtual partitions to prevent extensive reprocessing.
17
+ - [Improvement] Drastically increase number of scenarios covered by integration specs (OSS and Pro).
18
+ - [Improvement] Introduce a `Coordinator#synchronize` lock for cross virtual partitions operations.
19
+ - [Fix] Do not resume partition that is not paused.
20
+ - [Fix] Fix `LoggerListener` cases where logs would not include caller id (when available)
21
+ - [Fix] Fix not working benchmark tests.
22
+ - [Fix] Fix a case where when using manual offset management with a user pause would ignore the pause and seek to the next message.
23
+ - [Fix] Fix a case where dead letter queue would go into an infinite loop on message with first ever offset if the first ever offset would not recover.
24
+ - [Fix] Make sure to resume always for all LRJ strategies on revocation.
25
+ - [Refactor] Make sure that coordinator is topic aware. Needed for throttling, delayed processing and expired jobs.
26
+ - [Refactor] Put Pro strategies into namespaces to better organize multiple combinations.
27
+ - [Refactor] Do not rely on messages metadata for internal topic and partition operations like `#seek` so they can run independently from the consumption flow.
28
+ - [Refactor] Hold a single topic/partition reference on a coordinator instead of in executor, coordinator and consumer.
29
+ - [Refactor] Move `#mark_as_consumed` and `#mark_as_consumed!`into `Strategies::Default` to be able to introduce marking for virtual partitions.
30
+
31
+ ## 2.0.38 (2023-03-27)
32
+ - [Improvement] Introduce `Karafka::Admin#read_watermark_offsets` to get low and high watermark offsets values.
33
+ - [Improvement] Track active_job_id in instrumentation (#1372)
34
+ - [Improvement] Improve `#read_topic` reading in case of a compacted partition where the offset is below the low watermark offset. This should optimize reading and should not go beyond the low watermark offset.
35
+ - [Improvement] Allow `#read_topic` to accept instance settings to overwrite any settings needed to customize reading behaviours.
36
+
3
37
  ## 2.0.37 (2023-03-20)
4
38
  - [Fix] Declarative topics execution on a secondary cluster run topics creation on the primary one (#1365)
5
39
  - [Fix] Admin read operations commit offset when not needed (#1369)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.0.37)
4
+ karafka (2.0.39)
5
5
  karafka-core (>= 2.0.12, < 3.0.0)
6
6
  thor (>= 0.20)
7
7
  waterdrop (>= 2.4.10, < 3.0.0)
@@ -10,10 +10,10 @@ PATH
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- activejob (7.0.4.2)
14
- activesupport (= 7.0.4.2)
13
+ activejob (7.0.4.3)
14
+ activesupport (= 7.0.4.3)
15
15
  globalid (>= 0.3.6)
16
- activesupport (7.0.4.2)
16
+ activesupport (7.0.4.3)
17
17
  concurrent-ruby (~> 1.0, >= 1.0.2)
18
18
  i18n (>= 1.6, < 2)
19
19
  minitest (>= 5.1)
@@ -48,7 +48,7 @@ GEM
48
48
  rspec-expectations (3.12.2)
49
49
  diff-lcs (>= 1.2.0, < 2.0)
50
50
  rspec-support (~> 3.12.0)
51
- rspec-mocks (3.12.3)
51
+ rspec-mocks (3.12.5)
52
52
  diff-lcs (>= 1.2.0, < 2.0)
53
53
  rspec-support (~> 3.12.0)
54
54
  rspec-support (3.12.0)
@@ -61,7 +61,7 @@ GEM
61
61
  thor (1.2.1)
62
62
  tzinfo (2.0.6)
63
63
  concurrent-ruby (~> 1.0)
64
- waterdrop (2.5.0)
64
+ waterdrop (2.5.1)
65
65
  karafka-core (>= 2.0.12, < 3.0.0)
66
66
  zeitwerk (~> 2.3)
67
67
  zeitwerk (2.6.7)
@@ -79,4 +79,4 @@ DEPENDENCIES
79
79
  simplecov
80
80
 
81
81
  BUNDLED WITH
82
- 2.4.7
82
+ 2.4.10
data/README.md CHANGED
@@ -86,7 +86,7 @@ bundle exec karafka server
86
86
 
87
87
  I also sell Karafka Pro subscriptions. It includes a commercial-friendly license, priority support, architecture consultations, enhanced Web UI and high throughput data processing-related features (virtual partitions, long-running jobs, and more).
88
88
 
89
- **20%** of the income will be distributed back to other OSS projects that Karafka uses under the hood.
89
+ **10%** of the income will be distributed back to other OSS projects that Karafka uses under the hood.
90
90
 
91
91
  Help me provide high-quality open-source software. Please see the Karafka [homepage](https://karafka.io/#become-pro) for more details.
92
92
 
data/bin/integrations CHANGED
@@ -25,7 +25,7 @@ ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../
25
25
  # we limit it. Locally we can run a lot of those, as many of them have sleeps and do not use a lot
26
26
  # of CPU. Locally we also cannot go beyond certain limit due to how often and how many topics we
27
27
  # create in Kafka. With an overloaded system, we start getting timeouts.
28
- CONCURRENCY = ENV.key?('CI') ? 4 : Etc.nprocessors * 2
28
+ CONCURRENCY = ENV.key?('CI') ? 5 : Etc.nprocessors * 3
29
29
 
30
30
  # How may bytes do we want to keep from the stdout in the buffer for when we need to print it
31
31
  MAX_BUFFER_OUTPUT = 51_200
@@ -72,10 +72,3 @@ en:
72
72
  test:
73
73
  missing: needs to be present
74
74
  id_format: needs to be a String
75
-
76
- pro_topic:
77
- virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
78
- virtual_partitions.max_partitions_format: needs to be equal or more than 1
79
- manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
80
- long_running_job.active_format: needs to be either true or false
81
- dead_letter_queue_not_with_virtual_partitions: cannot be used together with Virtual Partitions
@@ -3,10 +3,28 @@ en:
3
3
  topic:
4
4
  virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
5
5
  virtual_partitions.max_partitions_format: needs to be equal or more than 1
6
+
6
7
  manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
8
+
7
9
  long_running_job.active_format: needs to be either true or false
10
+
8
11
  dead_letter_queue_with_virtual_partitions: when using Dead Letter Queue with Virtual Partitions, at least one retry is required.
9
12
 
13
+ throttling.active_format: needs to be either true or false
14
+ throttling.limit_format: needs to be equal or more than 1
15
+ throttling.interval_format: needs to be equal or more than 1
16
+
17
+ filtering.active_missing: needs to be present
18
+ filtering.factory_format: 'needs to respond to #call'
19
+ filtering.factories_format: 'needs to contain only factories responding to #call'
20
+ filtering.active_format: 'needs to be boolean'
21
+
22
+ expiring.ttl_format: 'needs to be equal or more than 0 and an integer'
23
+ expiring.active_format: 'needs to be boolean'
24
+
25
+ delaying.delay_format: 'needs to be equal or more than 0 and an integer'
26
+ delaying.active_format: 'needs to be boolean'
27
+
10
28
  config:
11
29
  encryption.active_format: 'needs to be either true or false'
12
30
  encryption.public_key_invalid: 'is not a valid public RSA key'
@@ -12,16 +12,31 @@ module Karafka
12
12
  messages.each do |message|
13
13
  break if Karafka::App.stopping?
14
14
 
15
- # We technically speaking could set this as deserializer and reference it from the
16
- # message instead of using the `#raw_payload`. This is not done on purpose to simplify
17
- # the ActiveJob setup here
18
- job = ::ActiveSupport::JSON.decode(message.raw_payload)
15
+ consume_job(message)
19
16
 
20
- tags.add(:job_class, job['job_class'])
17
+ mark_as_consumed(message)
18
+ end
19
+ end
21
20
 
22
- ::ActiveJob::Base.execute(job)
21
+ private
23
22
 
24
- mark_as_consumed(message)
23
+ # Consumes a message with the job and runs needed instrumentation
24
+ #
25
+ # @param job_message [Karafka::Messages::Message] message with active job
26
+ def consume_job(job_message)
27
+ # We technically speaking could set this as deserializer and reference it from the
28
+ # message instead of using the `#raw_payload`. This is not done on purpose to simplify
29
+ # the ActiveJob setup here
30
+ job = ::ActiveSupport::JSON.decode(job_message.raw_payload)
31
+
32
+ tags.add(:job_class, job['job_class'])
33
+
34
+ payload = { caller: self, job: job, message: job_message }
35
+
36
+ # We publish both to make it consistent with `consumer.x` events
37
+ Karafka.monitor.instrument('active_job.consume', payload)
38
+ Karafka.monitor.instrument('active_job.consumed', payload) do
39
+ ::ActiveJob::Base.execute(job)
25
40
  end
26
41
  end
27
42
  end
data/lib/karafka/admin.rb CHANGED
@@ -44,17 +44,32 @@ module Karafka
44
44
  # @param count [Integer] how many messages we want to get at most
45
45
  # @param start_offset [Integer] offset from which we should start. If -1 is provided
46
46
  # (default) we will start from the latest offset
47
+ # @param settings [Hash] kafka extra settings (optional)
47
48
  #
48
49
  # @return [Array<Karafka::Messages::Message>] array with messages
49
- def read_topic(name, partition, count, start_offset = -1)
50
+ def read_topic(name, partition, count, start_offset = -1, settings = {})
50
51
  messages = []
51
52
  tpl = Rdkafka::Consumer::TopicPartitionList.new
53
+ low_offset, high_offset = nil
52
54
 
53
- with_consumer do |consumer|
54
- offsets = consumer.query_watermark_offsets(name, partition)
55
- end_offset = offsets.last
55
+ with_consumer(settings) do |consumer|
56
+ low_offset, high_offset = consumer.query_watermark_offsets(name, partition)
57
+
58
+ # Select offset dynamically if -1 or less
59
+ start_offset = high_offset - count if start_offset.negative?
56
60
 
57
- start_offset = [0, offsets.last - count].max if start_offset.negative?
61
+ # Build the requested range - since first element is on the start offset we need to
62
+ # subtract one from requested count to end up with expected number of elements
63
+ requested_range = (start_offset..start_offset + (count - 1))
64
+ # Establish theoretical available range. Note, that this does not handle cases related to
65
+ # log retention or compaction
66
+ available_range = (low_offset..high_offset)
67
+ # Select only offset that we can select. This will remove all the potential offsets that
68
+ # are below the low watermark offset
69
+ possible_range = requested_range.select { |offset| available_range.include?(offset) }
70
+
71
+ start_offset = possible_range.first
72
+ count = possible_range.count
58
73
 
59
74
  tpl.add_topic_and_partitions_with_offsets(name, partition => start_offset)
60
75
  consumer.assign(tpl)
@@ -64,11 +79,15 @@ module Karafka
64
79
  loop do
65
80
  # If we've got as many messages as we've wanted stop
66
81
  break if messages.size >= count
67
- # If we've reached end of the topic messages, don't process more
68
- break if !messages.empty? && end_offset <= messages.last.offset
69
82
 
70
83
  message = consumer.poll(200)
71
- messages << message if message
84
+
85
+ next unless message
86
+
87
+ # If the message we've got is beyond the requested range, stop
88
+ break unless possible_range.include?(message.offset)
89
+
90
+ messages << message
72
91
  rescue Rdkafka::RdkafkaError => e
73
92
  # End of partition
74
93
  break if e.code == :partition_eof
@@ -77,7 +96,7 @@ module Karafka
77
96
  end
78
97
  end
79
98
 
80
- messages.map do |message|
99
+ messages.map! do |message|
81
100
  Messages::Builders::Message.call(
82
101
  message,
83
102
  # Use topic from routes if we can match it or create a dummy one
@@ -136,6 +155,17 @@ module Karafka
136
155
  end
137
156
  end
138
157
 
158
+ # Fetches the watermark offsets for a given topic partition
159
+ #
160
+ # @param name [String, Symbol] topic name
161
+ # @param partition [Integer] partition
162
+ # @return [Array<Integer, Integer>] low watermark offset and high watermark offset
163
+ def read_watermark_offsets(name, partition)
164
+ with_consumer do |consumer|
165
+ consumer.query_watermark_offsets(name, partition)
166
+ end
167
+ end
168
+
139
169
  # @return [Rdkafka::Metadata] cluster metadata info
140
170
  def cluster_info
141
171
  with_admin do |admin|
@@ -159,15 +189,16 @@ module Karafka
159
189
 
160
190
  # Creates admin instance and yields it. After usage it closes the admin instance
161
191
  def with_admin
162
- admin = config(:producer).admin
192
+ admin = config(:producer, {}).admin
163
193
  yield(admin)
164
194
  ensure
165
195
  admin&.close
166
196
  end
167
197
 
168
198
  # Creates consumer instance and yields it. After usage it closes the consumer instance
169
- def with_consumer
170
- consumer = config(:consumer).consumer
199
+ # @param settings [Hash] extra settings to customize consumer
200
+ def with_consumer(settings = {})
201
+ consumer = config(:consumer, settings).consumer
171
202
  yield(consumer)
172
203
  ensure
173
204
  consumer&.close
@@ -196,11 +227,12 @@ module Karafka
196
227
  end
197
228
 
198
229
  # @param type [Symbol] type of config we want
230
+ # @param settings [Hash] extra settings for config (if needed)
199
231
  # @return [::Rdkafka::Config] rdkafka config
200
- def config(type)
232
+ def config(type, settings)
201
233
  config_hash = Karafka::Setup::AttributesMap.public_send(
202
234
  type,
203
- Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS)
235
+ Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS).merge!(settings)
204
236
  )
205
237
 
206
238
  ::Rdkafka::Config.new(config_hash)
@@ -7,11 +7,13 @@ module Karafka
7
7
  # Allow for consumer instance tagging for instrumentation
8
8
  include ::Karafka::Core::Taggable
9
9
 
10
+ extend Forwardable
11
+
12
+ def_delegators :@coordinator, :topic, :partition
13
+
10
14
  # @return [String] id of the current consumer
11
15
  attr_reader :id
12
16
  # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
13
- attr_accessor :topic
14
- # @return [Karafka::Messages::Messages] current messages batch
15
17
  attr_accessor :messages
16
18
  # @return [Karafka::Connection::Client] kafka connection client
17
19
  attr_accessor :client
@@ -97,6 +99,20 @@ module Karafka
97
99
  )
98
100
  end
99
101
 
102
+ # Trigger method for running on idle runs without messages
103
+ #
104
+ # @private
105
+ def on_idle
106
+ handle_idle
107
+ rescue StandardError => e
108
+ Karafka.monitor.instrument(
109
+ 'error.occurred',
110
+ error: e,
111
+ caller: self,
112
+ type: 'consumer.idle.error'
113
+ )
114
+ end
115
+
100
116
  # Trigger method for running on partition revocation.
101
117
  #
102
118
  # @private
@@ -143,51 +159,6 @@ module Karafka
143
159
  # some teardown procedures (closing file handler, etc).
144
160
  def shutdown; end
145
161
 
146
- # Marks message as consumed in an async way.
147
- #
148
- # @param message [Messages::Message] last successfully processed message.
149
- # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
150
- # that we were not able and that we have lost the partition.
151
- #
152
- # @note We keep track of this offset in case we would mark as consumed and got error when
153
- # processing another message. In case like this we do not pause on the message we've already
154
- # processed but rather at the next one. This applies to both sync and async versions of this
155
- # method.
156
- def mark_as_consumed(message)
157
- # Ignore earlier offsets than the one we alread committed
158
- return true if coordinator.seek_offset > message.offset
159
-
160
- unless client.mark_as_consumed(message)
161
- coordinator.revoke
162
-
163
- return false
164
- end
165
-
166
- coordinator.seek_offset = message.offset + 1
167
-
168
- true
169
- end
170
-
171
- # Marks message as consumed in a sync way.
172
- #
173
- # @param message [Messages::Message] last successfully processed message.
174
- # @return [Boolean] true if we were able to mark the offset, false otherwise. False indicates
175
- # that we were not able and that we have lost the partition.
176
- def mark_as_consumed!(message)
177
- # Ignore earlier offsets than the one we alread committed
178
- return true if coordinator.seek_offset > message.offset
179
-
180
- unless client.mark_as_consumed!(message)
181
- coordinator.revoke
182
-
183
- return false
184
- end
185
-
186
- coordinator.seek_offset = message.offset + 1
187
-
188
- true
189
- end
190
-
191
162
  # Pauses processing on a given offset for the current topic partition
192
163
  #
193
164
  # After given partition is resumed, it will continue processing from the given offset
@@ -201,8 +172,8 @@ module Karafka
201
172
  timeout ? coordinator.pause_tracker.pause(timeout) : coordinator.pause_tracker.pause
202
173
 
203
174
  client.pause(
204
- messages.metadata.topic,
205
- messages.metadata.partition,
175
+ topic.name,
176
+ partition,
206
177
  offset
207
178
  )
208
179
 
@@ -213,8 +184,8 @@ module Karafka
213
184
  'consumer.consuming.pause',
214
185
  caller: self,
215
186
  manual: manual_pause,
216
- topic: messages.metadata.topic,
217
- partition: messages.metadata.partition,
187
+ topic: topic.name,
188
+ partition: partition,
218
189
  offset: offset,
219
190
  timeout: coordinator.pause_tracker.current_timeout,
220
191
  attempt: coordinator.pause_tracker.attempt
@@ -223,6 +194,8 @@ module Karafka
223
194
 
224
195
  # Resumes processing of the current topic partition
225
196
  def resume
197
+ return unless coordinator.pause_tracker.paused?
198
+
226
199
  # This is sufficient to expire a partition pause, as with it will be resumed by the listener
227
200
  # thread before the next poll.
228
201
  coordinator.pause_tracker.expire
@@ -234,8 +207,8 @@ module Karafka
234
207
  def seek(offset)
235
208
  client.seek(
236
209
  Karafka::Messages::Seek.new(
237
- messages.metadata.topic,
238
- messages.metadata.partition,
210
+ topic.name,
211
+ partition,
239
212
  offset
240
213
  )
241
214
  )
@@ -248,6 +221,13 @@ module Karafka
248
221
  coordinator.revoked?
249
222
  end
250
223
 
224
+ # @return [Boolean] are we retrying processing after an error. This can be used to provide a
225
+ # different flow after there is an error, for example for resources cleanup, small manual
226
+ # backoff or different instrumentation tracking.
227
+ def retrying?
228
+ coordinator.pause_tracker.attempt.positive?
229
+ end
230
+
251
231
  # Pauses the processing from the last offset to retry on given message
252
232
  # @private
253
233
  def retry_after_pause
@@ -258,8 +238,8 @@ module Karafka
258
238
  Karafka.monitor.instrument(
259
239
  'consumer.consuming.retry',
260
240
  caller: self,
261
- topic: messages.metadata.topic,
262
- partition: messages.metadata.partition,
241
+ topic: topic.name,
242
+ partition: partition,
263
243
  offset: coordinator.seek_offset,
264
244
  timeout: coordinator.pause_tracker.current_timeout,
265
245
  attempt: coordinator.pause_tracker.attempt
@@ -25,7 +25,7 @@ module Karafka
25
25
  @consumer_group_coordinator = consumer_group_coordinator
26
26
  @subscription_group = subscription_group
27
27
  @jobs_queue = jobs_queue
28
- @coordinators = Processing::CoordinatorsBuffer.new
28
+ @coordinators = Processing::CoordinatorsBuffer.new(subscription_group.topics)
29
29
  @client = Client.new(@subscription_group)
30
30
  @executors = Processing::ExecutorsBuffer.new(@client, subscription_group)
31
31
  @jobs_builder = proc_config.jobs_builder
@@ -234,7 +234,7 @@ module Karafka
234
234
  def build_and_schedule_shutdown_jobs
235
235
  jobs = []
236
236
 
237
- @executors.each do |_, _, executor|
237
+ @executors.each do |executor|
238
238
  job = @jobs_builder.shutdown(executor)
239
239
  job.before_enqueue
240
240
  jobs << job
@@ -263,20 +263,25 @@ module Karafka
263
263
 
264
264
  @messages_buffer.each do |topic, partition, messages|
265
265
  coordinator = @coordinators.find_or_create(topic, partition)
266
-
267
266
  # Start work coordination for this topic partition
268
267
  coordinator.start(messages)
269
268
 
270
- @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
271
- # Count the job we're going to create here
272
- coordinator.increment
273
- executor = @executors.find_or_create(topic, partition, group_id)
274
- job = @jobs_builder.consume(executor, partition_messages, coordinator)
275
- job.before_enqueue
276
- jobs << job
269
+ # We do not increment coordinator for idle job because it's not a user related one
270
+ # and it will not go through a standard lifecycle. Same applies to revoked and shutdown
271
+ if messages.empty?
272
+ executor = @executors.find_or_create(topic, partition, 0, coordinator)
273
+ jobs << @jobs_builder.idle(executor)
274
+ else
275
+ @partitioner.call(topic, messages, coordinator) do |group_id, partition_messages|
276
+ executor = @executors.find_or_create(topic, partition, group_id, coordinator)
277
+ coordinator.increment
278
+ jobs << @jobs_builder.consume(executor, partition_messages)
279
+ end
277
280
  end
278
281
  end
279
282
 
283
+ jobs.each(&:before_enqueue)
284
+
280
285
  @scheduler.schedule_consumption(@jobs_queue, jobs)
281
286
  end
282
287
 
@@ -46,8 +46,5 @@ module Karafka
46
46
 
47
47
  # This should never happen. Please open an issue if it does.
48
48
  StrategyNotFoundError = Class.new(BaseError)
49
-
50
- # This should never happen. Please open an issue if it does.
51
- SkipMessageNotFoundError = Class.new(BaseError)
52
49
  end
53
50
  end
@@ -170,13 +170,51 @@ module Karafka
170
170
  #
171
171
  # @param event [Karafka::Core::Monitoring::Event] event details including payload
172
172
  def on_dead_letter_queue_dispatched(event)
173
+ consumer = event[:caller]
174
+ topic = consumer.topic.name
173
175
  message = event[:message]
174
176
  offset = message.offset
175
- topic = event[:caller].topic.name
176
- dlq_topic = event[:caller].topic.dead_letter_queue.topic
177
+ dlq_topic = consumer.topic.dead_letter_queue.topic
177
178
  partition = message.partition
178
179
 
179
- info "Dispatched message #{offset} from #{topic}/#{partition} to DLQ topic: #{dlq_topic}"
180
+ info <<~MSG.tr("\n", ' ').strip!
181
+ [#{consumer.id}] Dispatched message #{offset}
182
+ from #{topic}/#{partition}
183
+ to DLQ topic: #{dlq_topic}
184
+ MSG
185
+ end
186
+
187
+ # Logs info about throttling event
188
+ #
189
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
190
+ def on_filtering_throttled(event)
191
+ consumer = event[:caller]
192
+ topic = consumer.topic.name
193
+ # Here we get last message before throttle
194
+ message = event[:message]
195
+ partition = message.partition
196
+ offset = message.offset
197
+
198
+ info <<~MSG.tr("\n", ' ').strip!
199
+ [#{consumer.id}] Throttled and will resume
200
+ from message #{offset}
201
+ on #{topic}/#{partition}
202
+ MSG
203
+ end
204
+
205
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
206
+ def on_filtering_seek(event)
207
+ consumer = event[:caller]
208
+ topic = consumer.topic.name
209
+ # Message to which we seek
210
+ message = event[:message]
211
+ partition = message.partition
212
+ offset = message.offset
213
+
214
+ info <<~MSG.tr("\n", ' ').strip!
215
+ [#{consumer.id}] Post-filtering seeking to message #{offset}
216
+ on #{topic}/#{partition}
217
+ MSG
180
218
  end
181
219
 
182
220
  # There are many types of errors that can occur in many places, but we provide a single
@@ -203,6 +241,9 @@ module Karafka
203
241
  when 'consumer.after_consume.error'
204
242
  error "Consumer after consume failed due to an error: #{error}"
205
243
  error details
244
+ when 'consumer.idle.error'
245
+ error "Consumer idle failed due to an error: #{error}"
246
+ error details
206
247
  when 'consumer.shutdown.error'
207
248
  error "Consumer on shutdown failed due to an error: #{error}"
208
249
  error details
@@ -17,6 +17,9 @@ module Karafka
17
17
  # complete list of all the events. Please use the #available_events on fully loaded
18
18
  # Karafka system to determine all of the events you can use.
19
19
  EVENTS = %w[
20
+ active_job.consume
21
+ active_job.consumed
22
+
20
23
  app.initialized
21
24
  app.running
22
25
  app.quieting
@@ -36,6 +39,7 @@ module Karafka
36
39
  consumer.consumed
37
40
  consumer.consuming.pause
38
41
  consumer.consuming.retry
42
+ consumer.idle
39
43
  consumer.revoke
40
44
  consumer.revoked
41
45
  consumer.shutting_down
@@ -43,6 +47,9 @@ module Karafka
43
47
 
44
48
  dead_letter_queue.dispatched
45
49
 
50
+ filtering.throttled
51
+ filtering.seek
52
+
46
53
  process.notice_signal
47
54
 
48
55
  statistics.emitted