waterdrop 2.6.14 → 2.7.0
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +5 -16
- data/.ruby-version +1 -1
- data/CHANGELOG.md +128 -0
- data/Gemfile.lock +17 -20
- data/config/locales/errors.yml +8 -3
- data/docker-compose.yml +1 -1
- data/lib/waterdrop/clients/rdkafka.rb +34 -4
- data/lib/waterdrop/config.rb +22 -11
- data/lib/waterdrop/contracts/config.rb +11 -1
- data/lib/waterdrop/instrumentation/callbacks/delivery.rb +11 -0
- data/lib/waterdrop/instrumentation/callbacks/error.rb +11 -0
- data/lib/waterdrop/instrumentation/callbacks/oauthbearer_token_refresh.rb +49 -0
- data/lib/waterdrop/instrumentation/callbacks/statistics.rb +11 -0
- data/lib/waterdrop/instrumentation/notifications.rb +2 -0
- data/lib/waterdrop/instrumentation/vendors/datadog/metrics_listener.rb +1 -1
- data/lib/waterdrop/producer/buffer.rb +0 -6
- data/lib/waterdrop/producer/sync.rb +2 -2
- data/lib/waterdrop/producer/transactions.rb +7 -3
- data/lib/waterdrop/producer.rb +30 -31
- data/lib/waterdrop/version.rb +1 -1
- data/waterdrop.gemspec +3 -1
- data.tar.gz.sig +0 -0
- metadata +7 -6
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5308262b20199b02906783387f294a58beb01fa8850db3db19bb7be39395121a
+  data.tar.gz: d35c18c4b7352c20c8eeb623f54581476f108cb656912a571ef067cc796e884c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ac6693e44080e4edf9b201a5e735b283bb7fa81d36ae10bf0d7501faa00e5f099917144966beb7febc4b95c50ed78feb7c659e59753a78e4495111e3d00af322
+  data.tar.gz: 100439b79cc59bd668f40e4fed8086c49f13bfedebb68409981d8c70a39c692eb2b4f453d9a45c367f2612578b2c6ac8303bd56acdbbd27d3a02d5aab803d57a
checksums.yaml.gz.sig
CHANGED
Binary file
data/.github/workflows/ci.yml
CHANGED
@@ -22,7 +22,6 @@ jobs:
           - '3.2'
           - '3.1'
           - '3.0'
-          - '2.7'
         include:
           - ruby: '3.3'
             coverage: 'true'
@@ -39,9 +38,9 @@ jobs:
           bundler-cache: true
           bundler: 'latest'

-      - name: Run Kafka with docker
+      - name: Run Kafka with docker compose
        run: |
-          docker
+          docker compose up -d || (sleep 5 && docker compose up -d)

      - name: Wait for Kafka
        run: |
@@ -49,25 +48,15 @@ jobs:

      - name: Install latest bundler
        run: |
-
-
-          gem update --system 3.4.22 --no-document
-        else
-          gem install bundler --no-document
-          gem update --system --no-document
-        fi
+          gem install bundler --no-document
+          gem update --system --no-document

          bundle config set without 'tools benchmarks docs'

      - name: Bundle install
        run: |
          bundle config set without development
-          if [[ "$(ruby -v | awk '{print $2}')" == 2.7.8* ]]; then
-            BUNDLER_VERSION=2.4.22 bundle install --jobs 4 --retry 3
-          else
-            bundle install --jobs 4 --retry 3
-          fi
+          bundle install --jobs 4 --retry 3

      - name: Run all tests
        env:
data/.ruby-version
CHANGED
@@ -1 +1 @@
-3.3.
+3.3.1
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,133 @@
# WaterDrop changelog

## 2.7.0 (2024-04-26)

This release contains **BREAKING** changes. Make sure to read and apply upgrade notes.

- **[Feature]** Support custom OAuth providers.
- **[Breaking]** Drop Ruby `2.7` support.
- **[Breaking]** Change default timeouts so the final delivery `message.timeout.ms` is less than `max_wait_timeout`, so we do not end up without a final verdict.
- **[Breaking]** Update all the time-related configuration settings to be in `ms` and not mixed.
- **[Breaking]** Remove the no longer needed `wait_timeout` configuration option.
- **[Breaking]** Do **not** validate or morph (via middleware) messages added to the buffer prior to `flush_sync` or `flush_async`.
- [Enhancement] Provide `WaterDrop::Producer#transaction?` that returns true only when the producer has an active transaction running.
- [Enhancement] Introduce an `instrument_on_wait_queue_full` flag (defaults to `true`) to configure whether non-critical (retryable) queue full errors should be instrumented in the error pipeline. Useful when building high-performance pipes with the WaterDrop queue retry backoff used as a throttler.
- [Enhancement] Protect critical `rdkafka` thread executable code sections.
- [Enhancement] Treat the queue size as a gauge rather than a cumulative stat (isturdy).
- [Fix] Fix a case where purge on a non-initialized client would crash.
- [Fix] Middlewares run twice when using buffered produce.
- [Fix] Validations run twice when using buffered produce.

### Upgrade Notes

**PLEASE MAKE SURE TO READ AND APPLY THEM!**

#### `wait_timeout` Configuration No Longer Needed

The `wait_timeout` WaterDrop configuration option is no longer needed. You can safely remove it.

```ruby
producer = WaterDrop::Producer.new

producer.setup do |config|
  # Other config...

  # Remove this, no longer needed
  config.wait_timeout = 30
end
```

#### Time Settings Format Alignment

**All** time-related values are now configured in milliseconds instead of some being in seconds and some in milliseconds.

The values that were changed from seconds to milliseconds are:

- `max_wait_timeout`
- `wait_backoff_on_queue_full`
- `wait_timeout_on_queue_full`
- `wait_backoff_on_transaction_command`

If you have configured any of those yourself, please replace the seconds representation with milliseconds:

```ruby
producer = WaterDrop::Producer.new

producer.setup do |config|
  config.deliver = true

  # Replace this:
  config.max_wait_timeout = 30

  # With
  config.max_wait_timeout = 30_000
  # ...
end
```

#### Defaults Alignment

In this release, we've updated our default settings to address a crucial issue: the previous defaults could lead to inconclusive outcomes in synchronous operations due to wait timeout errors. Users often mistakenly believed that a message dispatch was halted because of these errors when, in fact, the timeout related to awaiting the final dispatch verdict, not the dispatch action itself.

The new defaults in WaterDrop 2.7.0 eliminate this confusion by ensuring synchronous operation results are always transparent and conclusive. This change aims to provide a straightforward understanding of wait timeout errors, reinforcing that they reflect the wait state, not the dispatch success.

Below you can find a table with what has changed, the new defaults, and the previous ones in case you want to retain the earlier behavior:

<table>
  <thead>
    <tr>
      <th>Config</th>
      <th>Previous Default</th>
      <th>New Default</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>root <code>max_wait_timeout</code></td>
      <td>5000 ms (5 seconds)</td>
      <td>60000 ms (60 seconds)</td>
    </tr>
    <tr>
      <td>kafka <code>message.timeout.ms</code></td>
      <td>300000 ms (5 minutes)</td>
      <td>50000 ms (50 seconds)</td>
    </tr>
    <tr>
      <td>kafka <code>transaction.timeout.ms</code></td>
      <td>60000 ms (1 minute)</td>
      <td>55000 ms (55 seconds)</td>
    </tr>
  </tbody>
</table>

This alignment ensures that when using sync operations or invoking `#wait`, any exception you get should give you a conclusive and final delivery verdict.
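If you prefer to retain the previous behavior, the table values above can be applied back explicitly. A minimal sketch, using the `producer.setup` API shown earlier; the `bootstrap.servers` value is only a placeholder:

```ruby
producer = WaterDrop::Producer.new

producer.setup do |config|
  # Previous root default (all time values are in ms as of 2.7.0)
  config.max_wait_timeout = 5_000

  config.kafka = {
    # Placeholder broker address
    'bootstrap.servers': 'localhost:9092',
    # Previous librdkafka-level defaults
    'message.timeout.ms': 300_000,
    'transaction.timeout.ms': 60_000
  }
end
```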
#### Buffering No Longer Early Validates Messages

As of version `2.7.0`, WaterDrop has changed how message buffering works. Previously, messages underwent validation and middleware processing when they were buffered. Now, these steps are deferred until just before dispatching the messages. The buffer functions strictly as a thread-safe storage area without performing any validations or middleware operations until the messages are ready to be sent.

This adjustment was made primarily to ensure that middleware runs and validations are applied when most relevant, shortly before message dispatch. This approach addresses potential issues with buffers that might hold messages for extended periods:

- **Temporal Relevance**: Validating and processing messages near their dispatch time helps ensure that actions such as partition assignments reflect the current system state. This is crucial in dynamic environments where system states are subject to rapid changes.

- **Stale State Management**: By delaying validations and middleware to the dispatch phase, the system minimizes the risk of acting on outdated information, which could lead to incorrect processing or partitioning decisions.

```ruby
# Prior to 2.7.0 this would raise an error
producer.buffer(topic: nil, payload: '')
# => WaterDrop::Errors::MessageInvalidError

# After 2.7.0 buffer will not raise, but flushing will
producer.buffer(topic: nil, payload: '')
# => all good here
producer.flush_async
# => WaterDrop::Errors::MessageInvalidError
```

#### Middleware Execution Prior to Flush When Buffering

The timing of middleware execution has been adjusted. Middleware, which was previously run when messages were added to the buffer, will now only execute immediately before the messages are flushed from the buffer and dispatched. This change is analogous to the validation changes described above.
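As an illustration of the new ordering, here is a minimal sketch assuming the documented `producer.middleware#append` API and a hypothetical middleware that stamps messages; the stamp is applied at flush time, not at buffer time:

```ruby
# Hypothetical middleware: adds a header recording when the message actually left the buffer
class DispatchStamp
  def call(message)
    message[:headers] ||= {}
    message[:headers]['dispatched_at'] = Time.now.utc.to_s
    message
  end
end

producer.middleware.append(DispatchStamp.new)

producer.buffer(topic: 'events', payload: 'example')
# 2.7.0+: the middleware has NOT run yet, the message is stored as-is

producer.flush_sync
# The middleware runs now, right before dispatch, so the stamp reflects flush time
```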
## 2.6.14 (2024-02-06)
- [Enhancement] Instrument `producer.connected` and `producer.closing` lifecycle events.
data/Gemfile.lock
CHANGED
@@ -1,14 +1,14 @@
 PATH
   remote: .
   specs:
-    waterdrop (2.6.14)
-      karafka-core (>= 2.
+    waterdrop (2.7.0)
+      karafka-core (>= 2.4.0, < 3.0.0)
       zeitwerk (~> 2.3)

 GEM
   remote: https://rubygems.org/
   specs:
-    activesupport (7.1.3)
+    activesupport (7.1.3.2)
       base64
       bigdecimal
       concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -19,30 +19,28 @@ GEM
       mutex_m
       tzinfo (~> 2.0)
     base64 (0.2.0)
-    bigdecimal (3.1.
+    bigdecimal (3.1.7)
     byebug (11.1.3)
     concurrent-ruby (1.2.3)
     connection_pool (2.4.1)
     diff-lcs (1.5.1)
     docile (1.4.0)
-    drb (2.2.
-
-    factory_bot (6.4.5)
+    drb (2.2.1)
+    factory_bot (6.4.6)
       activesupport (>= 5.0.0)
     ffi (1.16.3)
-    i18n (1.14.
+    i18n (1.14.4)
       concurrent-ruby (~> 1.0)
-    karafka-core (2.
-
-
-    karafka-rdkafka (0.14.7)
+    karafka-core (2.4.0)
+      karafka-rdkafka (>= 0.15.0, < 0.16.0)
+    karafka-rdkafka (0.15.0)
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
-    mini_portile2 (2.8.
-    minitest (5.
+    mini_portile2 (2.8.6)
+    minitest (5.22.3)
     mutex_m (0.2.0)
-    rake (13.1
+    rake (13.2.1)
     rspec (3.13.0)
       rspec-core (~> 3.13.0)
       rspec-expectations (~> 3.13.0)
@@ -55,8 +53,7 @@ GEM
     rspec-mocks (3.13.0)
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.13.0)
-    rspec-support (3.13.
-    ruby2_keywords (0.0.5)
+    rspec-support (3.13.1)
     simplecov (0.22.0)
       docile (~> 1.1)
       simplecov-html (~> 0.11)
@@ -65,10 +62,10 @@ GEM
     simplecov_json_formatter (0.1.4)
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
-    zeitwerk (2.6.
+    zeitwerk (2.6.13)

 PLATFORMS
-
+  arm64-darwin-22
   x86_64-linux

 DEPENDENCIES
@@ -79,4 +76,4 @@ DEPENDENCIES
   waterdrop!

 BUNDLED WITH
-   2.5.
+   2.5.9
data/config/locales/errors.yml
CHANGED
@@ -4,15 +4,20 @@ en:
     missing: must be present
     logger_format: must be present
     deliver_format: must be boolean
+    instrument_on_wait_queue_full_format: must be boolean
     id_format: must be a non-empty string
+    monitor_format: must be present
+    client_class_format: must be present
     max_payload_size_format: must be an integer that is equal or bigger than 1
-    wait_timeout_format: must be a numeric that is bigger than 0
     max_wait_timeout_format: must be an integer that is equal or bigger than 0
     kafka_format: must be a hash with symbol based keys
     kafka_key_must_be_a_symbol: All keys under the kafka settings scope need to be symbols
     wait_on_queue_full_format: must be boolean
-    wait_backoff_on_queue_full_format: must be a numeric that is
-    wait_timeout_on_queue_full_format: must be a numeric that is
+    wait_backoff_on_queue_full_format: must be a numeric that is equal or bigger to 0
+    wait_timeout_on_queue_full_format: must be a numeric that is equal or bigger to 0
+    wait_backoff_on_transaction_command_format: must be a numeric that is equal or bigger to 0
+    max_attempts_on_transaction_command_format: must be an integer that is equal or bigger than 1
+    oauth.token_provider_listener_format: 'must be false or respond to #on_oauthbearer_token_refresh'

   message:
     missing: must be present
data/docker-compose.yml
CHANGED

data/lib/waterdrop/clients/rdkafka.rb
CHANGED
@@ -11,20 +11,50 @@ module WaterDrop
       # @param producer [WaterDrop::Producer] producer instance with its config, etc
       # @note We overwrite this that way, because we do not care
       def new(producer)
-
+        kafka_config = producer.config.kafka.to_h
+        monitor = producer.config.monitor

-        client = ::Rdkafka::Config.new(
+        client = ::Rdkafka::Config.new(kafka_config).producer(native_kafka_auto_start: false)
+
+        # Register statistics runner for this particular type of callbacks
+        ::Karafka::Core::Instrumentation.statistics_callbacks.add(
+          producer.id,
+          Instrumentation::Callbacks::Statistics.new(producer.id, client.name, monitor)
+        )
+
+        # Register error tracking callback
+        ::Karafka::Core::Instrumentation.error_callbacks.add(
+          producer.id,
+          Instrumentation::Callbacks::Error.new(producer.id, client.name, monitor)
+        )
+
+        # Register oauth bearer refresh for this particular type of callbacks
+        ::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.add(
+          producer.id,
+          Instrumentation::Callbacks::OauthbearerTokenRefresh.new(client, monitor)
+        )

        # This callback is not global and is per client, thus we do not have to wrap it with a
        # callbacks manager to make it work
        client.delivery_callback = Instrumentation::Callbacks::Delivery.new(
          producer.id,
          producer.transactional?,
-
+          monitor
        )

+        oauth_listener = producer.config.oauth.token_provider_listener
+        # We need to subscribe the oauth listener here because we want it to be ready before
+        # any producer callbacks run. In theory, because the WaterDrop rdkafka producer is lazy
+        # loaded, there would be enough time for the user to subscribe it themselves, but then it
+        # would not cooperate with the auto-configuration coming from Karafka. The way it is done
+        # below, if it is configured it will be subscribed, and if not, the user can always
+        # subscribe it themselves as long as it is done prior to first usage
+        monitor.subscribe(oauth_listener) if oauth_listener
+
+        client.start
+
        # Switch to the transactional mode if user provided the transactional id
-        client.init_transactions if
+        client.init_transactions if kafka_config.key?(:'transactional.id')

        client
      end
data/lib/waterdrop/config.rb
CHANGED
@@ -12,7 +12,12 @@ module WaterDrop
       'client.id': 'waterdrop',
       # emit librdkafka statistics every five seconds. This is used in instrumentation.
       # When disabled, part of metrics will not be published and available.
-      'statistics.interval.ms': 5_000
+      'statistics.interval.ms': 5_000,
+      # We set it to a value that is lower than `max_wait_timeout` to have a final verdict upon
+      # sync delivery
+      'message.timeout.ms': 50_000,
+      # Must be more or equal to `message.timeout.ms` defaults
+      'transaction.timeout.ms': 55_000
     }.freeze

     private_constant :KAFKA_DEFAULTS
@@ -44,12 +49,8 @@ module WaterDrop
     # option [Integer] max payload size allowed for delivery to Kafka
     setting :max_payload_size, default: 1_000_012
     # option [Integer] Wait that long for the delivery report or raise an error if this takes
-    # longer than the timeout.
-    setting :max_wait_timeout, default:
-    # option [Numeric] how long should we wait between re-checks on the availability of the
-    # delivery report. In a really robust systems, this describes the min-delivery time
-    # for a single sync message when produced in isolation
-    setting :wait_timeout, default: 0.005 # 5 milliseconds
+    # longer than the timeout ms.
+    setting :max_wait_timeout, default: 60_000
     # option [Boolean] should we upon detecting full librdkafka queue backoff and retry or should
     # we raise an exception.
     # When this is set to `true`, upon full queue, we won't raise an error. There will be error
@@ -60,12 +61,14 @@ module WaterDrop
     # option [Integer] how long (in seconds) should we backoff before a retry when queue is full
     # The retry will happen with the same message and backoff should give us some time to
     # dispatch previously buffered messages.
-    setting :wait_backoff_on_queue_full, default:
-    # option [Numeric] how many
+    setting :wait_backoff_on_queue_full, default: 100
+    # option [Numeric] how many ms should we wait with the backoff on queue having space for
     # more messages before re-raising the error.
-    setting :wait_timeout_on_queue_full, default:
+    setting :wait_timeout_on_queue_full, default: 10_000
+    # option [Boolean] should we instrument non-critical, retryable queue full errors
+    setting :instrument_on_wait_queue_full, default: true
     # option [Numeric] How long to wait before retrying a retryable transaction related error
-    setting :wait_backoff_on_transaction_command, default:
+    setting :wait_backoff_on_transaction_command, default: 500
     # option [Numeric] How many times to retry a retryable transaction related error before
     # giving up
     setting :max_attempts_on_transaction_command, default: 5
@@ -86,6 +89,14 @@ module WaterDrop
       constructor: ->(middleware) { middleware || WaterDrop::Middleware.new }
     )

+    # Namespace for oauth related configuration
+    setting :oauth do
+      # option [false, #call] Listener for using oauth bearer. This listener will be able to
+      # get the client name to decide whether to use a single multi-client token refreshing
+      # or have separate tokens per instance.
+      setting :token_provider_listener, default: false
+    end
+
     # Configuration method
     # @yield Runs a block of code providing a config singleton instance to it
     # @yieldparam [WaterDrop::Config] WaterDrop config instance
data/lib/waterdrop/contracts/config.rb
CHANGED
@@ -14,14 +14,24 @@ module WaterDrop

       required(:id) { |val| val.is_a?(String) && !val.empty? }
       required(:logger) { |val| !val.nil? }
+      required(:monitor) { |val| !val.nil? }
       required(:deliver) { |val| [true, false].include?(val) }
       required(:max_payload_size) { |val| val.is_a?(Integer) && val >= 1 }
       required(:max_wait_timeout) { |val| val.is_a?(Numeric) && val >= 0 }
-      required(:
+      required(:client_class) { |val| !val.nil? }
       required(:kafka) { |val| val.is_a?(Hash) && !val.empty? }
       required(:wait_on_queue_full) { |val| [true, false].include?(val) }
+      required(:instrument_on_wait_queue_full) { |val| [true, false].include?(val) }
       required(:wait_backoff_on_queue_full) { |val| val.is_a?(Numeric) && val >= 0 }
       required(:wait_timeout_on_queue_full) { |val| val.is_a?(Numeric) && val >= 0 }
+      required(:wait_backoff_on_transaction_command) { |val| val.is_a?(Numeric) && val >= 0 }
+      required(:max_attempts_on_transaction_command) { |val| val.is_a?(Integer) && val >= 1 }
+
+      nested(:oauth) do
+        required(:token_provider_listener) do |val|
+          val == false || val.respond_to?(:on_oauthbearer_token_refresh)
+        end
+      end

       # rdkafka allows both symbols and strings as keys for config but then casts them to strings
       # This can be confusing, so we expect all keys to be symbolized
data/lib/waterdrop/instrumentation/callbacks/delivery.rb
CHANGED
@@ -44,6 +44,17 @@ module WaterDrop
         else
           instrument_error(delivery_report)
         end
+      # This runs from the rdkafka thread, thus we want to safe-guard it and prevent absolute
+      # crashes even if the instrumentation code fails. If it would bubble-up, it could crash
+      # the rdkafka background thread
+      rescue StandardError => e
+        @monitor.instrument(
+          'error.occurred',
+          caller: self,
+          error: e,
+          producer_id: @producer_id,
+          type: 'callbacks.delivery.error'
+        )
       end

       private
data/lib/waterdrop/instrumentation/callbacks/error.rb
CHANGED
@@ -32,6 +32,17 @@ module WaterDrop
           producer_id: @producer_id,
           type: 'librdkafka.error'
         )
+      # This runs from the rdkafka thread, thus we want to safe-guard it and prevent absolute
+      # crashes even if the instrumentation code fails. If it would bubble-up, it could crash
+      # the rdkafka background thread
+      rescue StandardError => e
+        @monitor.instrument(
+          'error.occurred',
+          caller: self,
+          error: e,
+          producer_id: @producer_id,
+          type: 'callbacks.error.error'
+        )
       end
     end
   end
data/lib/waterdrop/instrumentation/callbacks/oauthbearer_token_refresh.rb
CHANGED
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module WaterDrop
+  module Instrumentation
+    module Callbacks
+      # Callback that is triggered when oauth token needs to be refreshed.
+      class OauthbearerTokenRefresh
+        # @param bearer [Rdkafka::Producer] given rdkafka instance. It is needed as
+        #   we need to have a reference to call `#oauthbearer_set_token` or
+        #   `#oauthbearer_set_token_failure` upon the event.
+        # @param monitor [WaterDrop::Instrumentation::Monitor] monitor we are using
+        def initialize(bearer, monitor)
+          @bearer = bearer
+          @monitor = monitor
+        end
+
+        # Upon receiving this event, the user is required to invoke either `#oauthbearer_set_token`
+        # or `#oauthbearer_set_token_failure` on the `event[:bearer]` depending on whether token
+        # obtaining was successful or not.
+        #
+        # Please refer to WaterDrop and Karafka documentation or `Rdkafka::Helpers::OAuth`
+        # documentation directly for exact parameters of those methods.
+        #
+        # @param _rd_config [Rdkafka::Config]
+        # @param bearer_name [String] name of the bearer for which we refresh
+        def call(_rd_config, bearer_name)
+          return unless @bearer.name == bearer_name
+
+          @monitor.instrument(
+            'oauthbearer.token_refresh',
+            bearer: @bearer,
+            caller: self
+          )
+        # This runs from the rdkafka thread, thus we want to safe-guard it and prevent absolute
+        # crashes even if the instrumentation code fails. If it would bubble-up, it could crash
+        # the rdkafka background thread
+        rescue StandardError => e
+          @monitor.instrument(
+            'error.occurred',
+            caller: self,
+            error: e,
+            producer_id: @producer_id,
+            type: 'callbacks.oauthbearer_token_refresh.error'
+          )
+        end
+      end
+    end
+  end
+end
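For reference, a listener compatible with the contract above only has to respond to `#on_oauthbearer_token_refresh` and set a token (or a failure) on `event[:bearer]`. A minimal sketch, assuming the `Rdkafka::Helpers::OAuth` setters accept `token:`, `lifetime_ms:` and `principal_name:` keyword arguments; the token values and kafka settings below are placeholders:

```ruby
class OauthTokenListener
  def on_oauthbearer_token_refresh(event)
    bearer = event[:bearer]

    # Placeholder: obtain a real token from your OAuth provider here
    token = 'opaque-oauth-token'
    lifetime_ms = 60 * 60 * 1_000

    bearer.oauthbearer_set_token(
      token: token,
      lifetime_ms: lifetime_ms,
      principal_name: 'waterdrop'
    )
  rescue StandardError => e
    bearer.oauthbearer_set_token_failure(e.message)
  end
end

producer = WaterDrop::Producer.new do |config|
  config.kafka = {
    'bootstrap.servers': 'localhost:9092',
    'security.protocol': 'sasl_ssl',
    'sasl.mechanisms': 'OAUTHBEARER'
  }
  config.oauth.token_provider_listener = OauthTokenListener.new
end
```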
data/lib/waterdrop/instrumentation/callbacks/statistics.rb
CHANGED
@@ -34,6 +34,17 @@ module WaterDrop
           producer_id: @producer_id,
           statistics: @statistics_decorator.call(statistics)
         )
+      # This runs from the rdkafka thread, thus we want to safe-guard it and prevent absolute
+      # crashes even if the instrumentation code fails. If it would bubble-up, it could crash
+      # the rdkafka background thread
+      rescue StandardError => e
+        @monitor.instrument(
+          'error.occurred',
+          caller: self,
+          error: e,
+          producer_id: @producer_id,
+          type: 'callbacks.statistics.error'
+        )
       end
     end
   end
data/lib/waterdrop/instrumentation/vendors/datadog/metrics_listener.rb
CHANGED
@@ -36,7 +36,7 @@ module WaterDrop
     setting :rd_kafka_metrics, default: [
       # Client metrics
       RdKafkaMetric.new(:count, :root, 'calls', 'tx_d'),
-      RdKafkaMetric.new(:histogram, :root, 'queue.size', '
+      RdKafkaMetric.new(:histogram, :root, 'queue.size', 'msg_cnt'),

       # Broker metrics
       RdKafkaMetric.new(:count, :brokers, 'deliver.attempts', 'txretries_d'),
data/lib/waterdrop/producer/buffer.rb
CHANGED
@@ -12,9 +12,6 @@ module WaterDrop
     def buffer(message)
       ensure_active!

-      message = middleware.run(message)
-      validate_message!(message)
-
       @monitor.instrument(
         'message.buffered',
         producer_id: id,
@@ -32,9 +29,6 @@ module WaterDrop
     def buffer_many(messages)
       ensure_active!

-      messages = middleware.run_many(messages)
-      messages.each { |message| validate_message!(message) }
-
       @monitor.instrument(
         'messages.buffered',
         producer_id: id,
data/lib/waterdrop/producer/sync.rb
CHANGED
@@ -52,8 +52,8 @@ module WaterDrop
     # @return [Array<Rdkafka::Producer::DeliveryReport>] delivery reports
     #
     # @raise [Rdkafka::RdkafkaError] When adding the messages to rdkafka's queue failed
-    # @raise [Rdkafka::Producer::WaitTimeoutError] When the timeout has been reached and
-    #
+    # @raise [Rdkafka::Producer::WaitTimeoutError] When the timeout has been reached and some
+    #   handles are still pending
     # @raise [Errors::MessageInvalidError] When any of the provided messages details are invalid
     #   and the message could not be sent to Kafka
     def produce_many_sync(messages)
data/lib/waterdrop/producer/transactions.rb
CHANGED
@@ -89,6 +89,11 @@ module WaterDrop
         end
       end

+      # @return [Boolean] true if we are in an active transaction
+      def transaction?
+        @transaction_mutex.owned?
+      end
+
       # @return [Boolean] Is this producer a transactional one
       def transactional?
         return @transactional if instance_variable_defined?(:'@transactional')
@@ -132,8 +137,7 @@ module WaterDrop
           client.send_offsets_to_transaction(
             consumer,
             tpl,
-
-            @config.max_wait_timeout * 1_000
+            @config.max_wait_timeout
           )
         end
       end
@@ -197,7 +201,7 @@ module WaterDrop

         if do_retry
           # Backoff more and more before retries
-          sleep(config.wait_backoff_on_transaction_command * attempt)
+          sleep((config.wait_backoff_on_transaction_command / 1_000.0) * attempt)

           retry
         end
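The first hunk above introduces `#transaction?`. A short usage sketch; the `transactional.id`, broker address and topic name are placeholders:

```ruby
producer = WaterDrop::Producer.new do |config|
  config.kafka = {
    'bootstrap.servers': 'localhost:9092',
    'transactional.id': 'waterdrop-example'
  }
end

producer.transaction?   # => false, nothing is running yet

producer.transaction do
  producer.produce_async(topic: 'events', payload: 'inside')
  producer.transaction? # => true only while the transaction block is active
end

producer.transaction?   # => false again once the block finishes
```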
data/lib/waterdrop/producer.rb
CHANGED
@@ -104,18 +104,6 @@ module WaterDrop
       @pid = Process.pid
       @client = Builder.new.call(self, @config)

-      # Register statistics runner for this particular type of callbacks
-      ::Karafka::Core::Instrumentation.statistics_callbacks.add(
-        @id,
-        Instrumentation::Callbacks::Statistics.new(@id, @client.name, @config.monitor)
-      )
-
-      # Register error tracking callback
-      ::Karafka::Core::Instrumentation.error_callbacks.add(
-        @id,
-        Instrumentation::Callbacks::Error.new(@id, @client.name, @config.monitor)
-      )
-
       @status.connected!
       @monitor.instrument('producer.connected', producer_id: id)
     end
@@ -145,7 +133,12 @@ module WaterDrop
         @messages = []
       end

-
+      # We should not purge if there is no client initialized
+      # It may not be initialized if we created a new producer that never connected to kafka,
+      # we used buffer and purged. In cases like this client won't exist
+      @connecting_mutex.synchronize do
+        @client&.purge
+      end
     end
   end

@@ -188,8 +181,7 @@ module WaterDrop
       # The linger.ms time will be ignored for the duration of the call,
       # queued messages will be sent to the broker as soon as possible.
       begin
-
-        @client.flush(@config.max_wait_timeout * 1_000) unless @client.closed?
+        @client.flush(@config.max_wait_timeout) unless @client.closed?
       # We can safely ignore timeouts here because any left outstanding requests
       # will anyhow force wait on close if not forced.
       # If forced, we will purge the queue and just close
@@ -210,6 +202,7 @@ module WaterDrop
       # Remove callbacks runners that were registered
       ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@id)
       ::Karafka::Core::Instrumentation.error_callbacks.delete(@id)
+      ::Karafka::Core::Instrumentation.oauthbearer_token_refresh_callbacks.delete(@id)

       @status.closed!
     end
@@ -250,8 +243,8 @@ module WaterDrop
     # @param handler [Rdkafka::Producer::DeliveryHandle]
     def wait(handler)
       handler.wait(
-        max_wait_timeout
-
+        # rdkafka max_wait_timeout is in seconds and we use ms
+        max_wait_timeout: @config.max_wait_timeout / 1_000.0
       )
     end

@@ -286,7 +279,7 @@ module WaterDrop
       # If we're running for longer than the timeout, we need to re-raise the queue full.
       # This will prevent from situation where cluster is down forever and we just retry and retry
       # in an infinite loop, effectively hanging the processing
-      raise unless monotonic_now - produce_time < @config.wait_timeout_on_queue_full
+      raise unless monotonic_now - produce_time < @config.wait_timeout_on_queue_full

       label = caller_locations(2, 1)[0].label.split(' ').last

@@ -297,22 +290,28 @@ module WaterDrop
       begin
         raise Errors::ProduceError, e.inspect
       rescue Errors::ProduceError => e
-        #
-        #
-        #
-        #
-
-
-
-
-
-
-
-
+        # Users can configure this because in pipe-like flows with high throughput, queue full with
+        # retry may be used as a throttling system that will backoff and wait.
+        # In such scenarios this error notification can be removed and until queue full is
+        # retryable, it will not be raised as an error.
+        if @config.instrument_on_wait_queue_full
+          # We want to instrument on this event even when we restart it.
+          # The reason is simple: instrumentation and visibility.
+          # We can recover from this, but despite that we should be able to instrument this.
+          # If this type of event happens too often, it may indicate that the buffer settings are
+          # not well configured.
+          @monitor.instrument(
+            'error.occurred',
+            producer_id: id,
+            message: message,
+            error: e,
+            type: "message.#{label}"
+          )
+        end

        # We do not poll the producer because polling happens in a background thread
        # It also should not be a frequent case (queue full), hence it's ok to just throttle.
-        sleep @config.wait_backoff_on_queue_full
+        sleep @config.wait_backoff_on_queue_full / 1_000.0
       end

       @operations_in_progress.decrement
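The queue-full hunk above ties the `error.occurred` notification to the new `instrument_on_wait_queue_full` setting. A sketch of a producer that uses queue-full backoff as a quiet throttle; the broker address is a placeholder and all values are in ms as of 2.7.0:

```ruby
producer = WaterDrop::Producer.new do |config|
  config.kafka = { 'bootstrap.servers': 'localhost:9092' }

  # Retry instead of raising when the librdkafka queue is full
  config.wait_on_queue_full = true
  # Back off 100 ms between attempts, for at most 10 seconds in total
  config.wait_backoff_on_queue_full = 100
  config.wait_timeout_on_queue_full = 10_000
  # Do not emit error.occurred for these retryable, expected queue-full states
  config.instrument_on_wait_queue_full = false
end
```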
data/lib/waterdrop/version.rb
CHANGED
data/waterdrop.gemspec
CHANGED
@@ -16,9 +16,11 @@ Gem::Specification.new do |spec|
   spec.description = spec.summary
   spec.license = 'MIT'

-  spec.add_dependency 'karafka-core', '>= 2.
+  spec.add_dependency 'karafka-core', '>= 2.4.0', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'

+  spec.required_ruby_version = '>= 3.0.0'
+
   if $PROGRAM_NAME.end_with?('gem')
     spec.signing_key = File.expand_path('~/.ssh/gem-private_key.pem')
   end
data.tar.gz.sig
CHANGED
Binary file
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: waterdrop
 version: !ruby/object:Gem::Version
-  version: 2.6.14
+  version: 2.7.0
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
   msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
   -----END CERTIFICATE-----
-date: 2024-
+date: 2024-04-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -43,7 +43,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.
+        version: 2.4.0
     - - "<"
       - !ruby/object:Gem::Version
         version: 3.0.0
@@ -53,7 +53,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.
+        version: 2.4.0
     - - "<"
      - !ruby/object:Gem::Version
        version: 3.0.0
@@ -107,6 +107,7 @@ files:
 - lib/waterdrop/helpers/counter.rb
 - lib/waterdrop/instrumentation/callbacks/delivery.rb
 - lib/waterdrop/instrumentation/callbacks/error.rb
+- lib/waterdrop/instrumentation/callbacks/oauthbearer_token_refresh.rb
 - lib/waterdrop/instrumentation/callbacks/statistics.rb
 - lib/waterdrop/instrumentation/logger_listener.rb
 - lib/waterdrop/instrumentation/monitor.rb
@@ -144,14 +145,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version:
+      version: 3.0.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.
+rubygems_version: 3.5.9
 signing_key:
 specification_version: 4
 summary: Kafka messaging made easy!
metadata.gz.sig
CHANGED
Binary file