karafka 2.0.37 → 2.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: bab9c1d7bc952b4ecbfc4fad794d7e7c861cd3a332cc5d9058cef6c0bd9b57cb
- data.tar.gz: 7662bd8dc5748d9112f3c72b2912619534e45750188f78df2f69a7e6ae1f9c31
+ metadata.gz: 1b9653385cf5a3b1e27eae06d53b9761c9a1f265252f721773258459eb3df1e7
+ data.tar.gz: c0af983ab0539e8463bf2612068a6b261de1325078c3e8600b0d6df0f596d100
  SHA512:
- metadata.gz: 9a99a84d538a74bd27d5a0f585a12dbbe67eb76ab63cc1a0984cbe1562f230070ad482418f85393a3a479e81534a0957a0863c91f3a7f5b6433f74efd317c79e
- data.tar.gz: 7da6129cd795f65d821bae897864648e4a5e37c0d07e8745f110f0d03a23d688d7717ab6c42652c4660fe3be3d26ee7051a8c2c66c1fcb62834a1c4159bd4ac4
+ metadata.gz: d9000a8f71d7fff762db5f567956f6ea68e436b428014c509ae233730c9f75fd6ac311e51b0022999dfdce64362c86dab6912ce549378d9def231e5749961140
+ data.tar.gz: f980261b5ada2f46efbf919aac86ab63da5bccce26639b9e7d98c07c6012cc3c727189a548627687092ee2802aca8df3d5459bcdcc8d9d29b35f2d6da92a64fc
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
  # Karafka framework changelog

+ ## 2.0.38 (2023-03-27)
+ - [Improvement] Introduce `Karafka::Admin#read_watermark_offsets` to get low and high watermark offset values.
+ - [Improvement] Track active_job_id in instrumentation (#1372)
+ - [Improvement] Improve `#read_topic` reading in case of a compacted partition where the offset is below the low watermark offset. This optimizes reading and does not go beyond the low watermark offset.
+ - [Improvement] Allow `#read_topic` to accept instance settings to overwrite any settings needed to customize reading behaviours.
+
  ## 2.0.37 (2023-03-20)
  - [Fix] Declarative topics execution on a secondary cluster runs topics creation on the primary one (#1365)
  - [Fix] Admin read operations commit offset when not needed (#1369)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
- karafka (2.0.37)
+ karafka (2.0.38)
  karafka-core (>= 2.0.12, < 3.0.0)
  thor (>= 0.20)
  waterdrop (>= 2.4.10, < 3.0.0)
@@ -10,10 +10,10 @@ PATH
  GEM
    remote: https://rubygems.org/
    specs:
- activejob (7.0.4.2)
- activesupport (= 7.0.4.2)
+ activejob (7.0.4.3)
+ activesupport (= 7.0.4.3)
  globalid (>= 0.3.6)
- activesupport (7.0.4.2)
+ activesupport (7.0.4.3)
  concurrent-ruby (~> 1.0, >= 1.0.2)
  i18n (>= 1.6, < 2)
  minitest (>= 5.1)
data/README.md CHANGED
@@ -86,7 +86,7 @@ bundle exec karafka server

  I also sell Karafka Pro subscriptions. It includes a commercial-friendly license, priority support, architecture consultations, enhanced Web UI and high throughput data processing-related features (virtual partitions, long-running jobs, and more).

- **20%** of the income will be distributed back to other OSS projects that Karafka uses under the hood.
+ **10%** of the income will be distributed back to other OSS projects that Karafka uses under the hood.

  Help me provide high-quality open-source software. Please see the Karafka [homepage](https://karafka.io/#become-pro) for more details.

@@ -12,16 +12,31 @@ module Karafka
  messages.each do |message|
  break if Karafka::App.stopping?

- # We technically speaking could set this as deserializer and reference it from the
- # message instead of using the `#raw_payload`. This is not done on purpose to simplify
- # the ActiveJob setup here
- job = ::ActiveSupport::JSON.decode(message.raw_payload)
+ consume_job(message)

- tags.add(:job_class, job['job_class'])
+ mark_as_consumed(message)
+ end
+ end

- ::ActiveJob::Base.execute(job)
+ private

- mark_as_consumed(message)
+ # Consumes a message with the job and runs needed instrumentation
+ #
+ # @param job_message [Karafka::Messages::Message] message with active job
+ def consume_job(job_message)
+ # We technically speaking could set this as deserializer and reference it from the
+ # message instead of using the `#raw_payload`. This is not done on purpose to simplify
+ # the ActiveJob setup here
+ job = ::ActiveSupport::JSON.decode(job_message.raw_payload)
+
+ tags.add(:job_class, job['job_class'])
+
+ payload = { caller: self, job: job, message: job_message }
+
+ # We publish both to make it consistent with `consumer.x` events
+ Karafka.monitor.instrument('active_job.consume', payload)
+ Karafka.monitor.instrument('active_job.consumed', payload) do
+ ::ActiveJob::Base.execute(job)
  end
  end
  end
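The refactored consumer above publishes a bare `active_job.consume` event before execution and wraps the execution itself in `active_job.consumed`, mirroring the `consumer.x` event pairs. The sketch below illustrates that publish-both pattern with a minimal plain-Ruby event bus; `TinyMonitor` is a stand-in invented for this example, not the actual Karafka::Core monitor.

```ruby
# Minimal illustrative event bus: subscribers registered per event name,
# `instrument` optionally runs a block and then notifies subscribers.
class TinyMonitor
  def initialize
    @subscribers = Hash.new { |hash, key| hash[key] = [] }
  end

  def subscribe(event, &block)
    @subscribers[event] << block
  end

  # Runs the given block (if any), then notifies subscribers with the payload
  def instrument(event, payload)
    result = yield if block_given?
    @subscribers[event].each { |subscriber| subscriber.call(payload) }
    result
  end
end

monitor = TinyMonitor.new
order = []
monitor.subscribe('active_job.consume') { |_payload| order << :before }
monitor.subscribe('active_job.consumed') { |_payload| order << :after }

payload = { job: { 'job_class' => 'MyJob' } }

# Publish both, as the consumer does: a pre-execution event plus a wrapping one
monitor.instrument('active_job.consume', payload)
monitor.instrument('active_job.consumed', payload) do
  order << :execute # stands in for ::ActiveJob::Base.execute(job)
end

order # => [:before, :execute, :after]
```

Subscribers can therefore hook either the moment consumption starts or the full execution span, which is what makes tracking things like `active_job_id` (#1372) possible without touching the job code.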
data/lib/karafka/admin.rb CHANGED
@@ -44,17 +44,32 @@ module Karafka
  # @param count [Integer] how many messages we want to get at most
  # @param start_offset [Integer] offset from which we should start. If -1 is provided
  # (default) we will start from the latest offset
+ # @param settings [Hash] kafka extra settings (optional)
  #
  # @return [Array<Karafka::Messages::Message>] array with messages
- def read_topic(name, partition, count, start_offset = -1)
+ def read_topic(name, partition, count, start_offset = -1, settings = {})
  messages = []
  tpl = Rdkafka::Consumer::TopicPartitionList.new
+ low_offset, high_offset = nil

- with_consumer do |consumer|
- offsets = consumer.query_watermark_offsets(name, partition)
- end_offset = offsets.last
+ with_consumer(settings) do |consumer|
+ low_offset, high_offset = consumer.query_watermark_offsets(name, partition)
+
+ # Select offset dynamically if -1 or less
+ start_offset = high_offset - count if start_offset.negative?

- start_offset = [0, offsets.last - count].max if start_offset.negative?
+ # Build the requested range - since first element is on the start offset we need to
+ # subtract one from requested count to end up with expected number of elements
+ requested_range = (start_offset..start_offset + (count - 1))
+ # Establish theoretical available range. Note, that this does not handle cases related to
+ # log retention or compaction
+ available_range = (low_offset..high_offset)
+ # Select only offset that we can select. This will remove all the potential offsets that
+ # are below the low watermark offset
+ possible_range = requested_range.select { |offset| available_range.include?(offset) }
+
+ start_offset = possible_range.first
+ count = possible_range.count

  tpl.add_topic_and_partitions_with_offsets(name, partition => start_offset)
  consumer.assign(tpl)
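The new range-clamping step above is the core of the compacted-partition fix: the requested offsets are intersected with the partition's watermark range so reads never start below the low watermark. A standalone sketch of that logic (the helper name `clamp_read_range` is illustrative, not part of the Karafka API):

```ruby
# Given the partition watermarks, trim a requested read range so it never
# dips below the low watermark (e.g. after compaction or retention removed
# early offsets). Returns the effective [start_offset, count].
def clamp_read_range(low_offset, high_offset, count, start_offset)
  # Pick the start dynamically when a negative offset (e.g. -1) was requested
  start_offset = high_offset - count if start_offset.negative?

  # First element sits on start_offset itself, hence `count - 1`
  requested_range = (start_offset..start_offset + (count - 1))
  # Theoretical available range between the watermarks
  available_range = (low_offset..high_offset)
  # Keep only offsets that actually exist in the partition
  possible_range = requested_range.select { |offset| available_range.include?(offset) }

  [possible_range.first, possible_range.count]
end

# Partition with watermarks 50..100: asking for 10 messages starting at
# offset 45 yields only the 5 offsets (50..54) still readable
clamp_read_range(50, 100, 10, 45) # => [50, 5]

# Default -1 start resolves to the last `count` offsets
clamp_read_range(0, 100, 10, -1)  # => [90, 10]
```

In the real method the resulting `possible_range` is also used in the poll loop to stop once a received message falls outside the requested window.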
@@ -64,11 +79,15 @@ module Karafka
  loop do
  # If we've got as many messages as we've wanted stop
  break if messages.size >= count
- # If we've reached end of the topic messages, don't process more
- break if !messages.empty? && end_offset <= messages.last.offset

  message = consumer.poll(200)
- messages << message if message
+
+ next unless message
+
+ # If the message we've got is beyond the requested range, stop
+ break unless possible_range.include?(message.offset)
+
+ messages << message
  rescue Rdkafka::RdkafkaError => e
  # End of partition
  break if e.code == :partition_eof
@@ -77,7 +96,7 @@ module Karafka
  end
  end

- messages.map do |message|
+ messages.map! do |message|
  Messages::Builders::Message.call(
  message,
  # Use topic from routes if we can match it or create a dummy one
@@ -136,6 +155,17 @@ module Karafka
  end
  end

+ # Fetches the watermark offsets for a given topic partition
+ #
+ # @param name [String, Symbol] topic name
+ # @param partition [Integer] partition
+ # @return [Array<Integer, Integer>] low watermark offset and high watermark offset
+ def read_watermark_offsets(name, partition)
+ with_consumer do |consumer|
+ consumer.query_watermark_offsets(name, partition)
+ end
+ end
+
  # @return [Rdkafka::Metadata] cluster metadata info
  def cluster_info
  with_admin do |admin|
@@ -159,15 +189,16 @@ module Karafka

  # Creates admin instance and yields it. After usage it closes the admin instance
  def with_admin
- admin = config(:producer).admin
+ admin = config(:producer, {}).admin
  yield(admin)
  ensure
  admin&.close
  end

  # Creates consumer instance and yields it. After usage it closes the consumer instance
- def with_consumer
- consumer = config(:consumer).consumer
+ # @param settings [Hash] extra settings to customize consumer
+ def with_consumer(settings = {})
+ consumer = config(:consumer, settings).consumer
  yield(consumer)
  ensure
  consumer&.close
@@ -196,11 +227,12 @@ module Karafka
  end

  # @param type [Symbol] type of config we want
+ # @param settings [Hash] extra settings for config (if needed)
  # @return [::Rdkafka::Config] rdkafka config
- def config(type)
+ def config(type, settings)
  config_hash = Karafka::Setup::AttributesMap.public_send(
  type,
- Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS)
+ Karafka::App.config.kafka.dup.merge(CONFIG_DEFAULTS).merge!(settings)
  )

  ::Rdkafka::Config.new(config_hash)
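The precedence in the updated `#config` is: app-level kafka config, then the admin defaults, then caller-supplied settings, with later merges winning. A plain-Ruby sketch of that merge chain; the hash keys and the `CONFIG_DEFAULTS` values here are illustrative stand-ins, not necessarily the gem's actual defaults:

```ruby
# Illustrative admin defaults (the real CONFIG_DEFAULTS lives in Karafka::Admin)
CONFIG_DEFAULTS = {
  'group.id' => 'karafka_admin',
  'enable.auto.commit' => false
}.freeze

# Mirrors the merge chain: base config, then defaults, then per-call settings
def admin_config_hash(app_kafka, settings)
  app_kafka.dup.merge(CONFIG_DEFAULTS).merge!(settings)
end

app_kafka = {
  'bootstrap.servers' => 'localhost:9092',
  'enable.auto.commit' => true
}

# Defaults override the app config ('enable.auto.commit' becomes false),
# and the per-call settings override everything else
cfg = admin_config_hash(app_kafka, { 'statistics.interval.ms' => 100 })
```

This is what lets `read_topic(name, partition, count, start_offset, settings)` customize a single read (via `with_consumer(settings)`) without mutating the global Karafka configuration.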
@@ -17,6 +17,9 @@ module Karafka
  # complete list of all the events. Please use the #available_events on fully loaded
  # Karafka system to determine all of the events you can use.
  EVENTS = %w[
+ active_job.consume
+ active_job.consumed
+
  app.initialized
  app.running
  app.quieting
@@ -22,7 +22,7 @@ module Karafka
  #
  # It contains slightly better revocation warranties than the regular blocking consumer as
  # it can stop processing batch of jobs in the middle after the revocation.
- class Consumer < Karafka::Pro::BaseConsumer
+ class Consumer < ::Karafka::ActiveJob::Consumer
  # Runs ActiveJob jobs processing and handles lrj if needed
  def consume
  messages.each do |message|
@@ -31,11 +31,7 @@ module Karafka
  break if revoked?
  break if Karafka::App.stopping?

- job = ::ActiveSupport::JSON.decode(message.raw_payload)
-
- tags.add(:job_class, job['job_class'])
-
- ::ActiveJob::Base.execute(job)
+ consume_job(message)

  # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
  # this could create random markings.
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
  # Current Karafka version
- VERSION = '2.0.37'
+ VERSION = '2.0.38'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.0.37
+ version: 2.0.38
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
  Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
  MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
  -----END CERTIFICATE-----
- date: 2023-03-20 00:00:00.000000000 Z
+ date: 2023-03-27 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: karafka-core
metadata.gz.sig CHANGED
Binary file