karafka 0.6.0.rc2 → 1.0.0.rc1

Files changed (37)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -2
  3. data/Gemfile.lock +4 -18
  4. data/karafka.gemspec +0 -1
  5. data/lib/karafka.rb +2 -12
  6. data/lib/karafka/attributes_map.rb +2 -3
  7. data/lib/karafka/backends/inline.rb +17 -0
  8. data/lib/karafka/base_controller.rb +40 -96
  9. data/lib/karafka/base_responder.rb +19 -19
  10. data/lib/karafka/cli/info.rb +2 -3
  11. data/lib/karafka/cli/install.rb +0 -3
  12. data/lib/karafka/connection/messages_processor.rb +10 -6
  13. data/lib/karafka/controllers/includer.rb +51 -0
  14. data/lib/karafka/controllers/responders.rb +19 -0
  15. data/lib/karafka/controllers/single_params.rb +15 -0
  16. data/lib/karafka/errors.rb +1 -17
  17. data/lib/karafka/fetcher.rb +2 -2
  18. data/lib/karafka/helpers/class_matcher.rb +9 -10
  19. data/lib/karafka/params/params.rb +2 -2
  20. data/lib/karafka/params/params_batch.rb +2 -7
  21. data/lib/karafka/persistence.rb +18 -0
  22. data/lib/karafka/routing/builder.rb +1 -1
  23. data/lib/karafka/routing/router.rb +3 -11
  24. data/lib/karafka/routing/topic.rb +1 -13
  25. data/lib/karafka/schemas/config.rb +1 -12
  26. data/lib/karafka/schemas/consumer_group.rb +2 -2
  27. data/lib/karafka/setup/config.rb +14 -19
  28. data/lib/karafka/templates/karafka.rb.example +1 -5
  29. data/lib/karafka/version.rb +1 -1
  30. metadata +8 -24
  31. data/lib/karafka/base_worker.rb +0 -26
  32. data/lib/karafka/cli/worker.rb +0 -28
  33. data/lib/karafka/params/interchanger.rb +0 -35
  34. data/lib/karafka/setup/configurators/sidekiq.rb +0 -36
  35. data/lib/karafka/templates/application_worker.rb.example +0 -8
  36. data/lib/karafka/templates/sidekiq.yml.example +0 -26
  37. data/lib/karafka/workers/builder.rb +0 -51
data/lib/karafka/cli/info.rb
@@ -14,14 +14,13 @@ module Karafka
         info = [
           "Karafka framework version: #{Karafka::VERSION}",
           "Application client id: #{config.client_id}",
-          "Processing backend: #{config.processing_backend}",
+          "Backend: #{config.backend}",
           "Batch consuming: #{config.batch_consuming}",
           "Batch processing: #{config.batch_processing}",
           "Number of threads: #{config.concurrency}",
           "Boot file: #{Karafka.boot_file}",
           "Environment: #{Karafka.env}",
-          "Kafka seed brokers: #{config.kafka.seed_brokers}",
-          "Redis: #{config.redis.to_h}"
+          "Kafka seed brokers: #{config.kafka.seed_brokers}"
         ]
 
         puts(info.join("\n"))
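For reference, after this change `bundle exec karafka info` would print something along these lines (the values below are illustrative, not taken from a real app):

    Karafka framework version: 1.0.0.rc1
    Application client id: example_app
    Backend: inline
    Batch consuming: true
    Batch processing: false
    Number of threads: 5
    Boot file: /app/karafka.rb
    Environment: development
    Kafka seed brokers: ["kafka://localhost:9092"]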
data/lib/karafka/cli/install.rb
@@ -12,7 +12,6 @@ module Karafka
         app/models
         app/controllers
         app/responders
-        app/workers
         config
         log
         tmp/pids
@@ -21,8 +20,6 @@ module Karafka
       # Where should we map proper files from templates
       INSTALL_FILES_MAP = {
         'karafka.rb.example' => Karafka.boot_file.basename,
-        'sidekiq.yml.example' => 'config/sidekiq.yml.example',
-        'application_worker.rb.example' => 'app/workers/application_worker.rb',
         'application_controller.rb.example' => 'app/controllers/application_controller.rb',
         'application_responder.rb.example' => 'app/responders/application_responder.rb'
       }.freeze
data/lib/karafka/connection/messages_processor.rb
@@ -18,12 +18,16 @@ module Karafka
         #   first one and it will be valid for all the messages
         # We map from incoming topic name, as it might be namespaced, etc.
         # @see topic_mapper internal docs
-        mapped_topic = Karafka::App.config.topic_mapper.incoming(kafka_messages[0].topic)
-        # @note We search based on the topic id - that is a combination of group id and
-        #   topic name
-        controller = Karafka::Routing::Router.build("#{group_id}_#{mapped_topic}")
-        handler = controller.topic.batch_processing ? :process_batch : :process_each
+        mapped_topic_name = Karafka::App.config.topic_mapper.incoming(kafka_messages[0].topic)
+        topic = Routing::Router.find("#{group_id}_#{mapped_topic_name}")
 
+        # Depending on the case (persisted or not) we might use a new controller instance per
+        # batch, or the same instance for all of them (for implementing buffering, etc.)
+        controller = Persistence.fetch(topic, kafka_messages[0].partition, :controller) do
+          topic.controller.new
+        end
+
+        handler = topic.batch_processing ? :process_batch : :process_each
         send(handler, controller, kafka_messages)
       end
 
@@ -35,7 +39,7 @@ module Karafka
       def process_batch(controller, kafka_messages)
         controller.params_batch = kafka_messages
         Karafka.monitor.notice(self, kafka_messages)
-        controller.schedule
+        controller.call
       end
 
       # Processes messages one by one (like with std http requests)
data/lib/karafka/controllers/includer.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Additional functionalities for controllers
+  module Controllers
+    # Module used to inject functionalities into a given controller class, based on the controller
+    # topic and its settings
+    # We don't need all the behaviors in all the cases, so it is totally not worth having
+    # everything in all the cases all the time
+    module Includer
+      class << self
+        # @param controller_class [Class] controller class, that will get some functionalities
+        #   based on the topic under which it operates
+        def call(controller_class)
+          topic = controller_class.topic
+
+          bind_backend(controller_class, topic)
+          bind_params(controller_class, topic)
+          bind_responders(controller_class, topic)
+        end
+
+        private
+
+        # Figures out backend for a given controller class, based on the topic backend and
+        #   includes it into the controller class
+        # @param controller_class [Class] controller class
+        # @param topic [Karafka::Routing::Topic] topic of a controller class
+        def bind_backend(controller_class, topic)
+          backend = Kernel.const_get("::Karafka::Backends::#{topic.backend.to_s.capitalize}")
+          controller_class.include backend
+        end
+
+        # Adds a single #params support for non batch processed topics
+        # @param controller_class [Class] controller class
+        # @param topic [Karafka::Routing::Topic] topic of a controller class
+        def bind_params(controller_class, topic)
+          return if topic.batch_processing
+          controller_class.include SingleParams
+        end
+
+        # Adds responders support for topics and controllers with responders defined for them
+        # @param controller_class [Class] controller class
+        # @param topic [Karafka::Routing::Topic] topic of a controller class
+        def bind_responders(controller_class, topic)
+          return unless topic.responder
+          controller_class.include Responders
+        end
+      end
+    end
+  end
+end
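In effect, for a hypothetical EventsController whose topic is configured with backend :inline, batch_processing false and a responder defined, Includer.call boils down to roughly this (a sketch of the outcome, not the literal implementation):

    # Equivalent of Karafka::Controllers::Includer.call(EventsController)
    # for such a topic:
    EventsController.include Karafka::Backends::Inline
    EventsController.include Karafka::Controllers::SingleParams
    EventsController.include Karafka::Controllers::Responders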
data/lib/karafka/controllers/responders.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Controllers
+    # Feature that allows us to use responders flow in controller
+    module Responders
+      # Responds with given data using given responder. This allows us to have a similar way of
+      #   defining flows like synchronous protocols
+      # @param data Anything we want to pass to responder based on which we want to trigger further
+      #   Kafka responding
+      def respond_with(*data)
+        Karafka.monitor.notice(self.class, data: data)
+        # @note we build a new instance of responder each time, as a long running (persisted)
+        #   controllers can respond multiple times during the lifecycle
+        topic.responder.new(topic.parser).call(*data)
+      end
+    end
+  end
+end
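A hedged usage sketch: assuming a topic routed to a hypothetical EventsController with an EventsResponder attached, and assuming the controller action in this release is still #perform (the base_controller diff is not shown here), a controller can now pipe data back to Kafka through its responder:

    class EventsController < ApplicationController
      def perform
        # Forwards the received payload to whatever topics EventsResponder defines
        respond_with(params[:event])
      end
    end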
data/lib/karafka/controllers/single_params.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Controllers
+    # Params alias for single message processing controllers
+    module SingleParams
+      private
+
+      # @return [Karafka::Params::Params] params instance for non batch processed controllers
+      def params
+        params_batch.first
+      end
+    end
+  end
+end
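So for a non batch processed topic, #params is simply the first (and only) element of #params_batch. A minimal sketch, again assuming a #perform action:

    def perform
      # With batch_processing false there is exactly one message per batch,
      # so params is just params_batch.first
      payload = params[:event]
    end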
data/lib/karafka/errors.rb
@@ -13,7 +13,7 @@ module Karafka
 
     # Raised when router receives topic name which does not correspond with any routes
     # This can only happen in a case when:
-    # - you've received a message and it was scheduled to Sidekiq background worker
+    # - you've received a message and we cannot match it with a controller
     # - you've changed the routing, so router can no longer associate your topic to
     #   any controller
     # - or in a case when you do a lot of metaprogramming and you change routing/etc on runtime
@@ -23,27 +23,11 @@ module Karafka
     # @see https://github.com/karafka/karafka/issues/135
     NonMatchingRouteError = Class.new(BaseError)
 
-    # Raised when application does not have ApplicationWorker or other class that directly
-    #   inherits from Karafka::BaseWorker
-    BaseWorkerDescentantMissing = Class.new(BaseError)
-
-    # Raised when we want to use #respond_with in controllers but we didn't define
-    #   (and we couldn't find) any appropriate responder for a given controller
-    ResponderMissing = Class.new(BaseError)
-
     # Raised when we don't use or use responder not in the way it expected to based on the
     #   topics usage definitions
     InvalidResponderUsage = Class.new(BaseError)
 
     # Raised when configuration doesn't match with validation schema
     InvalidConfiguration = Class.new(BaseError)
-
-    # Raised when processing messages in batches but still want to use #params instead of
-    #   #params_batch
-    ParamsMethodUnavailable = Class.new(BaseError)
-
-    # Raised when for some reason we try to use invalid processing backend and
-    #   we bypass validations
-    InvalidProcessingBackend = Class.new(BaseError)
   end
 end
data/lib/karafka/fetcher.rb
@@ -34,8 +34,8 @@ module Karafka
     # @return [Proc] proc that should be processed when a messages arrive
     # @yieldparam messages [Array<Kafka::FetchedMessage>] messages from kafka (raw)
     def processor
-      lambda do |consumer_group_id, messages|
-        Karafka::Connection::MessagesProcessor.process(consumer_group_id, messages)
+      lambda do |group_id, messages|
+        Karafka::Connection::MessagesProcessor.process(group_id, messages)
       end
     end
   end
data/lib/karafka/helpers/class_matcher.rb
@@ -4,7 +4,6 @@ module Karafka
   module Helpers
     # Class used to autodetect corresponding classes that are internally inside Karafka framework
     # It is used among others to match:
-    #   controller => worker
     #   controller => responder
     class ClassMatcher
       # Regexp used to remove any non classy like characters that might be in the controller
@@ -14,11 +13,11 @@ module Karafka
       # @param klass [Class] class to which we want to find a corresponding class
      # @param from [String] what type of object is it (based on postfix name part)
      # @param to [String] what are we looking for (based on a postfix name part)
-      # @example Controller that has a corresponding worker
-      #   matcher = Karafka::Helpers::ClassMatcher.new(SuperController, 'Controller', 'Worker')
-      #   matcher.match #=> SuperWorker
-      # @example Controller without a corresponding worker
-      #   matcher = Karafka::Helpers::ClassMatcher.new(Super2Controller, 'Controller', 'Worker')
+      # @example Controller that has a corresponding responder
+      #   matcher = Karafka::Helpers::ClassMatcher.new(SuperController, 'Controller', 'Responder')
+      #   matcher.match #=> SuperResponder
+      # @example Controller without a corresponding responder
+      #   matcher = Karafka::Helpers::ClassMatcher.new(Super2Controller, 'Controller', 'Responder')
       #   matcher.match #=> nil
       def initialize(klass, from:, to:)
         @klass = klass
@@ -37,10 +36,10 @@ module Karafka
 
       # @return [String] name of a new class that we're looking for
       # @note This method returns name of a class without a namespace
-      # @example From SuperController matching worker
-      #   matcher.name #=> 'SuperWorker'
-      # @example From Namespaced::Super2Controller matching worker
-      #   matcher.name #=> Super2Worker
+      # @example From SuperController matching responder
+      #   matcher.name #=> 'SuperResponder'
+      # @example From Namespaced::Super2Controller matching responder
+      #   matcher.name #=> Super2Responder
       def name
         inflected = @klass.to_s.split('::').last.to_s
         inflected.gsub!(@from, @to)
data/lib/karafka/params/params.rb
@@ -5,7 +5,7 @@ module Karafka
   module Params
     # Class-wrapper for hash with indifferent access with additional lazy loading feature
     # It provides lazy loading not only until the first usage, but also allows us to skip
-    # using parser until we execute our logic inside worker. That way we can operate with
+    # using parser until we execute our logic. That way we can operate with
     # heavy-parsing data without slowing down the whole application.
     class Params < HashWithIndifferentAccess
       # Kafka::FetchedMessage attributes that we want to use inside of params
@@ -30,7 +30,7 @@ module Karafka
       # @example Build params instance from a Kafka::FetchedMessage object
       #   Karafka::Params::Params.build(message) #=> params object
       def build(message, parser)
-        # Hash case happens inside workers
+        # Hash case happens inside backends that interchange data
         if message.is_a?(Hash)
           new(parser: parser).merge!(message)
         else
data/lib/karafka/params/params_batch.rb
@@ -4,13 +4,8 @@ module Karafka
   module Params
     # Params batch represents a set of messages received from Kafka.
    # @note Params internally are lazy loaded before first use. That way we can skip parsing
-    #   process if we have before_enqueue that rejects some incoming messages without using params
-    #   It can be also used when handling really heavy data (in terms of parsing). Without direct
-    #   usage outside of worker scope, it will pass raw data into sidekiq, so we won't use Karafka
-    #   working time to parse this data. It will happen only in the worker (where it can take time)
-    #   that way Karafka will be able to process data really quickly. On the other hand, if we
-    #   decide to use params somewhere before it hits worker logic, it won't parse it again in
-    #   the worker - it will use already loaded data and pass it to Redis
+    #   process if we have after_received that rejects some incoming messages without using params
+    #   It can be also used when handling really heavy data (in terms of parsing).
     class ParamsBatch
       include Enumerable
 
data/lib/karafka/persistence.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Module used to provide a persistent cache across batch requests for a given
+  # topic and partition to store some additional details when the persistent mode
+  # for a given topic is turned on
+  module Persistence
+    # @param topic [Karafka::Routing::Topic] topic instance for which we might cache
+    # @param partition [Integer] number of partition for which we want to cache
+    # @param resource [Symbol] name of the resource that we want to store
+    def self.fetch(topic, partition, resource)
+      return yield unless topic.persistent
+      Thread.current[topic.id] ||= {}
+      Thread.current[topic.id][partition] ||= {}
+      Thread.current[topic.id][partition][resource] ||= yield
+    end
+  end
+end
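A short usage sketch (the topic object and partition number are hypothetical stand-ins): when the topic is marked as persistent, the block runs once per topic/partition/resource and the result is memoized in Thread.current; otherwise the block runs every time:

    controller = Karafka::Persistence.fetch(topic, 0, :controller) do
      topic.controller.new
    end
    # The next batch from partition 0 of this topic (on the same thread) gets
    # the very same controller instance, which enables buffering across batches.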
data/lib/karafka/routing/builder.rb
@@ -27,7 +27,7 @@ module Karafka
       each do |consumer_group|
         hashed_group = consumer_group.to_h
         validation_result = Karafka::Schemas::ConsumerGroup.call(hashed_group)
-        next if validation_result.success?
+        return if validation_result.success?
         raise Errors::InvalidConfiguration, [validation_result.errors, hashed_group]
       end
     end
data/lib/karafka/routing/router.rb
@@ -7,16 +7,9 @@ module Karafka
     # @note Since Kafka does not provide namespaces or modules for topics, they all have "flat"
     #   structure so all the routes are being stored in a single level array
     module Router
-      # Builds a controller instance that should handle message from a given topic
-      # @param topic_id [String] topic based on which we find a proper route
-      # @return [Karafka::BaseController] base controller descendant instance object
-      def build(topic_id)
-        topic = find(topic_id)
-        topic.controller.new.tap { |ctrl| ctrl.topic = topic }
-      end
-
-      private
-
+      # Find a proper topic based on full topic id
+      # @param topic_id [String] proper topic id (already mapped, etc) for which we want to find
+      #   routing topic
       # @return [Karafka::Routing::Route] proper route details
       # @raise [Karafka::Topic::NonMatchingTopicError] raised if topic name does not match
       #   any route defined by user using routes.draw
@@ -30,7 +23,6 @@ module Karafka
       raise(Errors::NonMatchingRouteError, topic_id)
     end
 
-    module_function :build
     module_function :find
   end
 end
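Note the contract change here: Router.find now returns the routing topic instead of a ready controller instance, and building (or fetching a cached) controller is the caller's job, as in the messages_processor hunk above. A sketch with a hypothetical topic id:

    topic = Karafka::Routing::Router.find('group_name_topic_name')
    topic.controller       #=> a controller class (not an instance)
    topic.batch_processing #=> routing settings are available on the topic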
data/lib/karafka/routing/topic.rb
@@ -29,16 +29,10 @@ module Karafka
      #   example for Sidekiq
       def build
         Karafka::AttributesMap.topic.each { |attr| send(attr) }
+        controller&.topic = self
         self
       end
 
-      # @return [Class] Class (not an instance) of a worker that should be used to schedule the
-      #   background job
-      # @note If not provided - will be built based on the provided controller
-      def worker
-        @worker ||= processing_backend == :sidekiq ? Workers::Builder.new(controller).build : nil
-      end
-
       # @return [Class, nil] Class (not an instance) of a responder that should respond from
       #   controller back to Kafka (usefull for piping dataflows)
       def responder
@@ -51,12 +45,6 @@ module Karafka
         @parser ||= Karafka::Parsers::Json
       end
 
-      # @return [Class] Interchanger class (not an instance) that we want to use to interchange
-      #   params between Karafka server and Karafka background job
-      def interchanger
-        @interchanger ||= Karafka::Params::Interchanger
-      end
-
       Karafka::AttributesMap.topic.each do |attribute|
         config_retriever_for(attribute)
       end
data/lib/karafka/schemas/config.rb
@@ -14,18 +14,7 @@ module Karafka
     Config = Dry::Validation.Schema do
       required(:client_id).filled(:str?, format?: Karafka::Schemas::TOPIC_REGEXP)
 
-      required(:redis).maybe do
-        schema do
-          required(:url).filled(:str?)
-        end
-      end
-
-      optional(:processing_backend).filled(included_in?: %i[inline sidekiq])
-
-      # If we want to use sidekiq, then redis needs to be configured
-      rule(redis_presence: %i[redis processing_backend]) do |redis, processing_backend|
-        processing_backend.eql?(:sidekiq).then(redis.filled?)
-      end
+      optional(:backend).filled
 
       optional(:connection_pool).schema do
         required(:size).filled
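With the redis section and the cross-field rule gone, a config hash now validates with just a client id, and backend is optional. A minimal check using the dry-validation 0.x call API these schemas are built on (values are illustrative):

    result = Karafka::Schemas::Config.call(client_id: 'example_app', backend: :inline)
    result.success? #=> true - no redis settings are required anymore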
data/lib/karafka/schemas/consumer_group.rb
@@ -6,13 +6,13 @@ module Karafka
     ConsumerGroupTopic = Dry::Validation.Schema do
       required(:id).filled(:str?, format?: Karafka::Schemas::TOPIC_REGEXP)
       required(:name).filled(:str?, format?: Karafka::Schemas::TOPIC_REGEXP)
-      required(:processing_backend).filled(included_in?: %i[inline sidekiq])
+      required(:backend).filled(included_in?: %i[inline sidekiq])
       required(:controller).filled
       required(:parser).filled
-      required(:interchanger).filled
       required(:max_bytes_per_partition).filled(:int?, gteq?: 0)
       required(:start_from_beginning).filled(:bool?)
       required(:batch_processing).filled(:bool?)
+      required(:persistent).filled(:bool?)
     end
 
     # Schema for single full route (consumer group + topics) validation.
data/lib/karafka/setup/config.rb
@@ -18,17 +18,12 @@ module Karafka
     # option client_id [String] kafka client_id - used to provide
     #   default Kafka groups namespaces and identify that app in kafka
     setting :client_id
-    # How should we process messages. For now we support inline mode (asap in the process) or
-    # sidekiq mode (schedule to sidekiq)
-    setting :processing_backend, :inline
+    # What backend do we want to use to process messages
+    setting :backend, :inline
     # option logger [Instance] logger that we want to use
     setting :logger, -> { ::Karafka::Logger.instance }
     # option monitor [Instance] monitor that we will to use (defaults to Karafka::Monitor)
     setting :monitor, -> { ::Karafka::Monitor.instance }
-    # option redis [Hash] redis options hash (url and optional parameters)
-    # Note that redis could be rewriten using nested options, but it is a sidekiq specific
-    # stuff and we don't want to touch it
-    setting :redis
     # Mapper used to remap names of topics, so we can have a clean internal topic namings
     #   despite using any Kafka provider that uses namespacing, etc
     # It needs to implement two methods:
@@ -42,19 +37,19 @@ module Karafka
     #   #params_batch will contain params received from Kafka (may be more than 1) so we can
     #   process them in batches
     setting :batch_processing, false
-    # Connection pool options are used for producer (Waterdrop)
-    # They are configured automatically based on Sidekiq concurrency and number of consumers
-    # The bigger one is selected as we need to be able to send messages from both places
+    # Should we operate with a single controller instance across multiple batches of
+    # messages from the same partition, or should we build a new instance for each
+    # incoming batch?
+    # Persistence is turned on by default, so we don't create more objects than needed
+    # on each batch
+    setting :persistent, true
+    # Connection pool options are used for producer (Waterdrop) - by default it will adapt to
+    # the number of active actors
     setting :connection_pool do
-      # Connection pool size for producers. Note that we take a bigger number because there
-      # are cases when we might have more sidekiq threads than Karafka consumers (small app)
-      # or the opposite for bigger systems
-      setting :size, lambda {
-        [
-          ::Karafka::App.consumer_groups.active.count,
-          Sidekiq.options[:concurrency]
-        ].max
-      }
+      # Connection pool size for producers. If you use sidekiq or any other multi threaded
+      # backend, you might want to tune it to match the number of threads of your background
+      # processing engine
+      setting :size, -> { ::Karafka::App.consumer_groups.active.count }
       # How long should we wait for a working resource from the pool before rising timeout
       # With a proper connection pool size, this should never happen
      setting :timeout, 5
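Putting the renamed settings together, a karafka.rb setup after this release might look like this (a hedged sketch; the class name and values are illustrative):

    class App < Karafka::App
      setup do |config|
        config.client_id = 'example_app'
        # Renamed from config.processing_backend; config.redis is gone entirely
        config.backend = :inline
        # Reuse one controller instance per topic/partition (the new default)
        config.persistent = true
      end
    end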