karafka 1.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +7 -0
  2. data/.coditsu.yml +3 -0
  3. data/.console_irbrc +13 -0
  4. data/.gitignore +68 -0
  5. data/.rspec +1 -0
  6. data/.ruby-gemset +1 -0
  7. data/.ruby-version +1 -0
  8. data/.travis.yml +49 -0
  9. data/CHANGELOG.md +458 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +41 -0
  12. data/Gemfile +15 -0
  13. data/Gemfile.lock +126 -0
  14. data/MIT-LICENCE +18 -0
  15. data/README.md +102 -0
  16. data/bin/karafka +19 -0
  17. data/config/errors.yml +6 -0
  18. data/karafka.gemspec +42 -0
  19. data/lib/karafka.rb +79 -0
  20. data/lib/karafka/app.rb +45 -0
  21. data/lib/karafka/attributes_map.rb +69 -0
  22. data/lib/karafka/backends/inline.rb +16 -0
  23. data/lib/karafka/base_consumer.rb +68 -0
  24. data/lib/karafka/base_responder.rb +208 -0
  25. data/lib/karafka/callbacks.rb +30 -0
  26. data/lib/karafka/callbacks/config.rb +22 -0
  27. data/lib/karafka/callbacks/dsl.rb +16 -0
  28. data/lib/karafka/cli.rb +54 -0
  29. data/lib/karafka/cli/base.rb +78 -0
  30. data/lib/karafka/cli/console.rb +29 -0
  31. data/lib/karafka/cli/flow.rb +46 -0
  32. data/lib/karafka/cli/info.rb +29 -0
  33. data/lib/karafka/cli/install.rb +42 -0
  34. data/lib/karafka/cli/server.rb +66 -0
  35. data/lib/karafka/connection/api_adapter.rb +148 -0
  36. data/lib/karafka/connection/builder.rb +16 -0
  37. data/lib/karafka/connection/client.rb +107 -0
  38. data/lib/karafka/connection/delegator.rb +46 -0
  39. data/lib/karafka/connection/listener.rb +60 -0
  40. data/lib/karafka/consumers/callbacks.rb +54 -0
  41. data/lib/karafka/consumers/includer.rb +51 -0
  42. data/lib/karafka/consumers/responders.rb +24 -0
  43. data/lib/karafka/consumers/single_params.rb +15 -0
  44. data/lib/karafka/errors.rb +50 -0
  45. data/lib/karafka/fetcher.rb +44 -0
  46. data/lib/karafka/helpers/class_matcher.rb +78 -0
  47. data/lib/karafka/helpers/config_retriever.rb +46 -0
  48. data/lib/karafka/helpers/multi_delegator.rb +33 -0
  49. data/lib/karafka/instrumentation/listener.rb +112 -0
  50. data/lib/karafka/instrumentation/logger.rb +55 -0
  51. data/lib/karafka/instrumentation/monitor.rb +64 -0
  52. data/lib/karafka/loader.rb +28 -0
  53. data/lib/karafka/params/dsl.rb +158 -0
  54. data/lib/karafka/params/params_batch.rb +46 -0
  55. data/lib/karafka/parsers/json.rb +38 -0
  56. data/lib/karafka/patches/dry_configurable.rb +33 -0
  57. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  58. data/lib/karafka/persistence/client.rb +25 -0
  59. data/lib/karafka/persistence/consumer.rb +38 -0
  60. data/lib/karafka/persistence/topic.rb +29 -0
  61. data/lib/karafka/process.rb +62 -0
  62. data/lib/karafka/responders/builder.rb +36 -0
  63. data/lib/karafka/responders/topic.rb +57 -0
  64. data/lib/karafka/routing/builder.rb +61 -0
  65. data/lib/karafka/routing/consumer_group.rb +61 -0
  66. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  67. data/lib/karafka/routing/proxy.rb +37 -0
  68. data/lib/karafka/routing/router.rb +29 -0
  69. data/lib/karafka/routing/topic.rb +60 -0
  70. data/lib/karafka/routing/topic_mapper.rb +55 -0
  71. data/lib/karafka/schemas/config.rb +24 -0
  72. data/lib/karafka/schemas/consumer_group.rb +78 -0
  73. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  74. data/lib/karafka/schemas/responder_usage.rb +39 -0
  75. data/lib/karafka/schemas/server_cli_options.rb +43 -0
  76. data/lib/karafka/server.rb +85 -0
  77. data/lib/karafka/setup/config.rb +193 -0
  78. data/lib/karafka/setup/configurators/base.rb +29 -0
  79. data/lib/karafka/setup/configurators/params.rb +25 -0
  80. data/lib/karafka/setup/configurators/water_drop.rb +32 -0
  81. data/lib/karafka/setup/dsl.rb +22 -0
  82. data/lib/karafka/status.rb +25 -0
  83. data/lib/karafka/templates/application_consumer.rb.example +6 -0
  84. data/lib/karafka/templates/application_responder.rb.example +11 -0
  85. data/lib/karafka/templates/karafka.rb.example +54 -0
  86. data/lib/karafka/version.rb +7 -0
  87. data/log/.gitkeep +0 -0
  88. metadata +303 -0
data/lib/karafka/app.rb
@@ -0,0 +1,45 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # App class
+   class App
+     extend Setup::Dsl
+     extend Callbacks::Dsl
+
+     class << self
+       # Sets up all the internal components and bootstraps the whole app
+       # We need to know details about consumers in order to set up components,
+       # that's why we don't set them up until the standard setup is done
+       # @raise [Karafka::Errors::InvalidConfiguration] raised when configuration
+       #   doesn't match with ConfigurationSchema
+       def boot!
+         Setup::Config.validate!
+         Setup::Config.setup_components
+         Callbacks.after_init(Karafka::App.config)
+       end
+
+       # @return [Karafka::Routing::Builder] consumers builder instance
+       def consumer_groups
+         Routing::Builder.instance
+       end
+
+       Status.instance_methods(false).each do |delegated|
+         define_method(delegated) do
+           Status.instance.send(delegated)
+         end
+       end
+
+       # Methods that should be delegated to Karafka module
+       %i[
+         root
+         env
+         logger
+         monitor
+       ].each do |delegated|
+         define_method(delegated) do
+           Karafka.send(delegated)
+         end
+       end
+     end
+   end
+ end
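For orientation, this is roughly how the class above is exercised from a boot file. A minimal sketch, assuming a local broker; the KarafkaApp class name and addresses are illustrative and not part of this diff (the gem ships a fuller template in data/lib/karafka/templates/karafka.rb.example):

# Hypothetical boot file (karafka.rb); names and addresses are examples only
require 'karafka'

class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
    config.kafka.seed_brokers = %w[kafka://127.0.0.1:9092]
  end
end

# Validates the config, sets up components and fires the after_init callbacks
KarafkaApp.boot!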
data/lib/karafka/attributes_map.rb
@@ -0,0 +1,69 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Both Karafka and Ruby-Kafka contain a lot of settings that can be applied on multiple
+   # levels. In Karafka that is on the consumer group and on the topic level. In Ruby-Kafka it
+   # is on the consumer, subscription and consumption levels. In order to keep those settings
+   # organized, this module was created. It contains details on where each setting should go
+   # and which layer (both in Karafka and Ruby-Kafka) is responsible for setting it
+   # and sending it forward
+   # @note Settings presented here cover all the settings that are being used across Karafka
+   module AttributesMap
+     class << self
+       # What settings should go where in ruby-kafka
+       # @note All other settings will be passed to the Kafka.new method invocation.
+       #   All elements in this hash are just edge cases
+       # @return [Hash] hash with proper sections on what to proxy where in Ruby-Kafka
+       def api_adapter
+         {
+           consumer: %i[
+             session_timeout offset_commit_interval offset_commit_threshold
+             offset_retention_time heartbeat_interval fetcher_max_queue_size
+           ],
+           subscribe: %i[start_from_beginning max_bytes_per_partition],
+           consumption: %i[min_bytes max_bytes max_wait_time],
+           pause: %i[pause_timeout],
+           # All the options that are under the kafka config namespace but are not used
+           # directly with the kafka api; from the Karafka user perspective, they are
+           # still related to kafka. They should not be proxied anywhere
+           ignored: %i[reconnect_timeout automatically_mark_as_consumed]
+         }
+       end
+
+       # @return [Array<Symbol>] properties that can be set on a per topic level
+       def topic
+         (api_adapter[:subscribe] + %i[
+           backend
+           name
+           parser
+           responder
+           batch_consuming
+           persistent
+         ]).uniq
+       end
+
+       # @return [Array<Symbol>] properties that can be set on a per consumer group level
+       # @note Note that there are settings directly extracted from the config kafka namespace.
+       #   It is done that way, so we don't have to repeat the same setting keys over and over.
+       #   Thanks to this solution, if any new setting is available for ruby-kafka, we just need
+       #   to add it to our configuration class and it will be handled automatically.
+       def consumer_group
+         # @note We don't ignore the api_adapter[:ignored] values as they should be ignored
+         #   only when proxying details to ruby-kafka. We use ignored fields internally in karafka
+         ignored_settings = api_adapter[:subscribe]
+         defined_settings = api_adapter.values.flatten
+         karafka_settings = %i[batch_fetching]
+         # This is a dirty and bad hack of dry-configurable to get keys before setting values
+         dynamically_proxied = Karafka::Setup::Config
+                               ._settings
+                               .settings
+                               .find { |s| s.name == :kafka }
+                               .value
+                               .names
+                               .to_a
+
+         (defined_settings + dynamically_proxied).uniq + karafka_settings - ignored_settings
+       end
+     end
+   end
+ end
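Reading the two public methods side by side, the per-topic attribute list is fully determined by the code shown: api_adapter[:subscribe] plus the literal list in #topic.

# Derived directly from the module above (no extra assumptions)
Karafka::AttributesMap.topic
# => [:start_from_beginning, :max_bytes_per_partition, :backend, :name,
#     :parser, :responder, :batch_consuming, :persistent]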
data/lib/karafka/backends/inline.rb
@@ -0,0 +1,16 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Namespace for all different backends Karafka supports
+   module Backends
+     # Backend that just runs stuff asap without any scheduling
+     module Inline
+       private
+
+       # Executes consume code immediately (without enqueuing)
+       def process
+         Karafka.monitor.instrument('backends.inline.process', caller: self) { consume }
+       end
+     end
+   end
+   end
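The backend is picked per topic in the routing. A hypothetical routing sketch, assuming the KarafkaApp boot class from above and an OrdersConsumer that does not exist in this diff:

# Illustrative only; topic and consumer names are examples
class KarafkaApp < Karafka::App
  consumer_groups.draw do
    topic :orders do
      # :inline is the default backend, so this line is usually implicit
      backend :inline
      consumer OrdersConsumer
    end
  end
end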
data/lib/karafka/base_consumer.rb
@@ -0,0 +1,68 @@
+ # frozen_string_literal: true
+
+ # Karafka module namespace
+ module Karafka
+   # Base consumer from which all Karafka consumers should inherit
+   class BaseConsumer
+     extend ActiveSupport::DescendantsTracker
+     extend Forwardable
+
+     # Allows us to mark messages as consumed for non-automatic mode without having
+     # to use the consumer client directly. We do it that way because most people should not
+     # mess with the client instance directly (just in case)
+     def_delegator :client, :mark_as_consumed
+
+     private :mark_as_consumed
+
+     class << self
+       attr_reader :topic
+
+       # Assigns a topic to a consumer and builds up proper consumer functionalities
+       # so that it can cooperate with the topic settings
+       # @param topic [Karafka::Routing::Topic]
+       # @return [Karafka::Routing::Topic] assigned topic
+       def topic=(topic)
+         @topic = topic
+         Consumers::Includer.call(self)
+       end
+     end
+
+     # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
+     def topic
+       self.class.topic
+     end
+
+     # Creates a lazy loaded params batch object
+     # @note Until first params usage, it won't parse data at all
+     # @param messages [Array<Kafka::FetchedMessage>, Array<Hash>] messages with raw
+     #   content (from Kafka) or messages inside a hash (from backend, etc)
+     # @return [Karafka::Params::ParamsBatch] lazy loaded params batch
+     def params_batch=(messages)
+       @params_batch = Karafka::Params::ParamsBatch.new(messages, topic.parser)
+     end
+
+     # Executes the default consumer flow.
+     def call
+       process
+     end
+
+     private
+
+     # We make it private as it should be accessible only from the inside of a consumer
+     attr_reader :params_batch
+
+     # @return [Karafka::Connection::Client] messages consuming client that can be used to
+     #   manually commit offsets or pause / stop the consumer based on the business logic
+     def client
+       Persistence::Client.read
+     end
+
+     # Method that will perform business logic on data received from Kafka (it will consume
+     # the data)
+     # @note This method needs to be implemented in a subclass. We stub it here as a failover
+     #   in case someone forgets about it or makes a typo
+     def consume
+       raise NotImplementedError, 'Implement this in a subclass'
+     end
+   end
+ end
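A minimal, hypothetical subclass showing the contract above: implement #consume and work with params_batch. The OrdersConsumer name and payload shape are examples only:

class OrdersConsumer < Karafka::BaseConsumer
  def consume
    # params_batch lazily parses each message on first access
    params_batch.each do |params|
      puts params['value']
    end
  end
end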
data/lib/karafka/base_responder.rb
@@ -0,0 +1,208 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Base responder from which all Karafka responders should inherit
+   # Similar to the Rails responders concept. It allows us to design a flow from one app to another
+   # by isolating what responses should be sent (and where) based on a given action
+   # It differs from Rails responders in the way it works: in a std http request we can have one
+   # response; here we can have an unlimited number of them
+   #
+   # It has a simple API for defining where we should respond (and whether it is required)
+   #
+   # @example Basic usage (each registered topic is required to be used by default)
+   #   class Responder < BaseResponder
+   #     topic :new_action
+   #
+   #     def respond(data)
+   #       respond_to :new_action, data
+   #     end
+   #   end
+   #
+   # @example Responding to a topic with extra options
+   #   class Responder < BaseResponder
+   #     topic :new_action
+   #
+   #     def respond(data)
+   #       respond_to :new_action, data, partition_key: 'thing'
+   #     end
+   #   end
+   #
+   # @example Marking topic as not required (we won't have to use it)
+   #   class Responder < BaseResponder
+   #     topic :required_topic
+   #     topic :new_action, required: false
+   #
+   #     def respond(data)
+   #       respond_to :required_topic, data
+   #     end
+   #   end
+   #
+   # @example Multiple times used topic
+   #   class Responder < BaseResponder
+   #     topic :required_topic, multiple_usage: true
+   #
+   #     def respond(data)
+   #       data.each do |subset|
+   #         respond_to :required_topic, subset
+   #       end
+   #     end
+   #   end
+   #
+   # @example Accept multiple arguments to a respond method
+   #   class Responder < BaseResponder
+   #     topic :users_actions
+   #     topic :articles_viewed
+   #
+   #     def respond(user, article)
+   #       respond_to :users_actions, user
+   #       respond_to :articles_viewed, article
+   #     end
+   #   end
+   class BaseResponder
+     # Definitions of all topics that we want to be able to use in this responder should go here
+     class_attribute :topics
+
+     # Schema that we can use to control and/or require some additional details upon options
+     # that are being passed to the producer. This can be in particular useful if we want to make
+     # sure that for example partition_key is always present.
+     class_attribute :options_schema
+
+     attr_reader :messages_buffer
+
+     class << self
+       # Registers a topic as one to which we will be able to respond
+       # @param topic_name [Symbol, String] name of topic to which we want to respond
+       # @param options [Hash] hash with optional configuration details
+       def topic(topic_name, options = {})
+         self.topics ||= {}
+         topic_obj = Responders::Topic.new(topic_name, options.merge(registered: true))
+         self.topics[topic_obj.name] = topic_obj
+       end
+
+       # A simple alias for easier standalone responder usage.
+       # Instead of building it with new.call it allows (in case of using the JSON parser)
+       # to just run it directly from the class level
+       # @param data Anything that we want to respond with
+       # @example Send user data with a responder (uses default Karafka::Parsers::Json parser)
+       #   UsersCreatedResponder.call(@created_user)
+       def call(*data)
+         # Just in case there were no topics defined for a responder, we initialize with
+         # an empty hash not to handle a nil case
+         self.topics ||= {}
+         new.call(*data)
+       end
+     end
+
+     # Creates a responder object
+     # @param parser_class [Class] parser class that we can use to generate appropriate string
+     #   or nothing if we want to default to Karafka::Parsers::Json
+     # @return [Karafka::BaseResponder] base responder descendant responder
+     def initialize(parser_class = Karafka::App.config.parser)
+       @parser_class = parser_class
+       @messages_buffer = {}
+     end
+
+     # Performs respond and validates that all the response requirements were met
+     # @param data Anything that we want to respond with
+     # @note We know that validators should be executed also before sending data to topics, however
+     #   the implementation gets way more complicated then; that's why we check after everything
+     #   was sent using the responder
+     # @example Send user data with a responder (uses default Karafka::Parsers::Json parser)
+     #   UsersCreatedResponder.new.call(@created_user)
+     # @example Send user data with a responder using a non-default parser
+     #   UsersCreatedResponder.new(MyParser).call(@created_user)
+     def call(*data)
+       respond(*data)
+       validate_usage!
+       validate_options!
+       deliver!
+     end
+
+     private
+
+     # Checks if we met all the topics requirements. It will fail if we didn't send a message to
+     # a registered required topic, etc.
+     def validate_usage!
+       registered_topics = self.class.topics.map do |name, topic|
+         topic.to_h.merge!(
+           usage_count: messages_buffer[name]&.count || 0
+         )
+       end
+
+       used_topics = messages_buffer.map do |name, usage|
+         topic = self.class.topics[name] || Responders::Topic.new(name, registered: false)
+         topic.to_h.merge!(usage_count: usage.count)
+       end
+
+       result = Karafka::Schemas::ResponderUsage.call(
+         registered_topics: registered_topics,
+         used_topics: used_topics
+       )
+
+       return if result.success?
+
+       raise Karafka::Errors::InvalidResponderUsage, result.errors
+     end
+
+     # Checks if we met all the options requirements before sending them to the producer.
+     def validate_options!
+       return true unless self.class.options_schema
+
+       messages_buffer.each_value do |messages_set|
+         messages_set.each do |message_data|
+           result = self.class.options_schema.call(message_data.last)
+           next if result.success?
+           raise Karafka::Errors::InvalidResponderMessageOptions, result.errors
+         end
+       end
+     end
+
+     # Takes all the messages from the buffer and delivers them one by one
+     # @note This method is executed after the validation, so we're sure that
+     #   what we send is legit and it will go to proper topics
+     def deliver!
+       messages_buffer.each_value do |data_elements|
+         data_elements.each do |data, options|
+           # We map this topic name, so it will match the namespaced/etc topic in Kafka
+           # @note By default will not change the topic (if the default mapper is used)
+           mapped_topic = Karafka::App.config.topic_mapper.outgoing(options[:topic])
+           external_options = options.merge(topic: mapped_topic)
+           producer(options).call(data, external_options)
+         end
+       end
+     end
+
+     # Method that needs to be implemented in a subclass. It should handle responding
+     # on registered topics
+     # @raise [NotImplementedError] This method needs to be implemented in a subclass
+     def respond(*_data)
+       raise NotImplementedError, 'Implement this in a subclass'
+     end
+
+     # This method allows us to respond to a single topic with given data. It can be used
+     # as many times as we need, especially when we have a 1:n flow
+     # @param topic [Symbol, String] topic to which we want to respond
+     # @param data [String, Object] string or object that we want to send
+     # @param options [Hash] options for waterdrop (e.g. partition_key)
+     # @note respond_to does not accept multiple data arguments.
+     def respond_to(topic, data, options = {})
+       # We normalize the format to string, as WaterDrop and Ruby-Kafka support only
+       # string topics
+       topic = topic.to_s
+
+       messages_buffer[topic] ||= []
+       messages_buffer[topic] << [
+         @parser_class.generate(data),
+         options.merge(topic: topic)
+       ]
+     end
+
+     # @param options [Hash] options for waterdrop
+     # @return [Class] WaterDrop producer (sync or async based on the settings)
+     def producer(options)
+       self.class.topics[
+         options[:topic]
+       ].async? ? WaterDrop::AsyncProducer : WaterDrop::SyncProducer
+     end
+   end
+ end
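The docblock already covers the topic DSL, so here is a hedged sketch of the one feature it does not illustrate: options_schema. OrdersResponder and the schema contents are hypothetical; validate_options! only requires an object whose #call returns a result responding to #success? and #errors, which a dry-validation schema of this era satisfies:

class OrdersResponder < Karafka::BaseResponder
  topic :orders_completed

  # Reject any respond_to call that does not carry a partition_key
  self.options_schema = Dry::Validation.Schema do
    required(:partition_key).filled(:str?)
  end

  def respond(order)
    respond_to :orders_completed, order, partition_key: order['id'].to_s
  end
end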
data/lib/karafka/callbacks.rb
@@ -0,0 +1,30 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Additional callbacks that are used to trigger some things in given places during the
+   # system lifecycle
+   # @note These callbacks aren't the same as consumer callbacks, as they are not related to
+   #   the lifecycle of particular message fetches but rather to the internal flow process.
+   #   They cannot be defined on a consumer callback level because for some of those,
+   #   there aren't consumers in memory yet and/or they aren't per consumer thread
+   module Callbacks
+     # Types of system callbacks that we have that are not related to consumers
+     TYPES = %i[
+       after_init
+       before_fetch_loop
+     ].freeze
+
+     class << self
+       TYPES.each do |callback_type|
+         # Executes the given callback set at a given moment with the provided arguments
+         define_method callback_type do |*args|
+           Karafka::App
+             .config
+             .callbacks
+             .send(callback_type)
+             .each { |callback| callback.call(*args) }
+         end
+       end
+     end
+   end
+ end
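These triggers pair with the app-level registration DSL (Callbacks::Dsl, which App extends earlier in this diff). A hypothetical registration sketch; the block bodies are examples only:

class KarafkaApp < Karafka::App
  after_init do |config|
    # runs once, right after Setup::Config.setup_components in App.boot!
  end

  before_fetch_loop do |consumer_group, client|
    # runs in each listener thread before it starts polling Kafka
  end
end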
data/lib/karafka/callbacks/config.rb
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Callbacks
+     # Additional configuration required to store procs that we will execute upon callback trigger
+     module Config
+       # Builds up internal callback accumulators
+       # @param klass [Class] class that we extend with callback config
+       def self.extended(klass)
+         # option callbacks [Hash] - internal karafka callback accumulators that should
+         #   never be changed by users directly
+         klass.setting :callbacks do
+           Callbacks::TYPES.each do |callback_type|
+             # option [Array<Proc>] an array of blocks that will be executed at a given moment
+             #   depending on the callback type
+             setting callback_type, []
+           end
+         end
+       end
+     end
+   end
+ end
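For the full picture, a sketch of the counterpart DSL (shipped in data/lib/karafka/callbacks/dsl.rb, listed in this release but not excerpted here): each registered block is pushed into the matching accumulator defined above, where Callbacks.after_init and friends later find it.

module Karafka
  module Callbacks
    # App-level DSL that appends user blocks to the config accumulators
    module Dsl
      Callbacks::TYPES.each do |callback_type|
        define_method(callback_type) do |&block|
          config.callbacks.send(callback_type).push(block)
        end
      end
    end
  end
end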