karafka 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +7 -0
  2. data/.gitignore +68 -0
  3. data/.ruby-gemset +1 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +6 -0
  6. data/CHANGELOG.md +202 -0
  7. data/Gemfile +8 -0
  8. data/Gemfile.lock +216 -0
  9. data/MIT-LICENCE +18 -0
  10. data/README.md +831 -0
  11. data/Rakefile +17 -0
  12. data/bin/karafka +7 -0
  13. data/karafka.gemspec +34 -0
  14. data/lib/karafka.rb +73 -0
  15. data/lib/karafka/app.rb +45 -0
  16. data/lib/karafka/base_controller.rb +162 -0
  17. data/lib/karafka/base_responder.rb +118 -0
  18. data/lib/karafka/base_worker.rb +41 -0
  19. data/lib/karafka/capistrano.rb +2 -0
  20. data/lib/karafka/capistrano/karafka.cap +84 -0
  21. data/lib/karafka/cli.rb +52 -0
  22. data/lib/karafka/cli/base.rb +74 -0
  23. data/lib/karafka/cli/console.rb +23 -0
  24. data/lib/karafka/cli/flow.rb +46 -0
  25. data/lib/karafka/cli/info.rb +26 -0
  26. data/lib/karafka/cli/install.rb +45 -0
  27. data/lib/karafka/cli/routes.rb +39 -0
  28. data/lib/karafka/cli/server.rb +59 -0
  29. data/lib/karafka/cli/worker.rb +26 -0
  30. data/lib/karafka/connection/consumer.rb +29 -0
  31. data/lib/karafka/connection/listener.rb +54 -0
  32. data/lib/karafka/connection/message.rb +17 -0
  33. data/lib/karafka/connection/topic_consumer.rb +48 -0
  34. data/lib/karafka/errors.rb +50 -0
  35. data/lib/karafka/fetcher.rb +40 -0
  36. data/lib/karafka/helpers/class_matcher.rb +77 -0
  37. data/lib/karafka/helpers/multi_delegator.rb +31 -0
  38. data/lib/karafka/loader.rb +77 -0
  39. data/lib/karafka/logger.rb +52 -0
  40. data/lib/karafka/monitor.rb +82 -0
  41. data/lib/karafka/params/interchanger.rb +33 -0
  42. data/lib/karafka/params/params.rb +102 -0
  43. data/lib/karafka/patches/dry/configurable/config.rb +37 -0
  44. data/lib/karafka/process.rb +61 -0
  45. data/lib/karafka/responders/builder.rb +33 -0
  46. data/lib/karafka/responders/topic.rb +43 -0
  47. data/lib/karafka/responders/usage_validator.rb +59 -0
  48. data/lib/karafka/routing/builder.rb +89 -0
  49. data/lib/karafka/routing/route.rb +80 -0
  50. data/lib/karafka/routing/router.rb +38 -0
  51. data/lib/karafka/server.rb +53 -0
  52. data/lib/karafka/setup/config.rb +57 -0
  53. data/lib/karafka/setup/configurators/base.rb +33 -0
  54. data/lib/karafka/setup/configurators/celluloid.rb +20 -0
  55. data/lib/karafka/setup/configurators/sidekiq.rb +34 -0
  56. data/lib/karafka/setup/configurators/water_drop.rb +19 -0
  57. data/lib/karafka/setup/configurators/worker_glass.rb +13 -0
  58. data/lib/karafka/status.rb +23 -0
  59. data/lib/karafka/templates/app.rb.example +26 -0
  60. data/lib/karafka/templates/application_controller.rb.example +5 -0
  61. data/lib/karafka/templates/application_responder.rb.example +9 -0
  62. data/lib/karafka/templates/application_worker.rb.example +12 -0
  63. data/lib/karafka/templates/config.ru.example +13 -0
  64. data/lib/karafka/templates/sidekiq.yml.example +26 -0
  65. data/lib/karafka/version.rb +6 -0
  66. data/lib/karafka/workers/builder.rb +49 -0
  67. data/log/.gitkeep +0 -0
  68. metadata +267 -0
data/Rakefile
@@ -0,0 +1,17 @@
+ require 'bundler'
+ require 'rake'
+ require 'polishgeeks-dev-tools'
+
+ PolishGeeks::DevTools.setup do |config|
+   config.brakeman = false
+   config.haml_lint = false
+ end
+
+ desc 'Self check using polishgeeks-dev-tools'
+ task :check do
+   PolishGeeks::DevTools::Runner.new.execute(
+     PolishGeeks::DevTools::Logger.new
+   )
+ end
+
+ task default: :check
data/bin/karafka
@@ -0,0 +1,7 @@
+ #!/usr/bin/env ruby
+
+ require 'karafka'
+ require Karafka.boot_file.to_s if File.exist?(Karafka.boot_file.to_s)
+
+ Karafka::Cli.prepare
+ Karafka::Cli.start
data/karafka.gemspec
@@ -0,0 +1,34 @@
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+
+ require 'karafka/version'
+
+ Gem::Specification.new do |spec|
+   spec.name = 'karafka'
+   spec.version = ::Karafka::VERSION
+   spec.platform = Gem::Platform::RUBY
+   spec.authors = ['Maciej Mensfeld', 'Pavlo Vavruk']
+   spec.email = %w( maciej@mensfeld.pl pavlo.vavruk@gmail.com )
+   spec.homepage = 'https://github.com/karafka/karafka'
+   spec.summary = %q{ Ruby based Microframework for handling Apache Kafka incoming messages }
+   spec.description = %q{ Microframework used to simplify Kafka based Ruby applications }
+   spec.license = 'MIT'
+
+   spec.add_development_dependency 'bundler', '~> 1.2'
+
+   spec.add_dependency 'ruby-kafka', '= 0.3.15'
+   spec.add_dependency 'sidekiq', '~> 4.2'
+   spec.add_dependency 'worker-glass', '~> 0.2'
+   spec.add_dependency 'celluloid', '~> 0.17'
+   spec.add_dependency 'envlogic', '~> 1.0'
+   spec.add_dependency 'waterdrop', '~> 0.3'
+   spec.add_dependency 'rake', '~> 11.3'
+   spec.add_dependency 'thor', '~> 0.19'
+   spec.add_dependency 'activesupport', '~> 5.0'
+   spec.add_dependency 'dry-configurable', '~> 0.1.7'
+   spec.required_ruby_version = '>= 2.3.0'
+
+   spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec)/}) }
+   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.require_paths = %w( lib )
+ end
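
For orientation, an application consuming this release would normally pull it in through Bundler against the gemspec above (which also requires Ruby >= 2.3.0). A minimal, illustrative Gemfile sketch follows; the source URL and the exact pinning style are assumptions, not part of this package:

# Gemfile of a hypothetical application using this release (illustrative sketch)
source 'https://rubygems.org'

gem 'karafka', '0.5.0'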
data/lib/karafka.rb
@@ -0,0 +1,73 @@
+ %w(
+   rake
+   ostruct
+   rubygems
+   bundler
+   English
+   celluloid/current
+   waterdrop
+   pathname
+   timeout
+   logger
+   kafka
+   sidekiq
+   worker_glass
+   envlogic
+   thor
+   fileutils
+   dry-configurable
+   active_support/callbacks
+   active_support/core_ext/class/subclasses
+   active_support/core_ext/hash/indifferent_access
+   active_support/descendants_tracker
+   active_support/inflector
+   karafka/loader
+   karafka/status
+ ).each { |lib| require lib }
+
+ # Karafka library
+ module Karafka
+   extend Envlogic
+
+   class << self
+     # @return [Logger] logger that we want to use. Will use ::Karafka::Logger by default
+     def logger
+       @logger ||= App.config.logger
+     end
+
+     # @return [::Karafka::Monitor] monitor that we want to use. Will use dummy monitor by default
+     def monitor
+       @monitor ||= App.config.monitor
+     end
+
+     # @return [String] root path of this gem
+     def gem_root
+       Pathname.new(File.expand_path('../..', __FILE__))
+     end
+
+     # @return [String] Karafka app root path (user application path)
+     def root
+       Pathname.new(File.dirname(ENV['BUNDLE_GEMFILE']))
+     end
+
+     # @return [String] path to Karafka gem root core
+     def core_root
+       Pathname.new(File.expand_path('../karafka', __FILE__))
+     end
+
+     # @return [String] path to a default file that contains booting procedure etc
+     # @note By default it is a file called 'app.rb' but it can be specified as you wish if you
+     #   have Karafka that is merged into a Sinatra/Rails app and app.rb is taken.
+     #   It will be used for console/workers/etc
+     # @example Standard only-Karafka case
+     #   Karafka.boot_file #=> '/home/app_path/app.rb'
+     # @example Non standard case
+     #   KARAFKA_BOOT_FILE='/home/app_path/karafka.rb'
+     #   Karafka.boot_file #=> '/home/app_path/karafka.rb'
+     def boot_file
+       Pathname.new(ENV['KARAFKA_BOOT_FILE'] || File.join(Karafka.root, 'app.rb'))
+     end
+   end
+ end
+
+ Karafka::Loader.new.load!(Karafka.core_root)
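
A quick, hedged sketch of how the path helpers above resolve at runtime; the printed values depend on BUNDLE_GEMFILE and KARAFKA_BOOT_FILE in the environment, and the example paths simply echo the @example comments in the source:

# Illustrative check of the path helpers defined above
require 'karafka'

puts Karafka.root      # e.g. /home/app_path (directory containing the Gemfile)
puts Karafka.boot_file # e.g. /home/app_path/app.rb, or the KARAFKA_BOOT_FILE override
                       # (bin/karafka requires this file when it exists)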
data/lib/karafka/app.rb
@@ -0,0 +1,45 @@
+ module Karafka
+   # App class
+   class App
+     class << self
+       # Sets up the whole configuration
+       # @param [Block] block configuration block
+       def setup(&block)
+         Setup::Config.setup(&block)
+         initialize!
+       end
+
+       # Sets up all the internal components and bootstrap whole app
+       # We need to know details about routes in order to setup components,
+       # that's why we don't setup them after std setup is done
+       def boot!
+         Setup::Config.setup_components
+       end
+
+       # @return [Karafka::Config] config instance
+       def config
+         Setup::Config.config
+       end
+
+       # @return [Karafka::Routing::Builder] routes builder instance
+       def routes
+         Routing::Builder.instance
+       end
+
+       Status.instance_methods(false).each do |delegated|
+         define_method(delegated) do
+           Status.instance.public_send(delegated)
+         end
+       end
+
+       # Methods that should be delegated to Karafka module
+       %i(
+         root env logger monitor
+       ).each do |delegated|
+         define_method(delegated) do
+           Karafka.public_send(delegated)
+         end
+       end
+     end
+   end
+ end
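
For context, a consumer application subclasses this class and calls setup with a configuration block, mirroring the bundled app.rb.example template (not shown in this excerpt). A hedged sketch: ExampleApp is a hypothetical name, and config.logger is the only setting confirmed by this excerpt (Karafka.logger reads it back); the remaining options live in data/lib/karafka/setup/config.rb.

# Hedged sketch of an application class wired through App.setup above
require 'karafka'

class ExampleApp < Karafka::App
  setup do |config|
    config.logger = Logger.new(STDOUT) # plain Ruby logger, purely illustrative
  end
end

# boot! finalizes internal components once routes and the remaining settings
# are defined (see the comment on boot! above)
ExampleApp.boot!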
data/lib/karafka/base_controller.rb
@@ -0,0 +1,162 @@
+ # Karafka module namespace
+ module Karafka
+   # Base controller from which all Karafka controllers should inherit
+   # Similar to Rails controllers we can define before_enqueue callbacks
+   # that will be executed
+   #
+   # Note that if before_enqueue returns false, the chain will be stopped and
+   # the perform method won't be executed in sidekiq (won't perform_async it)
+   #
+   # @example Create simple controller
+   #   class ExamplesController < Karafka::BaseController
+   #     def perform
+   #       # some logic here
+   #     end
+   #   end
+   #
+   # @example Create a controller with a block before_enqueue
+   #   class ExampleController < Karafka::BaseController
+   #     before_enqueue do
+   #       # Here we should have some checking logic
+   #       # If false is returned, won't schedule a perform action
+   #     end
+   #
+   #     def perform
+   #       # some logic here
+   #     end
+   #   end
+   #
+   # @example Create a controller with a method before_enqueue
+   #   class ExampleController < Karafka::BaseController
+   #     before_enqueue :before_method
+   #
+   #     def perform
+   #       # some logic here
+   #     end
+   #
+   #     private
+   #
+   #     def before_method
+   #       # Here we should have some checking logic
+   #       # If false is returned, won't schedule a perform action
+   #     end
+   #   end
+   #
+   # @example Create a controller with an after_failure action
+   #   class ExampleController < Karafka::BaseController
+   #     def perform
+   #       # some logic here
+   #     end
+   #
+   #     def after_failure
+   #       # action taken in case perform fails
+   #     end
+   #   end
+   class BaseController
+     extend ActiveSupport::DescendantsTracker
+     include ActiveSupport::Callbacks
+
+     # The schedule method is wrapped with a set of callbacks
+     # We won't run perform at the backend if any of the callbacks
+     # returns false
+     # @see http://api.rubyonrails.org/classes/ActiveSupport/Callbacks/ClassMethods.html#method-i-get_callbacks
+     define_callbacks :schedule
+
+     # This will be set based on routing settings
+     # From 0.4 a single controller can handle multiple topics jobs
+     attr_accessor :group, :topic, :worker, :parser, :interchanger, :responder
+
+     class << self
+       # Creates a callback that will be executed before scheduling to Sidekiq
+       # @param method_name [Symbol, String] method name or nil if we plan to provide a block
+       # @yield A block with a code that should be executed before scheduling
+       # @note If the value returned is false, it will halt the chain and not schedule to Sidekiq
+       # @example Define a block before_enqueue callback
+       #   before_enqueue do
+       #     # logic here
+       #   end
+       #
+       # @example Define a class name before_enqueue callback
+       #   before_enqueue :method_name
+       def before_enqueue(method_name = nil, &block)
+         set_callback :schedule, :before, method_name ? method_name : block
+       end
+     end
+
+     # Creates lazy loaded params object
+     # @note Until first params usage, it won't parse data at all
+     # @param message [Karafka::Connection::Message, Hash] message with raw content or a hash
+     #   from Sidekiq that allows us to build params.
+     def params=(message)
+       @params = Karafka::Params::Params.build(message, self)
+     end
+
+     # Executes the default controller flow, runs callbacks and if not halted
+     # will schedule a perform task in sidekiq
+     def schedule
+       run_callbacks :schedule do
+         perform_async
+       end
+     end
+
+     # @return [Hash] hash with all controller details - it works similar to #params method however
+     #   it won't parse data so it will return unparsed details about controller and its parameters
+     # @example Get data about ctrl
+     #   ctrl.to_h #=> { "worker"=>WorkerClass, "parsed"=>false, "content"=>"{}" }
+     def to_h
+       @params
+     end
+
+     # Method that will perform business logic on data received from Kafka
+     # @note This method needs to be implemented in a subclass. We stub it here as a failover if
+     #   someone forgets about it or makes one with a typo
+     def perform
+       raise NotImplementedError, 'Implement this in a subclass'
+     end
+
+     private
+
+     # @return [Karafka::Params::Params] Karafka params that is a hash with indifferent access
+     # @note Params internally are lazy loaded before first use. That way we can skip parsing
+     #   process if we have before_enqueue that rejects some incoming messages without using params
+     #   It can be also used when handling really heavy data (in terms of parsing). Without direct
+     #   usage outside of worker scope, it will pass raw data into sidekiq, so we won't use Karafka
+     #   working time to parse this data. It will happen only in the worker (where it can take time)
+     #   that way Karafka will be able to process data really quickly. On the other hand, if we
+     #   decide to use params somewhere before it hits worker logic, it won't parse it again in
+     #   the worker - it will use already loaded data and pass it to Redis
+     # @note Invocation of this method will load all the data into the params object. If you want
+     #   to get access without parsing, please access @params directly
+     def params
+       @params.retrieve
+     end
+
+     # Responds with given data using given responder. This allows us to have a similar way of
+     # defining flows like synchronous protocols
+     # @param data Anything we want to pass to responder based on which we want to trigger further
+     #   Kafka responding
+     # @raise [Karafka::Errors::ResponderMissing] raised when we don't have a responder defined,
+     #   but we still try to use this method
+     def respond_with(*data)
+       raise(Errors::ResponderMissing, self.class) unless responder
+
+       Karafka.monitor.notice(self.class, data: data)
+       responder.new.call(*data)
+     end
+
+     # Enqueues the execution of perform method into a worker.
+     # @note Each worker needs to have a class #perform_async method that will allow us to pass
+     #   parameters into it. We always pass topic as a first argument and this request params
+     #   as a second one (we pass topic to be able to build back the controller in the worker)
+     def perform_async
+       Karafka.monitor.notice(self.class, to_h)
+
+       # We use @params directly (instead of #params) because of lazy loading logic that is behind
+       # it. See Karafka::Params::Params class for more details about that
+       worker.perform_async(
+         topic,
+         interchanger.load(@params)
+       )
+     end
+   end
+ end
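
Tying the documented hooks together, here is a hedged sketch of a concrete controller; the class and topic names are hypothetical, and worker, interchanger and responder would be assigned through routing, which sits outside this excerpt:

# Hypothetical controller built on the base class above
class VisitsController < Karafka::BaseController
  # Per the class documentation, returning false here halts the :schedule
  # chain, so the message is never enqueued to Sidekiq
  before_enqueue do
    @params # inspecting @params directly avoids eager parsing (see the #params note)
  end

  def perform
    # #params triggers the lazy parsing described above
    respond_with params # requires a responder assigned via routing
  end
end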
data/lib/karafka/base_responder.rb
@@ -0,0 +1,118 @@
+ module Karafka
+   # Base responder from which all Karafka responders should inherit
+   # Similar to Rails responders concept. It allows us to design flow from one app to another
+   # by isolating what responses should be sent (and where) based on a given action
+   # It differs from Rails responders in the way it works: in std http request we can have one
+   # response, here we can have unlimited number of them
+   #
+   # It has a simple API for defining where should we respond (and if it is required)
+   #
+   # @example Basic usage (each registered topic is required to be used by default)
+   #   class Responder < BaseResponder
+   #     topic :new_action
+   #
+   #     def respond(data)
+   #       respond_to :new_action, data
+   #     end
+   #   end
+   #
+   # @example Marking topic as optional (we won't have to use it)
+   #   class Responder < BaseResponder
+   #     topic :required_topic
+   #     topic :new_action, optional: true
+   #
+   #     def respond(data)
+   #       respond_to :required_topic, data
+   #     end
+   #   end
+   #
+   # @example Multiple times used topic
+   #   class Responder < BaseResponder
+   #     topic :required_topic, multiple_usage: true
+   #
+   #     def respond(data)
+   #       data.each do |subset|
+   #         respond_to :required_topic, subset
+   #       end
+   #     end
+   #   end
+   #
+   # @example Accept multiple arguments to a respond method
+   #   class Responder < BaseResponder
+   #     topic :users_actions
+   #     topic :articles_viewed
+   #
+   #     def respond(user, article)
+   #       respond_to :users_actions, user
+   #       respond_to :articles_viewed, article
+   #     end
+   #   end
+   class BaseResponder
+     # Definitions of all topics that we want to be able to use in this responder should go here
+     class_attribute :topics
+
+     class << self
+       # Registers a topic as one to which we will be able to respond
+       # @param topic_name [Symbol, String] name of topic to which we want to respond
+       # @param options [Hash] hash with optional configuration details
+       def topic(topic_name, options = {})
+         self.topics ||= {}
+         topic_obj = Responders::Topic.new(topic_name, options)
+         self.topics[topic_obj.name] = topic_obj
+       end
+     end
+
+     # Creates a responder object
+     # @return [Karafka::BaseResponder] base responder descendant responder
+     def initialize
+       @used_topics = []
+     end
+
+     # Performs respond and validates that all the response requirements were met
+     # @param data Anything that we want to respond with
+     # @note We know that validators should be executed also before sending data to topics, however
+     #   the implementation gets way more complicated then, that's why we check after everything
+     #   was sent using responder
+     def call(*data)
+       respond(*data)
+       validate!
+     end
+
+     private
+
+     # Method that needs to be implemented in a subclass. It should handle responding
+     # on registered topics
+     # @raise [NotImplementedError] This method needs to be implemented in a subclass
+     def respond(*_data)
+       raise NotImplementedError, 'Implement this in a subclass'
+     end
+
+     # This method allows us to respond to a single topic with a given data. It can be used
+     # as many times as we need. Especially when we have 1:n flow
+     # @param topic [Symbol, String] topic to which we want to respond
+     # @param data [String, Object] string or object that we want to send
+     # @note Note that if we pass object here (not a string), this method will invoke a #to_json
+     #   on it.
+     # @note Respond to does not accept multiple data arguments.
+     def respond_to(topic, data)
+       Karafka.monitor.notice(self.class, topic: topic, data: data)
+
+       topic = topic.to_s
+       @used_topics << topic
+
+       ::WaterDrop::Message.new(
+         topic,
+         data.is_a?(String) ? data : data.to_json
+       ).send!
+     end
+
+     # Checks if we met all the topics requirements. It will fail if we didn't send a message to
+     # a registered required topic, etc.
+     def validate!
+       Responders::UsageValidator.new(
+         self.class.topics || {},
+         @used_topics
+       ).validate!
+     end
+   end
+ end
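
And a matching hedged sketch of a responder following the registration rules documented above; the topic names are illustrative:

# Hypothetical responder paired with the controller sketch above
class VisitsResponder < Karafka::BaseResponder
  topic :visits_processed                 # required by default
  topic :visits_rejected, optional: true  # may be left unused

  def respond(visit)
    # Non-string payloads are serialized with #to_json inside respond_to
    respond_to :visits_processed, visit
  end
end

# Usage: VisitsResponder.new.call(visit_hash)
# validate! (run by #call) raises if a required topic was not used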