karafka 1.4.13 → 2.0.0

Files changed (170)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
data/lib/karafka/runner.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Class used to run the Karafka listeners in separate threads
+  class Runner
+    # Starts listening on all the listeners asynchronously and handles the jobs queue closing
+    # after listeners are done with their work.
+    def call
+      # Despite possibility of having several independent listeners, we aim to have one queue for
+      # jobs across and one workers poll for that
+      jobs_queue = Processing::JobsQueue.new
+
+      workers = Processing::WorkersBatch.new(jobs_queue)
+      listeners = Connection::ListenersBatch.new(jobs_queue)
+
+      workers.each(&:async_call)
+      listeners.each(&:async_call)
+
+      # We aggregate threads here for a supervised shutdown process
+      Karafka::Server.workers = workers
+      Karafka::Server.listeners = listeners
+
+      # All the listener threads need to finish
+      listeners.each(&:join)
+
+      # We close the jobs queue only when no listener threads are working.
+      # This ensures, that everything was closed prior to us not accepting anymore jobs and that
+      # no more jobs will be enqueued. Since each listener waits for jobs to finish, once those
+      # are done, we can close.
+      jobs_queue.close
+
+      # All the workers need to stop processing anything before we can stop the runner completely
+      # This ensures that even async long-running jobs have time to finish before we are done
+      # with everything. One thing worth keeping in mind though: It is the end user responsibility
+      # to handle the shutdown detection in their long-running processes. Otherwise if timeout
+      # is exceeded, there will be a forced shutdown.
+      workers.each(&:join)
+    # If anything crashes here, we need to raise the error and crush the runner because it means
+    # that something terrible happened
+    rescue StandardError => e
+      Karafka.monitor.instrument(
+        'error.occurred',
+        caller: self,
+        error: e,
+        type: 'runner.call.error'
+      )
+      Karafka::App.stop!
+      raise e
+    end
+  end
+end
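The ordering in `Runner#call` is the interesting part: listener threads are joined first, only then is the jobs queue closed, and only then are the workers joined. A minimal standalone sketch of that same producer/queue/worker shutdown ordering with plain Ruby threads (the names here are illustrative stand-ins, not Karafka's JobsQueue/WorkersBatch classes):

# Illustrative stand-ins only: a plain Queue and Threads, not Karafka internals
jobs = Queue.new

# "Listeners" enqueue work and then finish
listeners = 2.times.map do |i|
  Thread.new { 3.times { |n| jobs << "job-#{i}-#{n}" } }
end

# "Workers" process jobs until the queue is closed and drained
workers = 2.times.map do
  Thread.new do
    while (job = jobs.pop)
      puts "processed #{job}"
    end
  end
end

listeners.each(&:join) # 1. all producers of work must finish first
jobs.close             # 2. only then stop accepting jobs; pop returns nil once drained
workers.each(&:join)   # 3. workers drain what is left and exit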
data/lib/karafka/serialization/json/deserializer.rb
@@ -1,25 +1,16 @@
 # frozen_string_literal: true
 
 module Karafka
-  # Module for all supported by default serialization and deserialization ways
+  # Module for all supported by default serialization and deserialization ways.
   module Serialization
-    # Namespace for json ser/der
+    # Namespace for json serializers and deserializers.
     module Json
-      # Default Karafka Json deserializer for loading JSON data
+      # Default Karafka Json deserializer for loading JSON data.
       class Deserializer
-        # @param params [Karafka::Params::Params] Full params object that we want to deserialize
+        # @param message [Karafka::Messages::Message] Message object that we want to deserialize
         # @return [Hash] hash with deserialized JSON data
-        # @example
-        #   params = {
-        #     'payload' => "{\"a\":1}",
-        #     'topic' => 'my-topic',
-        #     'headers' => { 'message_type' => :test }
-        #   }
-        #   Deserializer.call(params) #=> { 'a' => 1 }
-        def call(params)
-          params.raw_payload.nil? ? nil : ::JSON.parse(params.raw_payload)
-        rescue ::JSON::ParserError => e
-          raise ::Karafka::Errors::DeserializationError, e
+        def call(message)
+          message.raw_payload.nil? ? nil : ::JSON.parse(message.raw_payload)
         end
       end
     end
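In 2.0 the deserializer receives a message object and reads its `raw_payload` instead of a params object. A small usage sketch; the Struct below is just a stand-in for anything responding to `#raw_payload`:

require 'json'
require 'karafka'

# Stand-in message object; the real Karafka::Messages::Message also responds to #raw_payload
FakeMessage = Struct.new(:raw_payload)

deserializer = Karafka::Serialization::Json::Deserializer.new
deserializer.call(FakeMessage.new('{"a":1}')) # => { "a" => 1 }
deserializer.call(FakeMessage.new(nil))       # => nil (tombstone messages have no payload)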
data/lib/karafka/server.rb
@@ -3,8 +3,6 @@
 module Karafka
   # Karafka consuming server class
   class Server
-    @consumer_threads = Concurrent::Array.new
-
     # How long should we sleep between checks on shutting down consumers
     SUPERVISION_SLEEP = 0.1
     # What system exit code should we use when we terminated forcefully
@@ -17,55 +15,77 @@ module Karafka
 
     class << self
       # Set of consuming threads. Each consumer thread contains a single consumer
-      attr_accessor :consumer_threads
+      attr_accessor :listeners
+
+      # Set of workers
+      attr_accessor :workers
 
       # Writer for list of consumer groups that we want to consume in our current process context
       attr_writer :consumer_groups
 
      # Method which runs app
      def run
-        process.on_sigint { stop_supervised }
-        process.on_sigquit { stop_supervised }
-        process.on_sigterm { stop_supervised }
-        run_supervised
+        # Since we do a lot of threading and queuing, we don't want to stop from the trap context
+        # as some things may not work there as expected, that is why we spawn a separate thread to
+        # handle the stopping process
+        process.on_sigint { Thread.new { stop } }
+        process.on_sigquit { Thread.new { stop } }
+        process.on_sigterm { Thread.new { stop } }
+
+        # Start is blocking until stop is called and when we stop, it will wait until
+        # all of the things are ready to stop
+        start
+
+        # We always need to wait for Karafka to stop here since we should wait for the stop running
+        # in a separate thread (or trap context) to indicate everything is closed
+        # Since `#start` is blocking, we were get here only after the runner is done. This will
+        # not add any performance degradation because of that.
+        Thread.pass until Karafka::App.stopped?
+      # Try its best to shutdown underlying components before re-raising
+      # rubocop:disable Lint/RescueException
+      rescue Exception => e
+        # rubocop:enable Lint/RescueException
+        stop
+
+        raise e
       end
 
      # @return [Array<String>] array with names of consumer groups that should be consumed in a
      #   current server context
      def consumer_groups
-        # If not specified, a server will listed on all the topics
+        # If not specified, a server will listen on all the topics
        @consumer_groups ||= Karafka::App.consumer_groups.map(&:name).freeze
      end
 
-      private
-
-      # @return [Karafka::Process] process wrapper instance used to catch system signal calls
-      def process
-        Karafka::App.config.internal.process
-      end
-
      # Starts Karafka with a supervision
      # @note We don't need to sleep because Karafka::Fetcher is locking and waiting to
      #   finish loop (and it won't happen until we explicitly want to stop)
-      def run_supervised
+      def start
        process.supervise
        Karafka::App.run!
-        Karafka::App.config.internal.fetcher.call
+        Karafka::Runner.new.call
      end
 
      # Stops Karafka with a supervision (as long as there is a shutdown timeout)
-      # If consumers won't stop in a given time frame, it will force them to exit
-      def stop_supervised
+      # If consumers or workers won't stop in a given time frame, it will force them to exit
+      #
+      # @note This method is not async. It should not be executed from the workers as it will
+      #   lock them forever. If you need to run Karafka shutdown from within workers threads,
+      #   please start a separate thread to do so.
+      def stop
        Karafka::App.stop!
 
-        # See https://github.com/dry-rb/dry-configurable/issues/93
-        timeout = Thread.new { Karafka::App.config.shutdown_timeout }.join.value
+        timeout = Karafka::App.config.shutdown_timeout
 
        # We check from time to time (for the timeout period) if all the threads finished
        # their work and if so, we can just return and normal shutdown process will take place
-        (timeout * SUPERVISION_CHECK_FACTOR).to_i.times do
-          if consumer_threads.count(&:alive?).zero?
-            Thread.new { Karafka.monitor.instrument('app.stopped') }.join
+        # We divide it by 1000 because we use time in ms.
+        ((timeout / 1_000) * SUPERVISION_CHECK_FACTOR).to_i.times do
+          if listeners.count(&:alive?).zero? &&
+             workers.count(&:alive?).zero?
+
+            Karafka::App.producer.close
+
            return
          end
 
@@ -74,12 +94,33 @@ module Karafka
 
        raise Errors::ForcefulShutdownError
      rescue Errors::ForcefulShutdownError => e
-        Thread.new { Karafka.monitor.instrument('app.stopping.error', error: e) }.join
+        Karafka.monitor.instrument(
+          'error.occurred',
+          caller: self,
+          error: e,
+          type: 'app.stopping.error'
+        )
+
        # We're done waiting, lets kill them!
-        consumer_threads.each(&:terminate)
+        workers.each(&:terminate)
+        listeners.each(&:terminate)
+        # We always need to shutdown clients to make sure we do not force the GC to close consumer.
+        # This can cause memory leaks and crashes.
+        listeners.each(&:shutdown)
+
+        Karafka::App.producer.close
 
        # exit! is not within the instrumentation as it would not trigger due to exit
        Kernel.exit! FORCEFUL_EXIT_CODE
+      ensure
+        Karafka::App.stopped!
+      end
+
+      private
+
+      # @return [Karafka::Process] process wrapper instance used to catch system signal calls
+      def process
+        Karafka::App.config.internal.process
      end
    end
  end
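Note how `run` wires the signal handlers: each trap block only spawns a thread that calls `stop`, because Ruby signal handlers run in a restricted trap context where heavier work (logging, synchronizing, joining threads) is best avoided. A standalone sketch of that pattern, not Karafka code:

finished = Queue.new

Signal.trap('TERM') do
  # Keep the handler minimal: delegate the real shutdown work to a separate thread
  Thread.new do
    # ...closing listeners, workers and producers would happen here...
    finished << true
  end
end

Process.kill('TERM', Process.pid)
finished.pop # block until the shutdown thread reports completion
puts 'shut down cleanly'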
data/lib/karafka/setup/config.rb
@@ -12,19 +12,46 @@ module Karafka
     # enough and will still keep the code simple
     # @see Karafka::Setup::Configurators::Base for more details about configurators api
     class Config
-      extend Dry::Configurable
+      extend ::Karafka::Core::Configurable
 
-      # Contract for checking the config provided by the user
-      CONTRACT = Karafka::Contracts::Config.new.freeze
+      # Defaults for kafka settings, that will be overwritten only if not present already
+      KAFKA_DEFAULTS = {
+        'client.id': 'karafka'
+      }.freeze
 
-      private_constant :CONTRACT
+      # Contains settings that should not be used in production but make life easier in dev
+      DEV_DEFAULTS = {
+        # Will create non-existing topics automatically.
+        # Note that the broker needs to be configured with `auto.create.topics.enable=true`
+        # While it is not recommended in prod, it simplifies work in dev
+        'allow.auto.create.topics': 'true',
+        # We refresh the cluster state often as newly created topics in dev may not be detected
+        # fast enough. Fast enough means within reasonable time to provide decent user experience
+        # While it's only a one time thing for new topics, it can still be irritating to have to
+        # restart the process.
+        'topic.metadata.refresh.interval.ms': 5_000
+      }.freeze
+
+      private_constant :KAFKA_DEFAULTS, :DEV_DEFAULTS
 
       # Available settings
+
+      # Namespace for Pro version related license management. If you use LGPL, no need to worry
+      # about any of this
+      setting :license do
+        # option token [String, false] - license token issued when you acquire a Pro license
+        # Leave false if using the LGPL version and all is going to work just fine :)
+        #
+        # @note By using the commercial components, you accept the LICENSE-COMM commercial license
+        #   terms and conditions
+        setting :token, default: false
+        # option entity [String] for whom we did issue the license
+        setting :entity, default: ''
+      end
+
       # option client_id [String] kafka client_id - used to provide
       # default Kafka groups namespaces and identify that app in kafka
-      setting :client_id
-      # What backend do we want to use to process messages
-      setting :backend, default: :inline
+      setting :client_id, default: 'karafka'
       # option logger [Instance] logger that we want to use
       setting :logger, default: ::Karafka::Instrumentation::Logger.new
       # option monitor [Instance] monitor that we will to use (defaults to Karafka::Monitor)
@@ -33,195 +60,140 @@ module Karafka
       # or they need to maintain their own internal consumer group naming conventions, they
       # can easily do it, replacing the default client_id + consumer name pattern concept
       setting :consumer_mapper, default: Routing::ConsumerMapper.new
-      # Mapper used to remap names of topics, so we can have a clean internal topic naming
-      # despite using any Kafka provider that uses namespacing, etc
-      # It needs to implement two methods:
-      #   - #incoming - for remapping from the incoming message to our internal format
-      #   - #outgoing - for remapping from internal topic name into outgoing message
-      setting :topic_mapper, default: Routing::TopicMapper.new
-      # Default serializer for converting whatever we want to send to kafka to json
-      setting :serializer, default: Karafka::Serialization::Json::Serializer.new
+      # option [Boolean] should we reload consumers with each incoming batch thus effectively
+      # supporting code reload (if someone reloads code) or should we keep the persistence
+      setting :consumer_persistence, default: true
       # Default deserializer for converting incoming data into ruby objects
       setting :deserializer, default: Karafka::Serialization::Json::Deserializer.new
-      # If batch_fetching is true, we will fetch kafka messages in batches instead of 1 by 1
-      # @note Fetching does not equal consuming, see batch_consuming description for details
-      setting :batch_fetching, default: true
-      # If batch_consuming is true, we will have access to #params_batch instead of #params.
-      # #params_batch will contain params received from Kafka (may be more than 1) so we can
-      # process them in batches
-      setting :batch_consuming, default: false
-      # option shutdown_timeout [Integer, nil] the number of seconds after which Karafka no
-      # longer wait for the consumers to stop gracefully but instead we force terminate
+      # option [String] should we start with the earliest possible offset or latest
+      # This will set the `auto.offset.reset` value unless present in the kafka scope
+      setting :initial_offset, default: 'earliest'
+      # option [Boolean] should we leave offset management to the user
+      setting :manual_offset_management, default: false
+      # options max_messages [Integer] how many messages do we want to fetch from Kafka in one go
+      setting :max_messages, default: 100
+      # option [Integer] number of milliseconds we can wait while fetching data
+      setting :max_wait_time, default: 1_000
+      # option shutdown_timeout [Integer] the number of milliseconds after which Karafka no
+      # longer waits for the consumers to stop gracefully but instead we force terminate
       # everything.
-      setting :shutdown_timeout, default: 60
-
-      # option kafka [Hash] - optional - kafka configuration options
-      setting :kafka do
-        # Array with at least one host
-        setting :seed_brokers, default: %w[kafka://127.0.0.1:9092]
-        # option session_timeout [Integer] the number of seconds after which, if a client
-        # hasn't contacted the Kafka cluster, it will be kicked out of the group.
-        setting :session_timeout, default: 30
-        # Time that a given partition will be paused from fetching messages, when message
-        # consumption fails. It allows us to process other partitions, while the error is being
-        # resolved and also "slows" things down, so it prevents from "eating" up all messages and
-        # consuming them with failed code. Use `nil` if you want to pause forever and never retry.
-        setting :pause_timeout, default: 10
-        # option pause_max_timeout [Integer, nil] the maximum number of seconds to pause for,
-        # or `nil` if no maximum should be enforced.
-        setting :pause_max_timeout, default: nil
-        # option pause_exponential_backoff [Boolean] whether to enable exponential backoff
-        setting :pause_exponential_backoff, default: false
-        # option offset_commit_interval [Integer] the interval between offset commits,
-        # in seconds.
-        setting :offset_commit_interval, default: 10
-        # option offset_commit_threshold [Integer] the number of messages that can be
-        # processed before their offsets are committed. If zero, offset commits are
-        # not triggered by message consumption.
-        setting :offset_commit_threshold, default: 0
-        # option heartbeat_interval [Integer] the interval between heartbeats; must be less
-        # than the session window.
-        setting :heartbeat_interval, default: 10
-        # option offset_retention_time [Integer] The length of the retention window, known as
-        # offset retention time
-        setting :offset_retention_time, default: nil
-        # option fetcher_max_queue_size [Integer] max number of items in the fetch queue that
-        # are stored for further processing. Note, that each item in the queue represents a
-        # response from a single broker
-        setting :fetcher_max_queue_size, default: 10
-        # option assignment_strategy [Object] a strategy determining the assignment of
-        # partitions to the consumers.
-        setting :assignment_strategy, default: Karafka::AssignmentStrategies::RoundRobin.new
-        # option max_bytes_per_partition [Integer] the maximum amount of data fetched
-        # from a single partition at a time.
-        setting :max_bytes_per_partition, default: 1_048_576
-        # whether to consume messages starting at the beginning or to just consume new messages
-        setting :start_from_beginning, default: true
-        # option resolve_seed_brokers [Boolean] whether to resolve each hostname of the seed
-        # brokers
-        setting :resolve_seed_brokers, default: false
-        # option min_bytes [Integer] the minimum number of bytes to read before
-        # returning messages from the server; if `max_wait_time` is reached, this
-        # is ignored.
-        setting :min_bytes, default: 1
-        # option max_bytes [Integer] the maximum number of bytes to read before returning messages
-        # from each broker.
-        setting :max_bytes, default: 10_485_760
-        # option max_wait_time [Integer, Float] max_wait_time is the maximum number of seconds to
-        # wait before returning data from a single message fetch. By setting this high you also
-        # increase the fetching throughput - and by setting it low you set a bound on latency.
-        # This configuration overrides `min_bytes`, so you'll _always_ get data back within the
-        # time specified. The default value is one second. If you want to have at most five
-        # seconds of latency, set `max_wait_time` to 5. You should make sure
-        # max_wait_time * num brokers + heartbeat_interval is less than session_timeout.
-        setting :max_wait_time, default: 1
-        # option automatically_mark_as_consumed [Boolean] should we automatically mark received
-        # messages as consumed (processed) after non-error consumption
-        setting :automatically_mark_as_consumed, default: true
-        # option reconnect_timeout [Integer] How long should we wait before trying to reconnect to
-        # Kafka cluster that went down (in seconds)
-        setting :reconnect_timeout, default: 5
-        # option connect_timeout [Integer] Sets the number of seconds to wait while connecting to
-        # a broker for the first time. When ruby-kafka initializes, it needs to connect to at
-        # least one host.
-        setting :connect_timeout, default: 10
-        # option socket_timeout [Integer] Sets the number of seconds to wait when reading from or
-        # writing to a socket connection to a broker. After this timeout expires the connection
-        # will be killed. Note that some Kafka operations are by definition long-running, such as
-        # waiting for new messages to arrive in a partition, so don't set this value too low
-        setting :socket_timeout, default: 30
-        # option partitioner [Object, nil] the partitioner that should be used by the client
-        setting :partitioner, default: nil
-
-        # SSL authentication related settings
-        # option ca_cert [String, nil] SSL CA certificate
-        setting :ssl_ca_cert, default: nil
-        # option ssl_ca_cert_file_path [String, nil] SSL CA certificate file path
-        setting :ssl_ca_cert_file_path, default: nil
-        # option ssl_ca_certs_from_system [Boolean] Use the CA certs from your system's default
-        # certificate store
-        setting :ssl_ca_certs_from_system, default: false
-        # option ssl_verify_hostname [Boolean] Verify the hostname for client certs
-        setting :ssl_verify_hostname, default: true
-        # option ssl_client_cert [String, nil] SSL client certificate
-        setting :ssl_client_cert, default: nil
-        # option ssl_client_cert_key [String, nil] SSL client certificate password
-        setting :ssl_client_cert_key, default: nil
-        # option sasl_gssapi_principal [String, nil] sasl principal
-        setting :sasl_gssapi_principal, default: nil
-        # option sasl_gssapi_keytab [String, nil] sasl keytab
-        setting :sasl_gssapi_keytab, default: nil
-        # option sasl_plain_authzid [String] The authorization identity to use
-        setting :sasl_plain_authzid, default: ''
-        # option sasl_plain_username [String, nil] The username used to authenticate
-        setting :sasl_plain_username, default: nil
-        # option sasl_plain_password [String, nil] The password used to authenticate
-        setting :sasl_plain_password, default: nil
-        # option sasl_scram_username [String, nil] The username used to authenticate
-        setting :sasl_scram_username, default: nil
-        # option sasl_scram_password [String, nil] The password used to authenticate
-        setting :sasl_scram_password, default: nil
-        # option sasl_scram_mechanism [String, nil] Scram mechanism, either 'sha256' or 'sha512'
-        setting :sasl_scram_mechanism, default: nil
-        # option sasl_over_ssl [Boolean] whether to enforce SSL with SASL
-        setting :sasl_over_ssl, default: true
-        # option ssl_client_cert_chain [String, nil] client cert chain or nil if not used
-        setting :ssl_client_cert_chain, default: nil
-        # option ssl_client_cert_key_password [String, nil] the password required to read
-        # the ssl_client_cert_key
-        setting :ssl_client_cert_key_password, default: nil
-        # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
-        # implements method token.
-        setting :sasl_oauth_token_provider, default: nil
-      end
+      setting :shutdown_timeout, default: 60_000
+      # option [Integer] number of threads in which we want to do parallel processing
+      setting :concurrency, default: 5
+      # option [Integer] how long should we wait upon processing error
+      setting :pause_timeout, default: 1_000
+      # option [Integer] what is the max timeout in case of an exponential backoff
+      setting :pause_max_timeout, default: 30_000
+      # option [Boolean] should we use exponential backoff
+      setting :pause_with_exponential_backoff, default: true
+      # option [::WaterDrop::Producer, nil]
+      # Unless configured, will be created once Karafka is configured based on user Karafka setup
+      setting :producer, default: nil
+
+      # rdkafka default options
+      # @see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
+      setting :kafka, default: {}
 
-      # Namespace for internal settings that should not be modified
-      # It's a temporary step to "declassify" several things internally before we move to a
-      # non global state
+      # Namespace for internal settings that should not be modified directly
       setting :internal do
-        # option routing_builder [Karafka::Routing::Builder] builder instance
-        setting :routing_builder, default: Routing::Builder.new
        # option status [Karafka::Status] app status
        setting :status, default: Status.new
        # option process [Karafka::Process] process status
        # @note In the future, we need to have a single process representation for all the karafka
        #   instances
        setting :process, default: Process.new
-        # option fetcher [Karafka::Fetcher] fetcher instance
-        setting :fetcher, default: Fetcher.new
-        # option configurators [Array<Object>] all configurators that we want to run after
-        # the setup
-        setting :configurators, default: [Configurators::WaterDrop.new]
+
+        setting :routing do
+          # option builder [Karafka::Routing::Builder] builder instance
+          setting :builder, default: Routing::Builder.new
+          # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
+          #   group builder
+          setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
+        end
+
+        setting :processing do
+          # option scheduler [Object] scheduler we will be using
+          setting :scheduler, default: Processing::Scheduler.new
+          # option jobs_builder [Object] jobs builder we want to use
+          setting :jobs_builder, default: Processing::JobsBuilder.new
+          # option coordinator [Class] work coordinator we want to user for processing coordination
+          setting :coordinator_class, default: Processing::Coordinator
+          # option partitioner_class [Class] partitioner we use against a batch of data
+          setting :partitioner_class, default: Processing::Partitioner
+        end
+
+        # Karafka components for ActiveJob
+        setting :active_job do
+          # option dispatcher [Karafka::ActiveJob::Dispatcher] default dispatcher for ActiveJob
+          setting :dispatcher, default: ActiveJob::Dispatcher.new
+          # option job_options_contract [Karafka::Contracts::JobOptionsContract] contract for
+          #   ensuring, that extra job options defined are valid
+          setting :job_options_contract, default: ActiveJob::JobOptionsContract.new
+          # option consumer [Class] consumer class that should be used to consume ActiveJob data
+          setting :consumer_class, default: ActiveJob::Consumer
+        end
      end
 
+      # This will load all the defaults that can be later overwritten.
+      # Thanks to that we have an initial state out of the box.
+      configure
+
      class << self
        # Configuring method
-        # @yield Runs a block of code providing a config singleton instance to it
-        # @yieldparam [Karafka::Setup::Config] Karafka config instance
-        def setup
-          configure { |config| yield(config) }
+        # @param block [Proc] block we want to execute with the config instance
+        def setup(&block)
+          configure(&block)
+          merge_kafka_defaults!(config)
+
+          Contracts::Config.new.validate!(config.to_h)
+
+          licenser = Licenser.new
+
+          # Tries to load our license gem and if present will try to load the correct license
+          licenser.prepare_and_verify(config.license)
+
+          configure_components
+
+          Karafka::App.initialized!
        end
 
-        # Everything that should be initialized after the setup
-        # Components are in karafka/config directory and are all loaded one by one
-        # If you want to configure a next component, please add a proper file to config dir
-        def setup_components
-          config
-            .internal
-            .configurators
-            .each { |configurator| configurator.call(config) }
+        private
+
+        # Propagates the kafka setting defaults unless they are already present
+        # This makes it easier to set some values that users usually don't change but still allows
+        # them to overwrite the whole hash if they want to
+        # @param config [Karafka::Core::Configurable::Node] config of this producer
+        def merge_kafka_defaults!(config)
+          KAFKA_DEFAULTS.each do |key, value|
+            next if config.kafka.key?(key)
+
+            config.kafka[key] = value
+          end
+
+          return if Karafka::App.env.production?
+
+          DEV_DEFAULTS.each do |key, value|
+            next if config.kafka.key?(key)
+
+            config.kafka[key] = value
+          end
        end
 
-        # Validate config based on the config contract
-        # @return [Boolean] true if configuration is valid
-        # @raise [Karafka::Errors::InvalidConfigurationError] raised when configuration
-        #   doesn't match with the config contract
-        def validate!
-          validation_result = CONTRACT.call(config.to_h)
+        # Sets up all the components that are based on the user configuration
+        # @note At the moment it is only WaterDrop
+        def configure_components
+          config.producer ||= ::WaterDrop::Producer.new do |producer_config|
+            # In some cases WaterDrop updates the config and we don't want our consumer config to
+            # be polluted by those updates, that's why we copy
+            producer_config.kafka = config.kafka.dup
+            producer_config.logger = config.logger
+          end
 
-          return true if validation_result.success?
+          return unless Karafka.pro?
 
-          raise Errors::InvalidConfigurationError, validation_result.errors.to_h
+          # Runs the pro loader that includes all the pro components
+          require 'karafka/pro/loader'
+          Pro::Loader.setup(config)
        end
      end
    end
data/lib/karafka/status.rb
@@ -3,16 +3,22 @@
 module Karafka
   # App status monitor
   class Status
-    # Available states and their transitions
+    # Available states and their transitions.
     STATES = {
       initializing: :initialize!,
       initialized: :initialized!,
       running: :run!,
-      stopping: :stop!
+      stopping: :stop!,
+      stopped: :stopped!
     }.freeze
 
     private_constant :STATES
 
+    # By default we are in the initializing state
+    def initialize
+      initialize!
+    end
+
     STATES.each do |state, transition|
       define_method :"#{state}?" do
         @status == state
@@ -20,9 +26,12 @@ module Karafka
 
       define_method transition do
         @status = state
-        # Trap context disallows to run certain things that we instrument
-        # so the state changes are executed from a separate thread
-        Thread.new { Karafka.monitor.instrument("app.#{state}") }.join
+
+        # Skip on creation (initializing)
+        # We skip as during this state we do not have yet a monitor
+        return if initializing?
+
+        Karafka.monitor.instrument("app.#{state}")
      end
    end
  end
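The `STATES.each` block metaprograms one predicate and one bang transition method per state. Roughly how that surface looks in use, assuming a booted Karafka app so the monitor is available (transitions past `:initializing` instrument an `app.*` event):

status = Karafka::Status.new
status.initializing? # => true, set by #initialize calling #initialize!
status.run!          # switches to :running and instruments 'app.running'
status.running?      # => true
status.stopped?      # => false until #stopped! is called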
data/lib/karafka/templates/example_consumer.rb.erb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+# Example consumer that prints messages payloads
+class ExampleConsumer < ApplicationConsumer
+  def consume
+    messages.each { |message| puts message.payload }
+  end
+
+  # Run anything upon partition being revoked
+  # def revoked
+  # end
+
+  # Define here any teardown things you want when Karafka server stops
+  # def shutdown
+  # end
+end
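For context, such a consumer gets wired to a topic through the routing DSL in the generated karafka.rb boot file; a hypothetical sketch with a placeholder topic name:

class KarafkaApp < Karafka::App
  routes.draw do
    topic :example do
      consumer ExampleConsumer
    end
  end
end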