karafka 0.6.0.rc2 → 1.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -2
  3. data/Gemfile.lock +4 -18
  4. data/karafka.gemspec +0 -1
  5. data/lib/karafka.rb +2 -12
  6. data/lib/karafka/attributes_map.rb +2 -3
  7. data/lib/karafka/backends/inline.rb +17 -0
  8. data/lib/karafka/base_controller.rb +40 -96
  9. data/lib/karafka/base_responder.rb +19 -19
  10. data/lib/karafka/cli/info.rb +2 -3
  11. data/lib/karafka/cli/install.rb +0 -3
  12. data/lib/karafka/connection/messages_processor.rb +10 -6
  13. data/lib/karafka/controllers/includer.rb +51 -0
  14. data/lib/karafka/controllers/responders.rb +19 -0
  15. data/lib/karafka/controllers/single_params.rb +15 -0
  16. data/lib/karafka/errors.rb +1 -17
  17. data/lib/karafka/fetcher.rb +2 -2
  18. data/lib/karafka/helpers/class_matcher.rb +9 -10
  19. data/lib/karafka/params/params.rb +2 -2
  20. data/lib/karafka/params/params_batch.rb +2 -7
  21. data/lib/karafka/persistence.rb +18 -0
  22. data/lib/karafka/routing/builder.rb +1 -1
  23. data/lib/karafka/routing/router.rb +3 -11
  24. data/lib/karafka/routing/topic.rb +1 -13
  25. data/lib/karafka/schemas/config.rb +1 -12
  26. data/lib/karafka/schemas/consumer_group.rb +2 -2
  27. data/lib/karafka/setup/config.rb +14 -19
  28. data/lib/karafka/templates/karafka.rb.example +1 -5
  29. data/lib/karafka/version.rb +1 -1
  30. metadata +8 -24
  31. data/lib/karafka/base_worker.rb +0 -26
  32. data/lib/karafka/cli/worker.rb +0 -28
  33. data/lib/karafka/params/interchanger.rb +0 -35
  34. data/lib/karafka/setup/configurators/sidekiq.rb +0 -36
  35. data/lib/karafka/templates/application_worker.rb.example +0 -8
  36. data/lib/karafka/templates/sidekiq.yml.example +0 -26
  37. data/lib/karafka/workers/builder.rb +0 -51
data/lib/karafka/cli/info.rb
@@ -14,14 +14,13 @@ module Karafka
   info = [
     "Karafka framework version: #{Karafka::VERSION}",
     "Application client id: #{config.client_id}",
-    "Processing backend: #{config.processing_backend}",
+    "Backend: #{config.backend}",
     "Batch consuming: #{config.batch_consuming}",
     "Batch processing: #{config.batch_processing}",
     "Number of threads: #{config.concurrency}",
     "Boot file: #{Karafka.boot_file}",
     "Environment: #{Karafka.env}",
-    "Kafka seed brokers: #{config.kafka.seed_brokers}",
-    "Redis: #{config.redis.to_h}"
+    "Kafka seed brokers: #{config.kafka.seed_brokers}"
   ]

   puts(info.join("\n"))
data/lib/karafka/cli/install.rb
@@ -12,7 +12,6 @@ module Karafka
   app/models
   app/controllers
   app/responders
-  app/workers
   config
   log
   tmp/pids
@@ -21,8 +20,6 @@ module Karafka
   # Where should we map proper files from templates
   INSTALL_FILES_MAP = {
     'karafka.rb.example' => Karafka.boot_file.basename,
-    'sidekiq.yml.example' => 'config/sidekiq.yml.example',
-    'application_worker.rb.example' => 'app/workers/application_worker.rb',
     'application_controller.rb.example' => 'app/controllers/application_controller.rb',
     'application_responder.rb.example' => 'app/responders/application_responder.rb'
   }.freeze
data/lib/karafka/connection/messages_processor.rb
@@ -18,12 +18,16 @@ module Karafka
   # first one and it will be valid for all the messages
   # We map from incoming topic name, as it might be namespaced, etc.
   # @see topic_mapper internal docs
-  mapped_topic = Karafka::App.config.topic_mapper.incoming(kafka_messages[0].topic)
-  # @note We search based on the topic id - that is a combination of group id and
-  #   topic name
-  controller = Karafka::Routing::Router.build("#{group_id}_#{mapped_topic}")
-  handler = controller.topic.batch_processing ? :process_batch : :process_each
+  mapped_topic_name = Karafka::App.config.topic_mapper.incoming(kafka_messages[0].topic)
+  topic = Routing::Router.find("#{group_id}_#{mapped_topic_name}")

+  # Depending on a case (persisted or not) we might use new controller instance per each
+  #   batch, or use the same instance for all of them (for implementing buffering, etc)
+  controller = Persistence.fetch(topic, kafka_messages[0].partition, :controller) do
+    topic.controller.new
+  end
+
+  handler = topic.batch_processing ? :process_batch : :process_each
   send(handler, controller, kafka_messages)
 end

@@ -35,7 +39,7 @@ module Karafka
   def process_batch(controller, kafka_messages)
     controller.params_batch = kafka_messages
     Karafka.monitor.notice(self, kafka_messages)
-    controller.schedule
+    controller.call
   end

   # Processes messages one by one (like with std http requests)
data/lib/karafka/controllers/includer.rb (new file)
@@ -0,0 +1,51 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Additional functionalities for controllers
+   module Controllers
+     # Module used to inject functionalities into a given controller class, based on the controller
+     # topic and its settings
+     # We don't need all the behaviors in all the cases, so it is totally not worth having
+     # everything in all the cases all the time
+     module Includer
+       class << self
+         # @param controller_class [Class] controller class, that will get some functionalities
+         #   based on the topic under which it operates
+         def call(controller_class)
+           topic = controller_class.topic
+
+           bind_backend(controller_class, topic)
+           bind_params(controller_class, topic)
+           bind_responders(controller_class, topic)
+         end
+
+         private
+
+         # Figures out backend for a given controller class, based on the topic backend and
+         # includes it into the controller class
+         # @param controller_class [Class] controller class
+         # @param topic [Karafka::Routing::Topic] topic of a controller class
+         def bind_backend(controller_class, topic)
+           backend = Kernel.const_get("::Karafka::Backends::#{topic.backend.to_s.capitalize}")
+           controller_class.include backend
+         end
+
+         # Adds a single #params support for non batch processed topics
+         # @param controller_class [Class] controller class
+         # @param topic [Karafka::Routing::Topic] topic of a controller class
+         def bind_params(controller_class, topic)
+           return if topic.batch_processing
+           controller_class.include SingleParams
+         end
+
+         # Adds responders support for topics and controllers with responders defined for them
+         # @param controller_class [Class] controller class
+         # @param topic [Karafka::Routing::Topic] topic of a controller class
+         def bind_responders(controller_class, topic)
+           return unless topic.responder
+           controller_class.include Responders
+         end
+       end
+     end
+   end
+ end
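
The net effect is that a controller class carries only what its topic needs. For a hypothetical route whose topic is configured with backend :inline, batch_processing false and a responder, Includer.call(EventsController) composes roughly the following (EventsController is illustrative):

    Karafka::Controllers::Includer.call(EventsController)
    # is roughly equivalent to:
    EventsController.include Karafka::Backends::Inline          # from topic.backend (:inline)
    EventsController.include Karafka::Controllers::SingleParams # because batch_processing is false
    EventsController.include Karafka::Controllers::Responders   # because topic.responder is set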
data/lib/karafka/controllers/responders.rb (new file)
@@ -0,0 +1,19 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Controllers
+     # Feature that allows us to use responders flow in controller
+     module Responders
+       # Responds with given data using given responder. This allows us to have a similar way of
+       # defining flows like synchronous protocols
+       # @param data Anything we want to pass to responder based on which we want to trigger further
+       #   Kafka responding
+       def respond_with(*data)
+         Karafka.monitor.notice(self.class, data: data)
+         # @note we build a new instance of responder each time, as a long running (persisted)
+         #   controllers can respond multiple times during the lifecycle
+         topic.responder.new(topic.parser).call(*data)
+       end
+     end
+   end
+ end
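
In practice, a controller whose route defines a responder can pipe data back to Kafka from its action. A minimal sketch, assuming an EventsResponder is registered on the route and that the controller action in this release is #perform (both names illustrative):

    class EventsController < ApplicationController
      def perform
        # Builds a fresh responder instance (see the @note above) and delivers via its #call
        respond_with(params['payload'])
      end
    end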
data/lib/karafka/controllers/single_params.rb (new file)
@@ -0,0 +1,15 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Controllers
+     # Params alias for single message processing controllers
+     module SingleParams
+       private
+
+       # @return [Karafka::Params::Params] params instance for non batch processed controllers
+       def params
+         params_batch.first
+       end
+     end
+   end
+ end
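
So single-message topics keep the familiar #params reader, while batch-processed topics expose only #params_batch. Roughly (the #perform action name and handle helper are illustrative):

    # topic with batch_processing false - SingleParams is included
    def perform
      params['value'] # same as params_batch.first['value']
    end

    # topic with batch_processing true - only #params_batch is available
    def perform
      params_batch.each { |params| handle(params['value']) }
    end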
data/lib/karafka/errors.rb
@@ -13,7 +13,7 @@ module Karafka

   # Raised when router receives topic name which does not correspond with any routes
   # This can only happen in a case when:
-  #   - you've received a message and it was scheduled to Sidekiq background worker
+  #   - you've received a message and we cannot match it with a controller
   #   - you've changed the routing, so router can no longer associate your topic to
   #     any controller
   #   - or in a case when you do a lot of metaprogramming and you change routing/etc on runtime
@@ -23,27 +23,11 @@ module Karafka
   #   @see https://github.com/karafka/karafka/issues/135
   NonMatchingRouteError = Class.new(BaseError)

-  # Raised when application does not have ApplicationWorker or other class that directly
-  #   inherits from Karafka::BaseWorker
-  BaseWorkerDescentantMissing = Class.new(BaseError)
-
-  # Raised when we want to use #respond_with in controllers but we didn't define
-  #   (and we couldn't find) any appropriate responder for a given controller
-  ResponderMissing = Class.new(BaseError)
-
   # Raised when we don't use or use responder not in the way it expected to based on the
   #   topics usage definitions
   InvalidResponderUsage = Class.new(BaseError)

   # Raised when configuration doesn't match with validation schema
   InvalidConfiguration = Class.new(BaseError)
-
-  # Raised when processing messages in batches but still want to use #params instead of
-  #   #params_batch
-  ParamsMethodUnavailable = Class.new(BaseError)
-
-  # Raised when for some reason we try to use invalid processing backend and
-  #   we bypass validations
-  InvalidProcessingBackend = Class.new(BaseError)
   end
 end
data/lib/karafka/fetcher.rb
@@ -34,8 +34,8 @@ module Karafka
   # @return [Proc] proc that should be processed when a messages arrive
   # @yieldparam messages [Array<Kafka::FetchedMessage>] messages from kafka (raw)
   def processor
-    lambda do |consumer_group_id, messages|
-      Karafka::Connection::MessagesProcessor.process(consumer_group_id, messages)
+    lambda do |group_id, messages|
+      Karafka::Connection::MessagesProcessor.process(group_id, messages)
     end
   end
 end
data/lib/karafka/helpers/class_matcher.rb
@@ -4,7 +4,6 @@ module Karafka
   module Helpers
     # Class used to autodetect corresponding classes that are internally inside Karafka framework
     # It is used among others to match:
-    #   controller => worker
     #   controller => responder
     class ClassMatcher
       # Regexp used to remove any non classy like characters that might be in the controller
@@ -14,11 +13,11 @@ module Karafka
   # @param klass [Class] class to which we want to find a corresponding class
   # @param from [String] what type of object is it (based on postfix name part)
   # @param to [String] what are we looking for (based on a postfix name part)
-  # @example Controller that has a corresponding worker
-  #   matcher = Karafka::Helpers::ClassMatcher.new(SuperController, 'Controller', 'Worker')
-  #   matcher.match #=> SuperWorker
-  # @example Controller without a corresponding worker
-  #   matcher = Karafka::Helpers::ClassMatcher.new(Super2Controller, 'Controller', 'Worker')
+  # @example Controller that has a corresponding responder
+  #   matcher = Karafka::Helpers::ClassMatcher.new(SuperController, 'Controller', 'Responder')
+  #   matcher.match #=> SuperResponder
+  # @example Controller without a corresponding responder
+  #   matcher = Karafka::Helpers::ClassMatcher.new(Super2Controller, 'Controller', 'Responder')
   #   matcher.match #=> nil
   def initialize(klass, from:, to:)
     @klass = klass
@@ -37,10 +36,10 @@ module Karafka

   # @return [String] name of a new class that we're looking for
   # @note This method returns name of a class without a namespace
-  # @example From SuperController matching worker
-  #   matcher.name #=> 'SuperWorker'
-  # @example From Namespaced::Super2Controller matching worker
-  #   matcher.name #=> Super2Worker
+  # @example From SuperController matching responder
+  #   matcher.name #=> 'SuperResponder'
+  # @example From Namespaced::Super2Controller matching responder
+  #   matcher.name #=> Super2Responder
   def name
     inflected = @klass.to_s.split('::').last.to_s
     inflected.gsub!(@from, @to)
data/lib/karafka/params/params.rb
@@ -5,7 +5,7 @@ module Karafka
   module Params
     # Class-wrapper for hash with indifferent access with additional lazy loading feature
     # It provides lazy loading not only until the first usage, but also allows us to skip
-    # using parser until we execute our logic inside worker. That way we can operate with
+    # using parser until we execute our logic. That way we can operate with
     # heavy-parsing data without slowing down the whole application.
     class Params < HashWithIndifferentAccess
       # Kafka::FetchedMessage attributes that we want to use inside of params
@@ -30,7 +30,7 @@ module Karafka
   # @example Build params instance from a Kafka::FetchedMessage object
   #   Karafka::Params::Params.build(message) #=> params object
   def build(message, parser)
-    # Hash case happens inside workers
+    # Hash case happens inside backends that interchange data
     if message.is_a?(Hash)
       new(parser: parser).merge!(message)
     else
data/lib/karafka/params/params_batch.rb
@@ -4,13 +4,8 @@ module Karafka
   module Params
     # Params batch represents a set of messages received from Kafka.
     # @note Params internally are lazy loaded before first use. That way we can skip parsing
-    #   process if we have before_enqueue that rejects some incoming messages without using params
-    #   It can be also used when handling really heavy data (in terms of parsing). Without direct
-    #   usage outside of worker scope, it will pass raw data into sidekiq, so we won't use Karafka
-    #   working time to parse this data. It will happen only in the worker (where it can take time)
-    #   that way Karafka will be able to process data really quickly. On the other hand, if we
-    #   decide to use params somewhere before it hits worker logic, it won't parse it again in
-    #   the worker - it will use already loaded data and pass it to Redis
+    #   process if we have after_received that rejects some incoming messages without using params
+    #   It can be also used when handling really heavy data (in terms of parsing).
     class ParamsBatch
       include Enumerable
data/lib/karafka/persistence.rb (new file)
@@ -0,0 +1,18 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Module used to provide a persistent cache across batch requests for a given
+   # topic and partition to store some additional details when the persistent mode
+   # for a given topic is turned on
+   module Persistence
+     # @param topic [Karafka::Routing::Topic] topic instance for which we might cache
+     # @param partition [Integer] number of partition for which we want to cache
+     # @param resource [Symbol] name of the resource that we want to store
+     def self.fetch(topic, partition, resource)
+       return yield unless topic.persistent
+       Thread.current[topic.id] ||= {}
+       Thread.current[topic.id][partition] ||= {}
+       Thread.current[topic.id][partition][resource] ||= yield
+     end
+   end
+   end
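
The cache is keyed by topic id and partition inside Thread.current, so each consumer thread reuses its own controller per partition. A small illustration with a stand-in topic object (OpenStruct is used here purely for demonstration):

    require 'ostruct'

    topic = OpenStruct.new(persistent: true, id: 'group_events')

    a = Karafka::Persistence.fetch(topic, 0, :controller) { Object.new }
    b = Karafka::Persistence.fetch(topic, 0, :controller) { Object.new }
    a.equal?(b) #=> true - partition 0 reuses the cached instance

    c = Karafka::Persistence.fetch(topic, 1, :controller) { Object.new }
    a.equal?(c) #=> false - each partition gets its own instance

    ephemeral = OpenStruct.new(persistent: false, id: 'group_events')
    Karafka::Persistence.fetch(ephemeral, 0, :controller) { Object.new } # always yields a new one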
data/lib/karafka/routing/builder.rb
@@ -27,7 +27,7 @@ module Karafka
   each do |consumer_group|
     hashed_group = consumer_group.to_h
     validation_result = Karafka::Schemas::ConsumerGroup.call(hashed_group)
-    next if validation_result.success?
+    return if validation_result.success?
     raise Errors::InvalidConfiguration, [validation_result.errors, hashed_group]
   end
 end
data/lib/karafka/routing/router.rb
@@ -7,16 +7,9 @@ module Karafka
   # @note Since Kafka does not provide namespaces or modules for topics, they all have "flat"
   #   structure so all the routes are being stored in a single level array
   module Router
-    # Builds a controller instance that should handle message from a given topic
-    # @param topic_id [String] topic based on which we find a proper route
-    # @return [Karafka::BaseController] base controller descendant instance object
-    def build(topic_id)
-      topic = find(topic_id)
-      topic.controller.new.tap { |ctrl| ctrl.topic = topic }
-    end
-
-    private
-
+    # Find a proper topic based on full topic id
+    # @param topic_id [String] proper topic id (already mapped, etc) for which we want to find
+    #   routing topic
     # @return [Karafka::Routing::Route] proper route details
     # @raise [Karafka::Topic::NonMatchingTopicError] raised if topic name does not match
     #   any route defined by user using routes.draw
@@ -30,7 +23,6 @@ module Karafka
     raise(Errors::NonMatchingRouteError, topic_id)
   end

-  module_function :build
   module_function :find
   end
 end
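
With #build removed, the router's public contract shrinks to topic lookup; controller instantiation now happens in the messages processor via Persistence.fetch (shown earlier). The lookup, in short:

    topic = Karafka::Routing::Router.find("#{group_id}_#{mapped_topic_name}")
    topic.controller #=> the controller Class registered for this route
    # Unknown topic ids raise Karafka::Errors::NonMatchingRouteError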
data/lib/karafka/routing/topic.rb
@@ -29,16 +29,10 @@ module Karafka
   #   example for Sidekiq
   def build
     Karafka::AttributesMap.topic.each { |attr| send(attr) }
+    controller&.topic = self
     self
   end

-  # @return [Class] Class (not an instance) of a worker that should be used to schedule the
-  #   background job
-  # @note If not provided - will be built based on the provided controller
-  def worker
-    @worker ||= processing_backend == :sidekiq ? Workers::Builder.new(controller).build : nil
-  end
-
   # @return [Class, nil] Class (not an instance) of a responder that should respond from
   #   controller back to Kafka (usefull for piping dataflows)
   def responder
@@ -51,12 +45,6 @@ module Karafka
     @parser ||= Karafka::Parsers::Json
   end

-  # @return [Class] Interchanger class (not an instance) that we want to use to interchange
-  #   params between Karafka server and Karafka background job
-  def interchanger
-    @interchanger ||= Karafka::Params::Interchanger
-  end
-
   Karafka::AttributesMap.topic.each do |attribute|
     config_retriever_for(attribute)
   end
data/lib/karafka/schemas/config.rb
@@ -14,18 +14,7 @@ module Karafka
   Config = Dry::Validation.Schema do
     required(:client_id).filled(:str?, format?: Karafka::Schemas::TOPIC_REGEXP)

-    required(:redis).maybe do
-      schema do
-        required(:url).filled(:str?)
-      end
-    end
-
-    optional(:processing_backend).filled(included_in?: %i[inline sidekiq])
-
-    # If we want to use sidekiq, then redis needs to be configured
-    rule(redis_presence: %i[redis processing_backend]) do |redis, processing_backend|
-      processing_backend.eql?(:sidekiq).then(redis.filled?)
-    end
+    optional(:backend).filled

     optional(:connection_pool).schema do
       required(:size).filled
data/lib/karafka/schemas/consumer_group.rb
@@ -6,13 +6,13 @@ module Karafka
   ConsumerGroupTopic = Dry::Validation.Schema do
     required(:id).filled(:str?, format?: Karafka::Schemas::TOPIC_REGEXP)
     required(:name).filled(:str?, format?: Karafka::Schemas::TOPIC_REGEXP)
-    required(:processing_backend).filled(included_in?: %i[inline sidekiq])
+    required(:backend).filled(included_in?: %i[inline sidekiq])
     required(:controller).filled
     required(:parser).filled
-    required(:interchanger).filled
     required(:max_bytes_per_partition).filled(:int?, gteq?: 0)
     required(:start_from_beginning).filled(:bool?)
     required(:batch_processing).filled(:bool?)
+    required(:persistent).filled(:bool?)
   end

   # Schema for single full route (consumer group + topics) validation.
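
Against the updated schema, a topic hash must now carry backend and persistent, and no longer interchanger. A quick check (controller and values are illustrative):

    result = Karafka::Schemas::ConsumerGroupTopic.call(
      id: 'group_events',
      name: 'events',
      backend: :inline,
      controller: EventsController,
      parser: Karafka::Parsers::Json,
      max_bytes_per_partition: 1_048_576,
      start_from_beginning: true,
      batch_processing: false,
      persistent: true
    )
    result.success? #=> true
    result.errors   #=> {} (would list violations, e.g. a backend outside %i[inline sidekiq])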
data/lib/karafka/setup/config.rb
@@ -18,17 +18,12 @@ module Karafka
   # option client_id [String] kafka client_id - used to provide
   #   default Kafka groups namespaces and identify that app in kafka
   setting :client_id
-  # How should we process messages. For now we support inline mode (asap in the process) or
-  #   sidekiq mode (schedule to sidekiq)
-  setting :processing_backend, :inline
+  # What backend do we want to use to process messages
+  setting :backend, :inline
   # option logger [Instance] logger that we want to use
   setting :logger, -> { ::Karafka::Logger.instance }
   # option monitor [Instance] monitor that we will to use (defaults to Karafka::Monitor)
   setting :monitor, -> { ::Karafka::Monitor.instance }
-  # option redis [Hash] redis options hash (url and optional parameters)
-  #   Note that redis could be rewriten using nested options, but it is a sidekiq specific
-  #   stuff and we don't want to touch it
-  setting :redis
   # Mapper used to remap names of topics, so we can have a clean internal topic namings
   #   despite using any Kafka provider that uses namespacing, etc
   # It needs to implement two methods:
@@ -42,19 +37,19 @@ module Karafka
   #   #params_batch will contain params received from Kafka (may be more than 1) so we can
   #   process them in batches
   setting :batch_processing, false
-  # Connection pool options are used for producer (Waterdrop)
-  #   They are configured automatically based on Sidekiq concurrency and number of consumers
-  #   The bigger one is selected as we need to be able to send messages from both places
+  # Should we operate in a single controller instance across multiple batches of messages,
+  #   from the same partition or should we build a new instance for each incoming batch.
+  #   Disabling that can be useful when you want to build a new controller instance for each
+  #   incoming batch. It's disabled by default, not to create more objects that needed on
+  #   each batch
+  setting :persistent, true
+  # Connection pool options are used for producer (Waterdrop) - by default it will adapt to
+  #   number of active actors
   setting :connection_pool do
-    # Connection pool size for producers. Note that we take a bigger number because there
-    #   are cases when we might have more sidekiq threads than Karafka consumers (small app)
-    #   or the opposite for bigger systems
-    setting :size, lambda {
-      [
-        ::Karafka::App.consumer_groups.active.count,
-        Sidekiq.options[:concurrency]
-      ].max
-    }
+    # Connection pool size for producers. If you use sidekiq or any other multi threaded
+    #   backend, you might want to tune it to match number of threads of your background
+    #   processing engine
+    setting :size, -> { ::Karafka::App.consumer_groups.active.count }
     # How long should we wait for a working resource from the pool before rising timeout
     #   With a proper connection pool size, this should never happen
     setting :timeout, 5
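
Taken together, migrating a boot file from 0.6.0.rc2 means dropping the redis and processing_backend settings in favor of backend, plus opting in or out of the new persistent flag. A minimal sketch of a 1.0.0.rc1 karafka.rb (broker address, ids and the controller are placeholders; routing uses the routes.draw API referenced in the router docs above):

    class App < Karafka::App
      setup do |config|
        config.client_id = 'example_app'
        config.backend = :inline        # replaces config.processing_backend
        config.persistent = true        # reuse controller instances per topic/partition
        config.batch_processing = false
        config.kafka.seed_brokers = ['127.0.0.1:9092']
        # config.redis is gone; Sidekiq processing now lives in a separate backend gem
      end
    end

    App.routes.draw do
      topic :events do
        controller EventsController
      end
    end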