karafka 0.6.0.rc2 → 1.0.0.rc1

Files changed (37)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -2
  3. data/Gemfile.lock +4 -18
  4. data/karafka.gemspec +0 -1
  5. data/lib/karafka.rb +2 -12
  6. data/lib/karafka/attributes_map.rb +2 -3
  7. data/lib/karafka/backends/inline.rb +17 -0
  8. data/lib/karafka/base_controller.rb +40 -96
  9. data/lib/karafka/base_responder.rb +19 -19
  10. data/lib/karafka/cli/info.rb +2 -3
  11. data/lib/karafka/cli/install.rb +0 -3
  12. data/lib/karafka/connection/messages_processor.rb +10 -6
  13. data/lib/karafka/controllers/includer.rb +51 -0
  14. data/lib/karafka/controllers/responders.rb +19 -0
  15. data/lib/karafka/controllers/single_params.rb +15 -0
  16. data/lib/karafka/errors.rb +1 -17
  17. data/lib/karafka/fetcher.rb +2 -2
  18. data/lib/karafka/helpers/class_matcher.rb +9 -10
  19. data/lib/karafka/params/params.rb +2 -2
  20. data/lib/karafka/params/params_batch.rb +2 -7
  21. data/lib/karafka/persistence.rb +18 -0
  22. data/lib/karafka/routing/builder.rb +1 -1
  23. data/lib/karafka/routing/router.rb +3 -11
  24. data/lib/karafka/routing/topic.rb +1 -13
  25. data/lib/karafka/schemas/config.rb +1 -12
  26. data/lib/karafka/schemas/consumer_group.rb +2 -2
  27. data/lib/karafka/setup/config.rb +14 -19
  28. data/lib/karafka/templates/karafka.rb.example +1 -5
  29. data/lib/karafka/version.rb +1 -1
  30. metadata +8 -24
  31. data/lib/karafka/base_worker.rb +0 -26
  32. data/lib/karafka/cli/worker.rb +0 -28
  33. data/lib/karafka/params/interchanger.rb +0 -35
  34. data/lib/karafka/setup/configurators/sidekiq.rb +0 -36
  35. data/lib/karafka/templates/application_worker.rb.example +0 -8
  36. data/lib/karafka/templates/sidekiq.yml.example +0 -26
  37. data/lib/karafka/workers/builder.rb +0 -51
data/lib/karafka/cli/info.rb
@@ -14,14 +14,13 @@ module Karafka
         info = [
           "Karafka framework version: #{Karafka::VERSION}",
           "Application client id: #{config.client_id}",
-          "Processing backend: #{config.processing_backend}",
+          "Backend: #{config.backend}",
           "Batch consuming: #{config.batch_consuming}",
           "Batch processing: #{config.batch_processing}",
           "Number of threads: #{config.concurrency}",
           "Boot file: #{Karafka.boot_file}",
           "Environment: #{Karafka.env}",
-          "Kafka seed brokers: #{config.kafka.seed_brokers}",
-          "Redis: #{config.redis.to_h}"
+          "Kafka seed brokers: #{config.kafka.seed_brokers}"
         ]
 
         puts(info.join("\n"))
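For reference, after this change `bundle exec karafka info` would print something along these lines (the values below are illustrative, not taken from a real app):

    Karafka framework version: 1.0.0.rc1
    Application client id: example_app
    Backend: inline
    Batch consuming: true
    Batch processing: false
    Number of threads: 5
    Boot file: /app/karafka.rb
    Environment: development
    Kafka seed brokers: ["kafka://localhost:9092"]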
data/lib/karafka/cli/install.rb
@@ -12,7 +12,6 @@ module Karafka
         app/models
         app/controllers
         app/responders
-        app/workers
         config
         log
         tmp/pids
@@ -21,8 +20,6 @@ module Karafka
       # Where should we map proper files from templates
       INSTALL_FILES_MAP = {
         'karafka.rb.example' => Karafka.boot_file.basename,
-        'sidekiq.yml.example' => 'config/sidekiq.yml.example',
-        'application_worker.rb.example' => 'app/workers/application_worker.rb',
         'application_controller.rb.example' => 'app/controllers/application_controller.rb',
         'application_responder.rb.example' => 'app/responders/application_responder.rb'
       }.freeze
data/lib/karafka/connection/messages_processor.rb
@@ -18,12 +18,16 @@ module Karafka
         #   first one and it will be valid for all the messages
         # We map from incoming topic name, as it might be namespaced, etc.
         # @see topic_mapper internal docs
-        mapped_topic = Karafka::App.config.topic_mapper.incoming(kafka_messages[0].topic)
-        # @note We search based on the topic id - that is a combination of group id and
-        #   topic name
-        controller = Karafka::Routing::Router.build("#{group_id}_#{mapped_topic}")
-        handler = controller.topic.batch_processing ? :process_batch : :process_each
+        mapped_topic_name = Karafka::App.config.topic_mapper.incoming(kafka_messages[0].topic)
+        topic = Routing::Router.find("#{group_id}_#{mapped_topic_name}")
 
+        # Depending on the case (persisted or not) we might use a new controller instance per
+        # batch, or the same instance for all of them (for implementing buffering, etc.)
+        controller = Persistence.fetch(topic, kafka_messages[0].partition, :controller) do
+          topic.controller.new
+        end
+
+        handler = topic.batch_processing ? :process_batch : :process_each
         send(handler, controller, kafka_messages)
       end
 
@@ -35,7 +39,7 @@ module Karafka
       def process_batch(controller, kafka_messages)
         controller.params_batch = kafka_messages
         Karafka.monitor.notice(self, kafka_messages)
-        controller.schedule
+        controller.call
       end
 
       # Processes messages one by one (like with std http requests)
data/lib/karafka/controllers/includer.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Additional functionalities for controllers
+  module Controllers
+    # Module used to inject functionalities into a given controller class, based on the controller
+    # topic and its settings
+    # We don't need all the behaviors in all the cases, so it is totally not worth having
+    # everything in all the cases all the time
+    module Includer
+      class << self
+        # @param controller_class [Class] controller class, that will get some functionalities
+        #   based on the topic under which it operates
+        def call(controller_class)
+          topic = controller_class.topic
+
+          bind_backend(controller_class, topic)
+          bind_params(controller_class, topic)
+          bind_responders(controller_class, topic)
+        end
+
+        private
+
+        # Figures out backend for a given controller class, based on the topic backend and
+        #   includes it into the controller class
+        # @param controller_class [Class] controller class
+        # @param topic [Karafka::Routing::Topic] topic of a controller class
+        def bind_backend(controller_class, topic)
+          backend = Kernel.const_get("::Karafka::Backends::#{topic.backend.to_s.capitalize}")
+          controller_class.include backend
+        end
+
+        # Adds a single #params support for non batch processed topics
+        # @param controller_class [Class] controller class
+        # @param topic [Karafka::Routing::Topic] topic of a controller class
+        def bind_params(controller_class, topic)
+          return if topic.batch_processing
+          controller_class.include SingleParams
+        end
+
+        # Adds responders support for topics and controllers with responders defined for them
+        # @param controller_class [Class] controller class
+        # @param topic [Karafka::Routing::Topic] topic of a controller class
+        def bind_responders(controller_class, topic)
+          return unless topic.responder
+          controller_class.include Responders
+        end
+      end
+    end
+  end
+end
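In effect, for a hypothetical EventsController whose topic is configured with backend :inline, batch_processing false and a responder defined, Includer.call boils down to roughly this (a sketch of the outcome, not the literal implementation):

    # Equivalent of Karafka::Controllers::Includer.call(EventsController)
    # for such a topic:
    EventsController.include Karafka::Backends::Inline
    EventsController.include Karafka::Controllers::SingleParams
    EventsController.include Karafka::Controllers::Responders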
data/lib/karafka/controllers/responders.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Controllers
+    # Feature that allows us to use responders flow in controller
+    module Responders
+      # Responds with given data using given responder. This allows us to have a similar way of
+      #   defining flows like synchronous protocols
+      # @param data Anything we want to pass to responder based on which we want to trigger further
+      #   Kafka responding
+      def respond_with(*data)
+        Karafka.monitor.notice(self.class, data: data)
+        # @note we build a new instance of responder each time, as a long running (persisted)
+        #   controllers can respond multiple times during the lifecycle
+        topic.responder.new(topic.parser).call(*data)
+      end
+    end
+  end
+end
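A hedged usage sketch: assuming a topic routed to a hypothetical EventsController with an EventsResponder attached, and assuming the controller action in this release is still #perform (the base_controller diff is not shown here), a controller can now pipe data back to Kafka through its responder:

    class EventsController < ApplicationController
      def perform
        # Forwards the received payload to whatever topics EventsResponder defines
        respond_with(params[:event])
      end
    end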
data/lib/karafka/controllers/single_params.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Controllers
+    # Params alias for single message processing controllers
+    module SingleParams
+      private
+
+      # @return [Karafka::Params::Params] params instance for non batch processed controllers
+      def params
+        params_batch.first
+      end
+    end
+  end
+end
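So for a non batch processed topic, #params is simply the first (and only) element of #params_batch. A minimal sketch, again assuming a #perform action:

    def perform
      # With batch_processing false there is exactly one message per batch,
      # so params is just params_batch.first
      payload = params[:event]
    end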
data/lib/karafka/errors.rb
@@ -13,7 +13,7 @@ module Karafka
 
     # Raised when router receives topic name which does not correspond with any routes
     # This can only happen in a case when:
-    # - you've received a message and it was scheduled to Sidekiq background worker
+    # - you've received a message and we cannot match it with a controller
     # - you've changed the routing, so router can no longer associate your topic to
     #   any controller
     # - or in a case when you do a lot of metaprogramming and you change routing/etc on runtime
@@ -23,27 +23,11 @@ module Karafka
     # @see https://github.com/karafka/karafka/issues/135
     NonMatchingRouteError = Class.new(BaseError)
 
-    # Raised when application does not have ApplicationWorker or other class that directly
-    #   inherits from Karafka::BaseWorker
-    BaseWorkerDescentantMissing = Class.new(BaseError)
-
-    # Raised when we want to use #respond_with in controllers but we didn't define
-    #   (and we couldn't find) any appropriate responder for a given controller
-    ResponderMissing = Class.new(BaseError)
-
     # Raised when we don't use or use responder not in the way it expected to based on the
     #   topics usage definitions
     InvalidResponderUsage = Class.new(BaseError)
 
     # Raised when configuration doesn't match with validation schema
     InvalidConfiguration = Class.new(BaseError)
-
-    # Raised when processing messages in batches but still want to use #params instead of
-    #   #params_batch
-    ParamsMethodUnavailable = Class.new(BaseError)
-
-    # Raised when for some reason we try to use invalid processing backend and
-    #   we bypass validations
-    InvalidProcessingBackend = Class.new(BaseError)
   end
 end
data/lib/karafka/fetcher.rb
@@ -34,8 +34,8 @@ module Karafka
     # @return [Proc] proc that should be processed when a messages arrive
     # @yieldparam messages [Array<Kafka::FetchedMessage>] messages from kafka (raw)
     def processor
-      lambda do |consumer_group_id, messages|
-        Karafka::Connection::MessagesProcessor.process(consumer_group_id, messages)
+      lambda do |group_id, messages|
+        Karafka::Connection::MessagesProcessor.process(group_id, messages)
       end
     end
   end
data/lib/karafka/helpers/class_matcher.rb
@@ -4,7 +4,6 @@ module Karafka
   module Helpers
     # Class used to autodetect corresponding classes that are internally inside Karafka framework
     # It is used among others to match:
-    #   controller => worker
     #   controller => responder
     class ClassMatcher
       # Regexp used to remove any non classy like characters that might be in the controller
@@ -14,11 +13,11 @@ module Karafka
       # @param klass [Class] class to which we want to find a corresponding class
      # @param from [String] what type of object is it (based on postfix name part)
      # @param to [String] what are we looking for (based on a postfix name part)
-      # @example Controller that has a corresponding worker
-      #   matcher = Karafka::Helpers::ClassMatcher.new(SuperController, 'Controller', 'Worker')
-      #   matcher.match #=> SuperWorker
-      # @example Controller without a corresponding worker
-      #   matcher = Karafka::Helpers::ClassMatcher.new(Super2Controller, 'Controller', 'Worker')
+      # @example Controller that has a corresponding responder
+      #   matcher = Karafka::Helpers::ClassMatcher.new(SuperController, 'Controller', 'Responder')
+      #   matcher.match #=> SuperResponder
+      # @example Controller without a corresponding responder
+      #   matcher = Karafka::Helpers::ClassMatcher.new(Super2Controller, 'Controller', 'Responder')
       #   matcher.match #=> nil
       def initialize(klass, from:, to:)
         @klass = klass
@@ -37,10 +36,10 @@ module Karafka
 
       # @return [String] name of a new class that we're looking for
       # @note This method returns name of a class without a namespace
-      # @example From SuperController matching worker
-      #   matcher.name #=> 'SuperWorker'
-      # @example From Namespaced::Super2Controller matching worker
-      #   matcher.name #=> Super2Worker
+      # @example From SuperController matching responder
+      #   matcher.name #=> 'SuperResponder'
+      # @example From Namespaced::Super2Controller matching responder
+      #   matcher.name #=> Super2Responder
       def name
         inflected = @klass.to_s.split('::').last.to_s
         inflected.gsub!(@from, @to)
data/lib/karafka/params/params.rb
@@ -5,7 +5,7 @@ module Karafka
   module Params
     # Class-wrapper for hash with indifferent access with additional lazy loading feature
     # It provides lazy loading not only until the first usage, but also allows us to skip
-    # using parser until we execute our logic inside worker. That way we can operate with
+    # using parser until we execute our logic. That way we can operate with
     # heavy-parsing data without slowing down the whole application.
     class Params < HashWithIndifferentAccess
       # Kafka::FetchedMessage attributes that we want to use inside of params
@@ -30,7 +30,7 @@ module Karafka
       # @example Build params instance from a Kafka::FetchedMessage object
       #   Karafka::Params::Params.build(message) #=> params object
       def build(message, parser)
-        # Hash case happens inside workers
+        # Hash case happens inside backends that interchange data
         if message.is_a?(Hash)
           new(parser: parser).merge!(message)
         else
data/lib/karafka/params/params_batch.rb
@@ -4,13 +4,8 @@ module Karafka
   module Params
     # Params batch represents a set of messages received from Kafka.
    # @note Params internally are lazy loaded before first use. That way we can skip parsing
-    #   process if we have before_enqueue that rejects some incoming messages without using params
-    #   It can be also used when handling really heavy data (in terms of parsing). Without direct
-    #   usage outside of worker scope, it will pass raw data into sidekiq, so we won't use Karafka
-    #   working time to parse this data. It will happen only in the worker (where it can take time)
-    #   that way Karafka will be able to process data really quickly. On the other hand, if we
-    #   decide to use params somewhere before it hits worker logic, it won't parse it again in
-    #   the worker - it will use already loaded data and pass it to Redis
+    #   process if we have after_received that rejects some incoming messages without using params
+    #   It can be also used when handling really heavy data (in terms of parsing).
     class ParamsBatch
       include Enumerable
 
data/lib/karafka/persistence.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Module used to provide a persistent cache across batch requests for a given
+  # topic and partition to store some additional details when the persistent mode
+  # for a given topic is turned on
+  module Persistence
+    # @param topic [Karafka::Routing::Topic] topic instance for which we might cache
+    # @param partition [Integer] number of partition for which we want to cache
+    # @param resource [Symbol] name of the resource that we want to store
+    def self.fetch(topic, partition, resource)
+      return yield unless topic.persistent
+      Thread.current[topic.id] ||= {}
+      Thread.current[topic.id][partition] ||= {}
+      Thread.current[topic.id][partition][resource] ||= yield
+    end
+  end
+end
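A short usage sketch (the topic object and partition number are hypothetical stand-ins): when the topic is marked as persistent, the block runs once per topic/partition/resource and the result is memoized in Thread.current; otherwise the block runs every time:

    controller = Karafka::Persistence.fetch(topic, 0, :controller) do
      topic.controller.new
    end
    # The next batch from partition 0 of this topic (on the same thread) gets
    # the very same controller instance, which enables buffering across batches.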
data/lib/karafka/routing/builder.rb
@@ -27,7 +27,7 @@ module Karafka
       each do |consumer_group|
         hashed_group = consumer_group.to_h
         validation_result = Karafka::Schemas::ConsumerGroup.call(hashed_group)
-        next if validation_result.success?
+        return if validation_result.success?
         raise Errors::InvalidConfiguration, [validation_result.errors, hashed_group]
       end
     end
data/lib/karafka/routing/router.rb
@@ -7,16 +7,9 @@ module Karafka
     # @note Since Kafka does not provide namespaces or modules for topics, they all have "flat"
     #   structure so all the routes are being stored in a single level array
     module Router
-      # Builds a controller instance that should handle message from a given topic
-      # @param topic_id [String] topic based on which we find a proper route
-      # @return [Karafka::BaseController] base controller descendant instance object
-      def build(topic_id)
-        topic = find(topic_id)
-        topic.controller.new.tap { |ctrl| ctrl.topic = topic }
-      end
-
-      private
-
+      # Find a proper topic based on full topic id
+      # @param topic_id [String] proper topic id (already mapped, etc) for which we want to find
+      #   routing topic
       # @return [Karafka::Routing::Route] proper route details
       # @raise [Karafka::Topic::NonMatchingTopicError] raised if topic name does not match
       #   any route defined by user using routes.draw
@@ -30,7 +23,6 @@ module Karafka
       raise(Errors::NonMatchingRouteError, topic_id)
     end
 
-    module_function :build
     module_function :find
   end
 end
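Note the contract change here: Router.find now returns the routing topic instead of a ready controller instance, and building (or fetching a cached) controller is the caller's job, as in the messages_processor hunk above. A sketch with a hypothetical topic id:

    topic = Karafka::Routing::Router.find('group_name_topic_name')
    topic.controller       #=> a controller class (not an instance)
    topic.batch_processing #=> routing settings are available on the topic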
data/lib/karafka/routing/topic.rb
@@ -29,16 +29,10 @@ module Karafka
      #   example for Sidekiq
       def build
         Karafka::AttributesMap.topic.each { |attr| send(attr) }
+        controller&.topic = self
         self
       end
 
-      # @return [Class] Class (not an instance) of a worker that should be used to schedule the
-      #   background job
-      # @note If not provided - will be built based on the provided controller
-      def worker
-        @worker ||= processing_backend == :sidekiq ? Workers::Builder.new(controller).build : nil
-      end
-
       # @return [Class, nil] Class (not an instance) of a responder that should respond from
       #   controller back to Kafka (usefull for piping dataflows)
       def responder
@@ -51,12 +45,6 @@ module Karafka
         @parser ||= Karafka::Parsers::Json
       end
 
-      # @return [Class] Interchanger class (not an instance) that we want to use to interchange
-      #   params between Karafka server and Karafka background job
-      def interchanger
-        @interchanger ||= Karafka::Params::Interchanger
-      end
-
       Karafka::AttributesMap.topic.each do |attribute|
         config_retriever_for(attribute)
       end
data/lib/karafka/schemas/config.rb
@@ -14,18 +14,7 @@ module Karafka
     Config = Dry::Validation.Schema do
       required(:client_id).filled(:str?, format?: Karafka::Schemas::TOPIC_REGEXP)
 
-      required(:redis).maybe do
-        schema do
-          required(:url).filled(:str?)
-        end
-      end
-
-      optional(:processing_backend).filled(included_in?: %i[inline sidekiq])
-
-      # If we want to use sidekiq, then redis needs to be configured
-      rule(redis_presence: %i[redis processing_backend]) do |redis, processing_backend|
-        processing_backend.eql?(:sidekiq).then(redis.filled?)
-      end
+      optional(:backend).filled
 
       optional(:connection_pool).schema do
         required(:size).filled
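With the redis section and the cross-field rule gone, a config hash now validates with just a client id, and backend is optional. A minimal check using the dry-validation 0.x call API these schemas are built on (values are illustrative):

    result = Karafka::Schemas::Config.call(client_id: 'example_app', backend: :inline)
    result.success? #=> true - no redis settings are required anymore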
data/lib/karafka/schemas/consumer_group.rb
@@ -6,13 +6,13 @@ module Karafka
     ConsumerGroupTopic = Dry::Validation.Schema do
       required(:id).filled(:str?, format?: Karafka::Schemas::TOPIC_REGEXP)
       required(:name).filled(:str?, format?: Karafka::Schemas::TOPIC_REGEXP)
-      required(:processing_backend).filled(included_in?: %i[inline sidekiq])
+      required(:backend).filled(included_in?: %i[inline sidekiq])
       required(:controller).filled
       required(:parser).filled
-      required(:interchanger).filled
       required(:max_bytes_per_partition).filled(:int?, gteq?: 0)
       required(:start_from_beginning).filled(:bool?)
       required(:batch_processing).filled(:bool?)
+      required(:persistent).filled(:bool?)
     end
 
     # Schema for single full route (consumer group + topics) validation.
data/lib/karafka/setup/config.rb
@@ -18,17 +18,12 @@ module Karafka
     # option client_id [String] kafka client_id - used to provide
     #   default Kafka groups namespaces and identify that app in kafka
     setting :client_id
-    # How should we process messages. For now we support inline mode (asap in the process) or
-    # sidekiq mode (schedule to sidekiq)
-    setting :processing_backend, :inline
+    # What backend do we want to use to process messages
+    setting :backend, :inline
     # option logger [Instance] logger that we want to use
     setting :logger, -> { ::Karafka::Logger.instance }
     # option monitor [Instance] monitor that we will to use (defaults to Karafka::Monitor)
     setting :monitor, -> { ::Karafka::Monitor.instance }
-    # option redis [Hash] redis options hash (url and optional parameters)
-    # Note that redis could be rewriten using nested options, but it is a sidekiq specific
-    # stuff and we don't want to touch it
-    setting :redis
     # Mapper used to remap names of topics, so we can have a clean internal topic namings
     #   despite using any Kafka provider that uses namespacing, etc
     # It needs to implement two methods:
@@ -42,19 +37,19 @@ module Karafka
     #   #params_batch will contain params received from Kafka (may be more than 1) so we can
     #   process them in batches
     setting :batch_processing, false
-    # Connection pool options are used for producer (Waterdrop)
-    # They are configured automatically based on Sidekiq concurrency and number of consumers
-    # The bigger one is selected as we need to be able to send messages from both places
+    # Should we operate with a single controller instance across multiple batches of
+    # messages from the same partition, or should we build a new instance for each
+    # incoming batch?
+    # Persistence is turned on by default, so we don't create more objects than needed
+    # on each batch
+    setting :persistent, true
+    # Connection pool options are used for producer (Waterdrop) - by default it will adapt to
+    # the number of active actors
     setting :connection_pool do
-      # Connection pool size for producers. Note that we take a bigger number because there
-      # are cases when we might have more sidekiq threads than Karafka consumers (small app)
-      # or the opposite for bigger systems
-      setting :size, lambda {
-        [
-          ::Karafka::App.consumer_groups.active.count,
-          Sidekiq.options[:concurrency]
-        ].max
-      }
+      # Connection pool size for producers. If you use sidekiq or any other multi threaded
+      # backend, you might want to tune it to match the number of threads of your background
+      # processing engine
+      setting :size, -> { ::Karafka::App.consumer_groups.active.count }
       # How long should we wait for a working resource from the pool before rising timeout
       # With a proper connection pool size, this should never happen
      setting :timeout, 5
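Putting the renamed settings together, a karafka.rb setup after this release might look like this (a hedged sketch; the class name and values are illustrative):

    class App < Karafka::App
      setup do |config|
        config.client_id = 'example_app'
        # Renamed from config.processing_backend; config.redis is gone entirely
        config.backend = :inline
        # Reuse one controller instance per topic/partition (the new default)
        config.persistent = true
      end
    end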