karafka 1.0.1 → 1.1.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -3
  3. data/Gemfile +1 -0
  4. data/Gemfile.lock +14 -32
  5. data/README.md +1 -1
  6. data/karafka.gemspec +2 -3
  7. data/lib/karafka.rb +2 -3
  8. data/lib/karafka/attributes_map.rb +3 -3
  9. data/lib/karafka/backends/inline.rb +2 -2
  10. data/lib/karafka/base_controller.rb +19 -69
  11. data/lib/karafka/base_responder.rb +10 -5
  12. data/lib/karafka/cli/info.rb +1 -2
  13. data/lib/karafka/cli/server.rb +6 -8
  14. data/lib/karafka/connection/{messages_consumer.rb → consumer.rb} +27 -12
  15. data/lib/karafka/connection/listener.rb +6 -13
  16. data/lib/karafka/connection/{messages_processor.rb → processor.rb} +3 -3
  17. data/lib/karafka/controllers/callbacks.rb +54 -0
  18. data/lib/karafka/controllers/includer.rb +1 -1
  19. data/lib/karafka/controllers/single_params.rb +2 -2
  20. data/lib/karafka/errors.rb +7 -0
  21. data/lib/karafka/fetcher.rb +11 -5
  22. data/lib/karafka/monitor.rb +2 -2
  23. data/lib/karafka/params/params.rb +3 -1
  24. data/lib/karafka/params/params_batch.rb +1 -1
  25. data/lib/karafka/patches/dry_configurable.rb +0 -2
  26. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  27. data/lib/karafka/persistence/consumer.rb +25 -0
  28. data/lib/karafka/persistence/controller.rb +24 -9
  29. data/lib/karafka/process.rb +1 -1
  30. data/lib/karafka/responders/topic.rb +8 -1
  31. data/lib/karafka/schemas/config.rb +0 -10
  32. data/lib/karafka/schemas/consumer_group.rb +9 -8
  33. data/lib/karafka/schemas/consumer_group_topic.rb +1 -1
  34. data/lib/karafka/schemas/responder_usage.rb +1 -0
  35. data/lib/karafka/server.rb +6 -19
  36. data/lib/karafka/setup/config.rb +15 -34
  37. data/lib/karafka/setup/configurators/base.rb +1 -1
  38. data/lib/karafka/setup/configurators/water_drop.rb +11 -13
  39. data/lib/karafka/templates/karafka.rb.example +1 -1
  40. data/lib/karafka/version.rb +1 -1
  41. metadata +15 -28
  42. data/Rakefile +0 -7
  43. data/lib/karafka/setup/configurators/celluloid.rb +0 -19

data/lib/karafka/connection/{messages_consumer.rb → consumer.rb}

@@ -3,15 +3,16 @@
 module Karafka
   module Connection
     # Class used as a wrapper around Ruby-Kafka to simplify additional
-    # features that we provide/might provide in future
-    class MessagesConsumer
+    # features that we provide/might provide in future and to hide the internal implementation
+    class Consumer
       # Creates a queue consumer that will pull the data from Kafka
       # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
       #   we create a client
-      # @return [Karafka::Connection::MessagesConsumer] group consumer that can subscribe to
+      # @return [Karafka::Connection::Consumer] group consumer that can subscribe to
       #   multiple topics
       def initialize(consumer_group)
         @consumer_group = consumer_group
+        Persistence::Consumer.write(self)
       end
 
       # Opens connection, gets messages and calls a block for each of the incoming messages
@@ -19,10 +20,10 @@ module Karafka
       # @note This will yield with raw messages - no preprocessing or reformatting.
       def fetch_loop
         send(
-          consumer_group.batch_consuming ? :consume_each_batch : :consume_each_message
+          consumer_group.batch_fetching ? :consume_each_batch : :consume_each_message
         ) { |messages| yield(messages) }
       rescue Kafka::ProcessingError => e
-        # If there was an error during processing, we have to log it, pause current partition
+        # If there was an error during consumption, we have to log it, pause current partition
         # and process other things
         Karafka.monitor.notice_error(self.class, e.cause)
         pause(e.topic, e.partition)
@@ -36,25 +37,39 @@ module Karafka
       end
 
       # Gracefully stops topic consumption
+      # @note Stopping running consumers without a really important reason is not recommended
+      #   as until all the consumers are stopped, the server will keep running, serving only
+      #   part of the messages
       def stop
         @kafka_consumer&.stop
         @kafka_consumer = nil
       end
 
-      private
-
-      attr_reader :consumer_group
-
-      # Pauses processing of a given topic partition
+      # Pauses fetching and consumption of a given topic partition
       # @param topic [String] topic that we want to pause
       # @param partition [Integer] number partition that we want to pause
       def pause(topic, partition)
         settings = ConfigAdapter.pausing(consumer_group)
-        return false unless settings[:timeout].positive?
+        timeout = settings[:timeout]
+        raise(Errors::InvalidPauseTimeout, timeout) unless timeout.positive?
         kafka_consumer.pause(topic, partition, settings)
-        true
       end
 
+      # Marks a given message as consumed and commits the offsets
+      # @note In contrast to ruby-kafka, we commit the offset for each manual marking to be sure
+      #   that the offset commit happens asap in case of a crash
+      # @param [Karafka::Params::Params] params message that we want to mark as processed
+      def mark_as_consumed(params)
+        kafka_consumer.mark_message_as_processed(params)
+        # Trigger an immediate, blocking offset commit in order to minimize the risk of crashing
+        # before the automatic triggers have kicked in.
+        kafka_consumer.commit_offsets
+      end
+
+      private
+
+      attr_reader :consumer_group
+
       # Consumes messages from Kafka in batches
       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
       def consume_each_batch
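
The now-public pause and mark_as_consumed make manual offset management possible from application code. A minimal sketch of how that could look (the controller, action body, and Event.store! call are hypothetical; Persistence::Consumer.read is the thread-local accessor added later in this diff):

    class EventsController < Karafka::BaseController
      def consume
        params_batch.each do |params|
          Event.store!(params) # hypothetical business logic
          # Marks the message and, per the @note above, commits the offset right away
          Karafka::Persistence::Consumer.read.mark_as_consumed(params)
        end
      end
    end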

data/lib/karafka/connection/listener.rb

@@ -7,10 +7,6 @@ module Karafka
     # @note Listener itself does nothing with the message - it will return to the block
     # a raw Kafka::FetchedMessage
     class Listener
-      include Celluloid
-
-      execute_block_on_receiver :fetch_loop
-
       attr_reader :consumer_group
 
       # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group that holds details
@@ -31,7 +27,7 @@ module Karafka
       # Kafka connections / Internet connection issues / Etc. Business logic problems should not
       # propagate this far
       def fetch_loop(block)
-        messages_consumer.fetch_loop do |raw_messages|
+        consumer.fetch_loop do |raw_messages|
           block.call(consumer_group.id, raw_messages)
         end
       # This is on purpose - see the notes for this method
@@ -39,19 +35,16 @@ module Karafka
      rescue Exception => e
        # rubocop:enable RescueException
        Karafka.monitor.notice_error(self.class, e)
-        @messages_consumer&.stop
-        retry if @messages_consumer
+        @consumer&.stop
+        retry if @consumer
      end
 
      private
 
-      # @return [Karafka::Connection::MessagesConsumer] wrapped kafka consumer for a given topic
+      # @return [Karafka::Connection::Consumer] wrapped kafka consumer for a given topic
      #   consumption
-      # @note It adds consumer into Karafka::Server consumers pool for graceful shutdown on exit
-      def messages_consumer
-        @messages_consumer ||= MessagesConsumer.new(consumer_group).tap do |consumer|
-          Karafka::Server.consumers << consumer if Karafka::Server.consumers
-        end
+      def consumer
+        @consumer ||= Consumer.new(consumer_group)
      end
    end
  end

data/lib/karafka/connection/{messages_processor.rb → processor.rb}

@@ -3,14 +3,14 @@
 module Karafka
   module Connection
     # Class that consumes messages for which we listen
-    module MessagesProcessor
+    module Processor
       class << self
         # Processes messages (does something with them)
         # It will either schedule or run a proper controller action for messages
         # @note This should be looped to obtain a constant listening
         # @note We catch all the errors here, to make sure that none failures
         #   for a given consumption will affect other consumed messages
-        #   If we wouldn't catch it, it would propagate up until killing the Celluloid actor
+        #   If we wouldn't catch it, it would propagate up until killing the thread
         # @param group_id [String] group_id of a group from which a given message came
         # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages fetched from kafka
         def process(group_id, kafka_messages)
@@ -27,7 +27,7 @@ module Karafka
           # Depending on a case (persisted or not) we might use new controller instance per each
           # batch, or use the same instance for all of them (for implementing buffering, etc)
           send(
-            topic.batch_processing ? :process_batch : :process_each,
+            topic.batch_consuming ? :process_batch : :process_each,
             controller,
             kafka_messages
           )
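
Two renames meet here: the topic-level batch_processing flag becomes batch_consuming, while the consumer-group-level batch_consuming flag (checked in consumer.rb above) becomes batch_fetching. In routing terms this would look roughly like the following sketch (group, topic, and controller names are illustrative):

    Karafka::App.routes.draw do
      consumer_group :events do
        batch_fetching true       # 1.0 name: batch_consuming
        topic :visits do
          controller VisitsController
          batch_consuming true    # 1.0 name: batch_processing
        end
      end
    end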

data/lib/karafka/controllers/callbacks.rb (new file)

@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Controllers
+    # Additional callbacks that can be used to trigger some actions on certain moments like
+    # manual offset management, committing or anything else outside of a standard messages flow
+    # They are not included by default, as we don't want to provide functionalities that are
+    # not required by users
+    # Please refer to the wiki callbacks page for more details on how to use them
+    module Callbacks
+      # Types of events on which we run callbacks
+      TYPES = %i[
+        after_fetched
+        after_poll
+        before_poll
+        before_stop
+      ].freeze
+
+      # Class methods needed to make callbacks run
+      module ClassMethods
+        TYPES.each do |type|
+          # Creates a callback wrapper
+          # @param method_name [Symbol, String] method name or nil if we plan to provide a block
+          # @yield A block with code that should be executed before scheduling
+          define_method type do |method_name = nil, &block|
+            set_callback type, :before, method_name ? method_name : block
+          end
+        end
+      end
+
+      # @param controller_class [Class] controller class that we extend with callbacks
+      def self.included(controller_class)
+        controller_class.class_eval do
+          extend ClassMethods
+          include ActiveSupport::Callbacks
+
+          # The call method is wrapped with a set of callbacks
+          # We won't run process if any of the callbacks throw abort
+          # @see http://api.rubyonrails.org/classes/ActiveSupport/Callbacks/ClassMethods.html#method-i-get_callbacks
+          TYPES.each { |type| define_callbacks type }
+        end
+      end
+
+      # Executes the default controller flow, runs callbacks and if not halted will call process
+      # method of a proper backend. This is here because it interacts with the default Karafka
+      # call flow and needs to be overwritten in order to support callbacks
+      def call
+        run_callbacks :after_fetched do
+          process
+        end
+      end
+    end
+  end
+end
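
Since the module is opt-in, a controller has to include it explicitly. A sketch of the intended usage based on the API above (controller name and callback bodies are illustrative; per the comment in call, throwing :abort in after_fetched skips processing):

    class AuditController < Karafka::BaseController
      include Karafka::Controllers::Callbacks

      # Block form - runs before processing is scheduled
      after_fetched do
        throw(:abort) if params_batch.to_a.empty? # hypothetical guard
      end

      # Method-name form
      before_stop :flush_buffers

      private

      def flush_buffers
        # hypothetical cleanup executed before the consumer stops
      end
    end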

data/lib/karafka/controllers/includer.rb

@@ -34,7 +34,7 @@ module Karafka
       # @param controller_class [Class] controller class
       # @param topic [Karafka::Routing::Topic] topic of a controller class
       def bind_params(controller_class, topic)
-        return if topic.batch_processing
+        return if topic.batch_consuming
         controller_class.include SingleParams
       end
 

data/lib/karafka/controllers/single_params.rb

@@ -2,11 +2,11 @@
 
 module Karafka
   module Controllers
-    # Params alias for single message processing controllers
+    # Params alias for single message consumption controllers
     module SingleParams
       private
 
-      # @return [Karafka::Params::Params] params instance for non batch processed controllers
+      # @return [Karafka::Params::Params] params instance for non batch consumption controllers
      def params
        params_batch.first
      end
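
With batch_consuming off, the includer above mixes SingleParams into the controller, so message handling reads naturally in the singular; e.g. (controller and action body are illustrative):

    class PingController < Karafka::BaseController
      def consume
        # params is simply params_batch.first
        Karafka.logger.info "Received offset #{params.offset} from #{params.topic}"
      end
    end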

data/lib/karafka/errors.rb

@@ -32,5 +32,12 @@ module Karafka
 
     # Raised when we try to use Karafka CLI commands (except install) without a bootfile
     MissingBootFile = Class.new(BaseError)
+
+    # Raised when we want to read a persisted thread messages consumer but it is unavailable
+    # This should never happen and if it does, please contact us
+    MissingConsumer = Class.new(BaseError)
+
+    # Raised when we attempt to pause a partition but the pause timeout is equal to 0
+    InvalidPauseTimeout = Class.new(BaseError)
   end
 end
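
InvalidPauseTimeout backs the behavior change in consumer.rb above: pausing with a non-positive timeout now fails loudly instead of silently returning false. Roughly (consumer here stands for a Karafka::Connection::Consumer instance; topic and partition values are illustrative):

    begin
      consumer.pause('visits', 0)
    rescue Karafka::Errors::InvalidPauseTimeout => e
      Karafka.logger.error "Pausing is disabled (pause timeout must be positive): #{e}"
    end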

data/lib/karafka/fetcher.rb

@@ -9,14 +9,20 @@ module Karafka
     # Fetch loop should never end, which means that we won't create more actor clusters
     # so we don't have to terminate them
     def fetch_loop
-      futures = listeners.map do |listener|
-        listener.future.public_send(:fetch_loop, processor)
+      threads = listeners.map do |listener|
+        # We abort on exception because there should be an exception handling developed for
+        # each listener running in separate threads, so the exceptions should never leak
+        # and if that happens, it means that something really bad happened and we should stop
+        # the whole process
+        Thread
+          .new { listener.fetch_loop(processor) }
+          .tap { |thread| thread.abort_on_exception = true }
       end
 
-      futures.map(&:value)
+      threads.each(&:join)
     # If anything crashes here, we need to raise the error and crash the runner because it means
     # that something really bad happened
-    rescue => e
+    rescue StandardError => e
       Karafka.monitor.notice_error(self.class, e)
       Karafka::App.stop!
       raise e
@@ -35,7 +41,7 @@ module Karafka
     # @yieldparam messages [Array<Kafka::FetchedMessage>] messages from kafka (raw)
     def processor
       lambda do |group_id, messages|
-        Karafka::Connection::MessagesProcessor.process(group_id, messages)
+        Karafka::Connection::Processor.process(group_id, messages)
       end
     end
   end
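
This is the heart of the Celluloid removal visible in the Gemfile.lock and gemspec changes above: listener futures are replaced with plain Ruby threads. A standalone illustration (not Karafka code) of the abort_on_exception contract the new loop relies on:

    threads = 2.times.map do |i|
      Thread
        .new { sleep(i); raise "listener #{i} died" } # stand-in for a fetch loop
        .tap { |thread| thread.abort_on_exception = true }
    end
    # Without abort_on_exception the failure would only surface at #join;
    # with it, an unhandled exception in any thread kills the whole process.
    threads.each(&:join)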

data/lib/karafka/monitor.rb

@@ -13,7 +13,7 @@ module Karafka
     include Singleton
 
     # This method is executed in many important places in the code (during data flow), like
-    # the moment before #perform_async, etc. For full list just grep for 'monitor.notice'
+    # the moment before #consume_async, etc. For full list just grep for 'monitor.notice'
     # @param caller_class [Class] class of object that executed this call
     # @param options [Hash] hash with options that we passed to notice. It differs based
     #   on who is calling and when
@@ -55,7 +55,7 @@ module Karafka
     def caller_exceptions_map
       @caller_exceptions_map ||= {
         error: [
-          Karafka::Connection::MessagesConsumer,
+          Karafka::Connection::Consumer,
           Karafka::Connection::Listener,
           Karafka::Params::Params
         ],

data/lib/karafka/params/params.rb

@@ -14,6 +14,7 @@ module Karafka
       partition
       offset
       key
+      create_time
     ].freeze
 
     # Params attributes that should be available via a method call invocation for Kafka
@@ -26,6 +27,7 @@ module Karafka
       partition
       offset
       key
+      create_time
     ].freeze
 
     class << self
@@ -46,7 +48,7 @@ module Karafka
         if message.is_a?(Hash)
           new(parser: parser).send(:merge!, message)
         else
-          # This happens inside Kafka::FetchedMessagesProcessor
+          # This happens inside Kafka::FetchedProcessor
           new(
             parser: parser,
             parsed: false,
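
create_time (the message's creation timestamp from Kafka) joins topic, partition, offset, and key as a method-accessible attribute on params, e.g. (illustrative):

    class VisitsController < Karafka::BaseController
      def consume
        lag = Time.now - params.create_time
        Karafka.logger.debug "#{params.topic}/#{params.partition}@#{params.offset} produced #{lag.round(2)}s ago"
      end
    end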

data/lib/karafka/params/params_batch.rb

@@ -4,7 +4,7 @@ module Karafka
   module Params
     # Params batch represents a set of messages received from Kafka.
     # @note Params internally are lazy loaded before first use. That way we can skip parsing
-    #   process if we have after_received that rejects some incoming messages without using params
+    #   process if we have after_fetched that rejects some incoming messages without using params
     #   It can be also used when handling really heavy data (in terms of parsing).
     class ParamsBatch
       include Enumerable

data/lib/karafka/patches/dry_configurable.rb

@@ -29,5 +29,3 @@ module Karafka
     end
   end
 end
-
-::Dry::Configurable::Config.prepend(Karafka::Patches::DryConfigurable)

data/lib/karafka/patches/ruby_kafka.rb (new file)

@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Patches
+    # Patches for the ruby-kafka gem
+    module RubyKafka
+      # This patch allows us to inject business logic in between fetches and before the consumer
+      # stop, so we can perform stop commit or anything else that we need since
+      # ruby-kafka fetch loop does not allow that directly
+      # We don't want to use the poll ruby-kafka api as it brings many more problems that we would
+      # have to take care of. That way, nothing like that ever happens but we get the control
+      # over the stopping process that we need (since we're the ones that initiate it for each
+      # thread)
+      def consumer_loop
+        super do
+          controllers = Karafka::Persistence::Controller
+                        .all
+                        .values
+                        .flat_map(&:values)
+                        .select { |ctrl| ctrl.respond_to?(:run_callbacks) }
+
+          if Karafka::App.stopped?
+            controllers.each { |ctrl| ctrl.run_callbacks :before_stop }
+            Karafka::Persistence::Consumer.read.stop
+          else
+            controllers.each { |ctrl| ctrl.run_callbacks :before_poll }
+            yield
+            controllers.each { |ctrl| ctrl.run_callbacks :after_poll }
+          end
+        end
+      end
+    end
+  end
+end
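
Because consumer_loop calls super, the module has to be prepended onto ruby-kafka's consumer class; presumably the lib/karafka.rb change in this release wires that up, along the lines of:

    # Assumed wiring - prepending puts the patch ahead of the original method
    Kafka::Consumer.prepend(Karafka::Patches::RubyKafka)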

data/lib/karafka/persistence/consumer.rb (new file)

@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Persistence
+    # Persistence layer to store current thread messages consumer for further use
+    class Consumer
+      # Thread.current key under which we store current thread messages consumer
+      PERSISTENCE_SCOPE = :consumer
+
+      # @param consumer [Karafka::Connection::Consumer] messages consumer of
+      #   a current thread
+      # @return [Karafka::Connection::Consumer] persisted messages consumer
+      def self.write(consumer)
+        Thread.current[PERSISTENCE_SCOPE] = consumer
+      end
+
+      # @return [Karafka::Connection::Consumer] persisted messages consumer
+      # @raise [Karafka::Errors::MissingConsumer] raised when no thread messages consumer
+      #   but we try to use it anyway
+      def self.read
+        Thread.current[PERSISTENCE_SCOPE] || raise(Errors::MissingConsumer)
+      end
+    end
+  end
+end

data/lib/karafka/persistence/controller.rb

@@ -8,15 +8,30 @@ module Karafka
     # topic and partition to store some additional details when the persistent mode
     # for a given topic is turned on
     class Controller
-      # Used to build (if block given) and/or fetch a current controller instance that will be used
-      # to process messages from a given topic and partition
-      # @return [Karafka::BaseController] base controller descendant
-      # @param topic [Karafka::Routing::Topic] topic instance for which we might cache
-      # @param partition [Integer] number of partition for which we want to cache
-      def self.fetch(topic, partition)
-        return yield unless topic.persistent
-        Thread.current[topic.id] ||= {}
-        Thread.current[topic.id][partition] ||= yield
+      # Thread.current scope under which we store controllers data
+      PERSISTENCE_SCOPE = :controllers
+
+      class << self
+        # @return [Hash] current thread persistence scope hash with all the controllers
+        def all
+          Thread.current[PERSISTENCE_SCOPE] ||= {}
+        end
+
+        # Used to build (if block given) and/or fetch a current controller instance that will be
+        # used to process messages from a given topic and partition
+        # @return [Karafka::BaseController] base controller descendant
+        # @param topic [Karafka::Routing::Topic] topic instance for which we might cache
+        # @param partition [Integer] number of partition for which we want to cache
+        def fetch(topic, partition)
+          all[topic.id] ||= {}
+
+          # We always store a current instance
+          if topic.persistent
+            all[topic.id][partition] ||= yield
+          else
+            all[topic.id][partition] = yield
+          end
+        end
       end
     end
   end
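
The Processor would use this cache along these lines (a sketch; the block builds a fresh controller only when no persisted one exists for persistent topics, and on every call otherwise):

    controller = Karafka::Persistence::Controller.fetch(topic, partition) do
      topic.controller.new # assumes topic.controller returns the routed controller class
    end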

data/lib/karafka/process.rb

@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 module Karafka
-  # Class used to catch signals from ruby Signal class in order to manage Karafka shutdown
+  # Class used to catch signals from ruby Signal class in order to manage Karafka stop
   # @note There might be only one process - this class is a singleton
   class Process
     include Singleton