karafka 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.console_irbrc +13 -0
  3. data/.github/ISSUE_TEMPLATE.md +2 -0
  4. data/.gitignore +68 -0
  5. data/.rspec +1 -0
  6. data/.ruby-gemset +1 -0
  7. data/.ruby-version +1 -0
  8. data/.travis.yml +17 -0
  9. data/CHANGELOG.md +371 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +42 -0
  12. data/Gemfile +12 -0
  13. data/Gemfile.lock +111 -0
  14. data/MIT-LICENCE +18 -0
  15. data/README.md +95 -0
  16. data/bin/karafka +19 -0
  17. data/config/errors.yml +6 -0
  18. data/karafka.gemspec +35 -0
  19. data/lib/karafka.rb +68 -0
  20. data/lib/karafka/app.rb +52 -0
  21. data/lib/karafka/attributes_map.rb +67 -0
  22. data/lib/karafka/backends/inline.rb +17 -0
  23. data/lib/karafka/base_controller.rb +60 -0
  24. data/lib/karafka/base_responder.rb +185 -0
  25. data/lib/karafka/cli.rb +54 -0
  26. data/lib/karafka/cli/base.rb +78 -0
  27. data/lib/karafka/cli/console.rb +29 -0
  28. data/lib/karafka/cli/flow.rb +46 -0
  29. data/lib/karafka/cli/info.rb +29 -0
  30. data/lib/karafka/cli/install.rb +43 -0
  31. data/lib/karafka/cli/server.rb +67 -0
  32. data/lib/karafka/connection/config_adapter.rb +112 -0
  33. data/lib/karafka/connection/consumer.rb +121 -0
  34. data/lib/karafka/connection/listener.rb +51 -0
  35. data/lib/karafka/connection/processor.rb +61 -0
  36. data/lib/karafka/controllers/callbacks.rb +54 -0
  37. data/lib/karafka/controllers/includer.rb +51 -0
  38. data/lib/karafka/controllers/responders.rb +19 -0
  39. data/lib/karafka/controllers/single_params.rb +15 -0
  40. data/lib/karafka/errors.rb +43 -0
  41. data/lib/karafka/fetcher.rb +48 -0
  42. data/lib/karafka/helpers/class_matcher.rb +78 -0
  43. data/lib/karafka/helpers/config_retriever.rb +46 -0
  44. data/lib/karafka/helpers/multi_delegator.rb +33 -0
  45. data/lib/karafka/loader.rb +29 -0
  46. data/lib/karafka/logger.rb +53 -0
  47. data/lib/karafka/monitor.rb +98 -0
  48. data/lib/karafka/params/params.rb +128 -0
  49. data/lib/karafka/params/params_batch.rb +41 -0
  50. data/lib/karafka/parsers/json.rb +38 -0
  51. data/lib/karafka/patches/dry_configurable.rb +31 -0
  52. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  53. data/lib/karafka/persistence/consumer.rb +25 -0
  54. data/lib/karafka/persistence/controller.rb +38 -0
  55. data/lib/karafka/process.rb +63 -0
  56. data/lib/karafka/responders/builder.rb +35 -0
  57. data/lib/karafka/responders/topic.rb +57 -0
  58. data/lib/karafka/routing/builder.rb +61 -0
  59. data/lib/karafka/routing/consumer_group.rb +61 -0
  60. data/lib/karafka/routing/consumer_mapper.rb +33 -0
  61. data/lib/karafka/routing/proxy.rb +37 -0
  62. data/lib/karafka/routing/router.rb +29 -0
  63. data/lib/karafka/routing/topic.rb +66 -0
  64. data/lib/karafka/routing/topic_mapper.rb +55 -0
  65. data/lib/karafka/schemas/config.rb +21 -0
  66. data/lib/karafka/schemas/consumer_group.rb +65 -0
  67. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  68. data/lib/karafka/schemas/responder_usage.rb +39 -0
  69. data/lib/karafka/schemas/server_cli_options.rb +43 -0
  70. data/lib/karafka/server.rb +62 -0
  71. data/lib/karafka/setup/config.rb +163 -0
  72. data/lib/karafka/setup/configurators/base.rb +35 -0
  73. data/lib/karafka/setup/configurators/water_drop.rb +29 -0
  74. data/lib/karafka/status.rb +25 -0
  75. data/lib/karafka/templates/application_controller.rb.example +7 -0
  76. data/lib/karafka/templates/application_responder.rb.example +11 -0
  77. data/lib/karafka/templates/karafka.rb.example +41 -0
  78. data/lib/karafka/version.rb +7 -0
  79. data/log/.gitkeep +0 -0
  80. metadata +267 -0
# frozen_string_literal: true

module Karafka
  module Connection
    # A single listener that listens to incoming messages from a single route
    # @note It does not loop on itself - it needs to be executed in a loop
    # @note Listener itself does nothing with the message - it will return to the block
    #   a raw Kafka::FetchedMessage
    class Listener
      # @return [Karafka::Routing::ConsumerGroup] consumer group this listener works for
      attr_reader :consumer_group

      # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group that holds details
      #   on what topics and with what settings should we listen
      # @return [Karafka::Connection::Listener] listener instance
      def initialize(consumer_group)
        @consumer_group = consumer_group
      end

      # Opens connection, gets messages and calls a block for each of the incoming messages
      # @param block [Proc] proc invoked with the consumer group id and each batch of
      #   raw fetched messages
      # @yieldparam [String] consumer group id
      # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
      # @note This will yield with a raw message - no preprocessing or reformatting
      # @note We catch all the errors here, so they don't affect other listeners (or this one)
      #   so we will be able to listen and consume other incoming messages.
      #   Since it is run inside Karafka::Connection::ActorCluster - catching all the exceptions
      #   won't crash the whole cluster. Here we mostly focus on catching the exceptions related to
      #   Kafka connections / Internet connection issues / Etc. Business logic problems should not
      #   propagate this far
      def fetch_loop(block)
        consumer.fetch_loop do |raw_messages|
          block.call(consumer_group.id, raw_messages)
        end
      # This is on purpose - see the notes for this method
      # rubocop:disable RescueException
      rescue Exception => e
        # rubocop:enable RescueException
        Karafka.monitor.notice_error(self.class, e)
        # Stop the underlying consumer (if it was ever built) before retrying the loop
        @consumer&.stop
        # NOTE(review): @consumer stays assigned after #stop, so we retry whenever the
        # consumer was built at least once - presumably Consumer#stop resets its internal
        # connection so the retried fetch_loop reconnects; confirm against Consumer impl
        retry if @consumer
      end

      private

      # Lazily builds (and memoizes) the wrapped consumer for this consumer group
      # @return [Karafka::Connection::Consumer] wrapped kafka consumer for a given topic
      #   consumption
      def consumer
        @consumer ||= Consumer.new(consumer_group)
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Connection
    # Consumes messages for which we listen by routing them to proper controllers
    module Processor
      class << self
        # Processes messages (does something with them)
        # It will either schedule or run a proper controller action for messages
        # @note This should be looped to obtain a constant listening
        # @note We catch all the errors here, to make sure that none failures
        #   for a given consumption will affect other consumed messages
        #   If we wouldn't catch it, it would propagate up until killing the thread
        # @param group_id [String] group_id of a group from which a given message came
        # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages fetched from kafka
        def process(group_id, kafka_messages)
          # All the messages in a single call share topic and partition, so the first
          # message is representative for the whole set.
          # We map from the incoming topic name, as it might be namespaced, etc.
          # @see topic_mapper internal docs
          representative = kafka_messages.first
          mapped_topic = Karafka::App.config.topic_mapper.incoming(representative.topic)
          topic = Routing::Router.find("#{group_id}_#{mapped_topic}")
          # Depending on a case (persisted or not) we might use a new controller instance
          # per each batch, or the same instance for all of them (buffering, etc)
          controller = Persistence::Controller.fetch(topic, representative.partition) do
            topic.controller.new
          end

          if topic.batch_consuming
            process_batch(controller, kafka_messages)
          else
            process_each(controller, kafka_messages)
          end
        end

        private

        # Processes a whole batch in one request (all at once)
        # @param controller [Karafka::BaseController] base controller descendant
        # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages from kafka
        def process_batch(controller, kafka_messages)
          controller.params_batch = kafka_messages
          Karafka.monitor.notice(self, kafka_messages)
          controller.call
        end

        # Processes messages one by one (like with std http requests)
        # @param controller [Karafka::BaseController] base controller descendant
        # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages from kafka
        def process_each(controller, kafka_messages)
          kafka_messages.each do |message|
            # Simple trick: we "fake" batching with a single-element batch per message,
            # so the rest of the flow never has to distinguish single vs batch handling
            process_batch(controller, [message])
          end
        end
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Controllers
    # Additional callbacks that can be used to trigger some actions on certain moments like
    # manual offset management, committing or anything else outside of a standard messages flow
    # They are not included by default, as we don't want to provide functionalities that are
    # not required by users by default
    # Please refer to the wiki callbacks page for more details on how to use them
    module Callbacks
      # Types of events on which we run callbacks
      TYPES = %i[
        after_fetched
        after_poll
        before_poll
        before_stop
      ].freeze

      # Class methods needed to make callbacks run
      module ClassMethods
        TYPES.each do |type|
          # Creates a callback wrapper
          # @param method_name [Symbol, String] method name or nil if we plan to provide a block
          # @yield A block with a code that should be executed before scheduling
          define_method type do |method_name = nil, &block|
            # Register either the named method or the given block as the callback target
            set_callback type, :before, method_name || block
          end
        end
      end

      # @param controller_class [Class] controller class that we extend with callbacks
      def self.included(controller_class)
        controller_class.class_eval do
          extend ClassMethods
          include ActiveSupport::Callbacks

          # The call method is wrapped with a set of callbacks
          # We won't run process if any of the callbacks throw abort
          # @see http://api.rubyonrails.org/classes/ActiveSupport/Callbacks/ClassMethods.html#method-i-get_callbacks
          TYPES.each { |type| define_callbacks type }
        end
      end

      # Executes the default controller flow, runs callbacks and if not halted will call process
      # method of a proper backend. This is here because it interacts with the default Karafka
      # call flow and needs to be overwritten in order to support callbacks
      def call
        run_callbacks :after_fetched do
          process
        end
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  # Additional functionalities for controllers
  module Controllers
    # Injects functionalities into a given controller class, based on the controller
    # topic and its settings
    # We don't need all the behaviors in all the cases, so it is totally not worth having
    # everything in all the cases all the time
    module Includer
      class << self
        # @param controller_class [Class] controller class, that will get some functionalities
        #   based on the topic under which it operates
        def call(controller_class)
          topic = controller_class.topic

          bind_backend(controller_class, topic)
          bind_params(controller_class, topic)
          bind_responders(controller_class, topic)
        end

        private

        # Resolves the backend module for a given controller class, based on the topic
        # backend name, and includes it into the controller class
        # @param controller_class [Class] controller class
        # @param topic [Karafka::Routing::Topic] topic of a controller class
        def bind_backend(controller_class, topic)
          backend_name = topic.backend.to_s.capitalize
          controller_class.include Kernel.const_get("::Karafka::Backends::#{backend_name}")
        end

        # Adds a single #params support for non batch processed topics
        # @param controller_class [Class] controller class
        # @param topic [Karafka::Routing::Topic] topic of a controller class
        def bind_params(controller_class, topic)
          controller_class.include SingleParams unless topic.batch_consuming
        end

        # Adds responders support for topics and controllers with responders defined for them
        # @param controller_class [Class] controller class
        # @param topic [Karafka::Routing::Topic] topic of a controller class
        def bind_responders(controller_class, topic)
          controller_class.include Responders if topic.responder
        end
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Controllers
    # Feature that allows us to use responders flow in controller
    module Responders
      # Responds with given data using given responder. This allows us to have a similar way of
      # defining flows like synchronous protocols
      # @param data Anything we want to pass to responder based on which we want to trigger further
      #   Kafka responding
      def respond_with(*data)
        Karafka.monitor.notice(self.class, data: data)
        # A fresh responder instance is built per call, since long running (persisted)
        # controllers can respond multiple times during their lifecycle
        responder = topic.responder.new(topic.parser)
        responder.call(*data)
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Controllers
    # Provides a singular #params alias for controllers that consume messages one by one
    module SingleParams
      private

      # @return [Karafka::Params::Params] first (and for non-batch consumption, only)
      #   element of the params batch, acting as the single message params
      def params
        params_batch.first
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  # Namespace used to encapsulate all the internal errors of Karafka
  module Errors
    # Base class for all the Karafka internal errors
    BaseError = Class.new(StandardError)

    # Should be raised when we attempt to parse incoming params but parsing fails
    # If this error (or its descendant) is detected, we will pass the raw message
    # into params and proceed further
    ParserError = Class.new(BaseError)

    # Raised when router receives topic name which does not correspond with any routes
    # This can only happen in a case when:
    # - you've received a message and we cannot match it with a controller
    # - you've changed the routing, so router can no longer associate your topic to
    #   any controller
    # - or in a case when you do a lot of metaprogramming and you change routing/etc on runtime
    #
    # In case this happens, you will have to create a temporary route that will allow
    # you to "eat" everything from the Sidekiq queue.
    # @see https://github.com/karafka/karafka/issues/135
    NonMatchingRouteError = Class.new(BaseError)

    # Raised when we don't use or use responder not in the way it was expected to, based on the
    # topics usage definitions
    InvalidResponderUsage = Class.new(BaseError)

    # Raised when configuration doesn't match with the validation schema
    InvalidConfiguration = Class.new(BaseError)

    # Raised when we try to use Karafka CLI commands (except install) without a boot file
    MissingBootFile = Class.new(BaseError)

    # Raised when we want to read a persisted thread messages consumer but it is unavailable
    # This should never happen and if it does, please contact us
    MissingConsumer = Class.new(BaseError)

    # Raised when we attempt to pause a partition but the pause timeout is equal to 0
    InvalidPauseTimeout = Class.new(BaseError)
  end
end
# frozen_string_literal: true

module Karafka
  # Class used to run the Karafka consumer and handle shutting down, restarting etc
  # @note Creating multiple fetchers will result in having multiple connections to the same
  #   topics, which means that if there are no partitions, it won't use them.
  class Fetcher
    # Starts listening on all the listeners asynchronously
    # Fetch loop should never end, which means that we won't create more actor clusters
    # so we don't have to terminate them
    def fetch_loop
      threads = listeners.map do |listener|
        # We abort on exception because there should be an exception handling developed for
        # each listener running in separate threads, so the exceptions should never leak
        # and if that happens, it means that something really bad happened and we should stop
        # the whole process
        thread = Thread.new { listener.fetch_loop(processor) }
        thread.abort_on_exception = true
        thread
      end

      threads.each(&:join)
    # If anything crashes here, we need to raise the error and crash the runner because it
    # means that something really bad happened
    rescue StandardError => e
      Karafka.monitor.notice_error(self.class, e)
      Karafka::App.stop!
      raise e
    end

    private

    # @return [Array<Karafka::Connection::Listener>] listeners that will consume messages
    def listeners
      @listeners ||= App.consumer_groups.active.map do |group|
        Karafka::Connection::Listener.new(group)
      end
    end

    # @return [Proc] proc that should be processed when a messages arrive
    # @yieldparam group_id [String] consumer group id
    # @yieldparam messages [Array<Kafka::FetchedMessage>] messages from kafka (raw)
    def processor
      ->(group_id, messages) { Karafka::Connection::Processor.process(group_id, messages) }
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Helpers
    # Autodetects corresponding classes that are internally inside the Karafka framework,
    # used among others to match:
    #   controller => responder
    class ClassMatcher
      # Regexp used to remove any non classy like characters that might be in the controller
      # class name (if defined dynamically, etc)
      CONSTANT_REGEXP = %r{[?!=+\-\*/\^\|&\[\]<>%~\#\:\s\(\)]}

      # @param klass [Class] class to which we want to find a corresponding class
      # @param from [String] what type of object is it (based on postfix name part)
      # @param to [String] what are we looking for (based on a postfix name part)
      # @example Controller that has a corresponding responder
      #   matcher = Karafka::Helpers::ClassMatcher.new(SuperController, 'Controller', 'Responder')
      #   matcher.match #=> SuperResponder
      # @example Controller without a corresponding responder
      #   matcher = Karafka::Helpers::ClassMatcher.new(Super2Controller, 'Controller', 'Responder')
      #   matcher.match #=> nil
      def initialize(klass, from:, to:)
        @klass = klass
        @from = from
        @to = to
      end

      # @return [Class] matched class
      # @return [nil] nil if we couldn't find matching class
      def match
        candidate = name
        return nil if candidate.empty?
        return nil unless scope.const_defined?(candidate)

        found = scope.const_get(candidate)
        found if same_scope?(found)
      end

      # @return [String] name of a new class that we're looking for
      # @note This method returns name of a class without a namespace
      # @example From SuperController matching responder
      #   matcher.name #=> 'SuperResponder'
      # @example From Namespaced::Super2Controller matching responder
      #   matcher.name #=> Super2Responder
      def name
        @klass.to_s
              .split('::')
              .last
              .to_s
              .gsub(@from, @to)
              .gsub(CONSTANT_REGEXP, '')
      end

      # @return [Class, Module] class or module in which we're looking for a matching
      def scope
        scope_of(@klass)
      end

      private

      # @param klass [Class] class for which we want to extract its enclosing class/module
      # @return [Class, Module] enclosing class/module
      # @return [::Object] object if it was a root class
      #
      # @example Non-namespaced class
      #   scope_of(SuperClass) #=> Object
      # @example Namespaced class
      #   scope_of(Abc::SuperClass) #=> Abc
      def scope_of(klass)
        namespace_parts = klass.to_s.split('::')[0...-1]
        return ::Object if namespace_parts.empty?

        ::Object.const_get(namespace_parts.join('::'))
      end

      # @param matching [Class] class of which scope we want to check
      # @return [Boolean] true if the scope of class is the same as scope of matching
      def same_scope?(matching)
        scope == scope_of(matching)
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Helpers
    # Builds accessor methods that first try the instance-level value of an attribute
    # and, when it is nil, fall back to the default config (or config.kafka) value,
    # memoizing the result per instance. It is used to simplify the checkings.
    # @note Worth noticing, that the value might be equal to false, so even
    #   then we need to return it. That's why we check for nil?
    # @example Define config retrieved attribute for start_from_beginning
    #   class Test
    #     extend Karafka::Helpers::ConfigRetriever
    #     config_retriever_for :start_from_beginning
    #   end
    #
    #   Test.new.start_from_beginning #=> false
    #   test_instance = Test.new
    #   test_instance.start_from_beginning = true
    #   test_instance.start_from_beginning #=> true
    module ConfigRetriever
      # Builds proper methods for setting and retrieving (with fallback) given attribute value
      # @param attribute [Symbol] attribute name based on which we will build
      #   accessor with fallback
      def config_retriever_for(attribute)
        attr_writer attribute unless method_defined?(:"#{attribute}=")
        # Don't redefine if we already have an accessor for a given element
        return if method_defined?(attribute)

        define_method(attribute) do
          memoized = instance_variable_get(:"@#{attribute}")
          # false is a legal setting, so only nil triggers the config fallback
          return memoized unless memoized.nil?

          config = Karafka::App.config
          fallback =
            if config.respond_to?(attribute)
              config.public_send(attribute)
            else
              config.kafka.public_send(attribute)
            end

          instance_variable_set(:"@#{attribute}", fallback)
        end
      end
    end
  end
end