karafka 1.0.0 → 1.2.0

Files changed (83)
  1. checksums.yaml +5 -5
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +3 -1
  4. data/CHANGELOG.md +90 -3
  5. data/CONTRIBUTING.md +5 -6
  6. data/Gemfile +1 -1
  7. data/Gemfile.lock +59 -64
  8. data/README.md +28 -57
  9. data/bin/karafka +13 -1
  10. data/config/errors.yml +6 -0
  11. data/karafka.gemspec +10 -9
  12. data/lib/karafka.rb +19 -10
  13. data/lib/karafka/app.rb +8 -15
  14. data/lib/karafka/attributes_map.rb +4 -4
  15. data/lib/karafka/backends/inline.rb +2 -3
  16. data/lib/karafka/base_consumer.rb +68 -0
  17. data/lib/karafka/base_responder.rb +41 -17
  18. data/lib/karafka/callbacks.rb +30 -0
  19. data/lib/karafka/callbacks/config.rb +22 -0
  20. data/lib/karafka/callbacks/dsl.rb +16 -0
  21. data/lib/karafka/cli/base.rb +2 -0
  22. data/lib/karafka/cli/flow.rb +1 -1
  23. data/lib/karafka/cli/info.rb +1 -2
  24. data/lib/karafka/cli/install.rb +2 -3
  25. data/lib/karafka/cli/server.rb +9 -12
  26. data/lib/karafka/connection/client.rb +117 -0
  27. data/lib/karafka/connection/config_adapter.rb +30 -14
  28. data/lib/karafka/connection/delegator.rb +46 -0
  29. data/lib/karafka/connection/listener.rb +22 -20
  30. data/lib/karafka/consumers/callbacks.rb +54 -0
  31. data/lib/karafka/consumers/includer.rb +51 -0
  32. data/lib/karafka/consumers/responders.rb +24 -0
  33. data/lib/karafka/{controllers → consumers}/single_params.rb +3 -3
  34. data/lib/karafka/errors.rb +19 -2
  35. data/lib/karafka/fetcher.rb +30 -28
  36. data/lib/karafka/helpers/class_matcher.rb +8 -8
  37. data/lib/karafka/helpers/config_retriever.rb +2 -2
  38. data/lib/karafka/instrumentation/listener.rb +112 -0
  39. data/lib/karafka/instrumentation/logger.rb +55 -0
  40. data/lib/karafka/instrumentation/monitor.rb +64 -0
  41. data/lib/karafka/loader.rb +0 -1
  42. data/lib/karafka/params/dsl.rb +156 -0
  43. data/lib/karafka/params/params_batch.rb +7 -2
  44. data/lib/karafka/patches/dry_configurable.rb +7 -7
  45. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  46. data/lib/karafka/persistence/client.rb +25 -0
  47. data/lib/karafka/persistence/consumer.rb +38 -0
  48. data/lib/karafka/persistence/topic.rb +29 -0
  49. data/lib/karafka/process.rb +6 -5
  50. data/lib/karafka/responders/builder.rb +15 -14
  51. data/lib/karafka/responders/topic.rb +8 -1
  52. data/lib/karafka/routing/builder.rb +2 -2
  53. data/lib/karafka/routing/consumer_group.rb +1 -1
  54. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  55. data/lib/karafka/routing/router.rb +1 -1
  56. data/lib/karafka/routing/topic.rb +5 -11
  57. data/lib/karafka/routing/{mapper.rb → topic_mapper.rb} +2 -2
  58. data/lib/karafka/schemas/config.rb +4 -5
  59. data/lib/karafka/schemas/consumer_group.rb +45 -24
  60. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  61. data/lib/karafka/schemas/responder_usage.rb +1 -0
  62. data/lib/karafka/server.rb +39 -20
  63. data/lib/karafka/setup/config.rb +74 -51
  64. data/lib/karafka/setup/configurators/base.rb +6 -12
  65. data/lib/karafka/setup/configurators/params.rb +25 -0
  66. data/lib/karafka/setup/configurators/water_drop.rb +15 -14
  67. data/lib/karafka/setup/dsl.rb +22 -0
  68. data/lib/karafka/templates/{application_controller.rb.example → application_consumer.rb.example} +2 -3
  69. data/lib/karafka/templates/karafka.rb.example +18 -5
  70. data/lib/karafka/version.rb +1 -1
  71. metadata +87 -63
  72. data/.github/ISSUE_TEMPLATE.md +0 -2
  73. data/Rakefile +0 -7
  74. data/lib/karafka/base_controller.rb +0 -118
  75. data/lib/karafka/connection/messages_consumer.rb +0 -106
  76. data/lib/karafka/connection/messages_processor.rb +0 -59
  77. data/lib/karafka/controllers/includer.rb +0 -51
  78. data/lib/karafka/controllers/responders.rb +0 -19
  79. data/lib/karafka/logger.rb +0 -53
  80. data/lib/karafka/monitor.rb +0 -98
  81. data/lib/karafka/params/params.rb +0 -101
  82. data/lib/karafka/persistence.rb +0 -18
  83. data/lib/karafka/setup/configurators/celluloid.rb +0 -22
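
Taken together, the renames above (base_controller.rb removed, base_consumer.rb added, controllers → consumers, application_controller.rb.example → application_consumer.rb.example) point at the headline change of this release: controllers became consumers. A hedged before/after sketch of the migration (class names are illustrative; the #consume method name is an assumption inferred from the renamed files, not confirmed by this listing):

    # Karafka 1.0 style (see the deleted data/lib/karafka/base_controller.rb below):
    class EventsController < Karafka::BaseController
      def perform
        params_batch.each { |params| puts params }
      end
    end

    # Karafka 1.2 style, assuming the renamed base class from
    # data/lib/karafka/base_consumer.rb:
    class EventsConsumer < Karafka::BaseConsumer
      def consume
        params_batch.each { |params| puts params }
      end
    end
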
data/.github/ISSUE_TEMPLATE.md DELETED
@@ -1,2 +0,0 @@
- <!-- Love karafka? Please consider supporting our collective:
- 👉 https://opencollective.com/karafka/donate -->
data/Rakefile DELETED
@@ -1,7 +0,0 @@
- # frozen_string_literal: true
-
- require 'rspec'
- require 'rspec/core/rake_task'
-
- RSpec::Core::RakeTask.new(:spec)
- task default: :spec
data/lib/karafka/base_controller.rb DELETED
@@ -1,118 +0,0 @@
- # frozen_string_literal: true
-
- # Karafka module namespace
- module Karafka
-   # Base controller from which all Karafka controllers should inherit
-   # Similar to Rails controllers, we can define after_received callbacks
-   # that will be executed
-   #
-   # Note that if after_received returns false, the chain will be stopped and
-   # the perform method won't be executed
-   #
-   # @example Create a simple controller
-   #   class ExamplesController < Karafka::BaseController
-   #     def perform
-   #       # some logic here
-   #     end
-   #   end
-   #
-   # @example Create a controller with a block after_received
-   #   class ExampleController < Karafka::BaseController
-   #     after_received do
-   #       # Here we should have some checking logic
-   #       # If false is returned, won't schedule a perform action
-   #     end
-   #
-   #     def perform
-   #       # some logic here
-   #     end
-   #   end
-   #
-   # @example Create a controller with a method after_received
-   #   class ExampleController < Karafka::BaseController
-   #     after_received :after_received_method
-   #
-   #     def perform
-   #       # some logic here
-   #     end
-   #
-   #     private
-   #
-   #     def after_received_method
-   #       # Here we should have some checking logic
-   #       # If false is returned, won't schedule a perform action
-   #     end
-   #   end
-   class BaseController
-     extend ActiveSupport::DescendantsTracker
-     include ActiveSupport::Callbacks
-
-     # The call method is wrapped with a set of callbacks
-     # We won't run perform at the backend if any of the callbacks
-     # returns false
-     # @see http://api.rubyonrails.org/classes/ActiveSupport/Callbacks/ClassMethods.html#method-i-get_callbacks
-     define_callbacks :after_received
-
-     attr_accessor :params_batch
-
-     class << self
-       attr_reader :topic
-
-       # Assigns a topic to a controller and builds up proper controller functionalities, so it
-       # can cooperate with the topic settings
-       # @param topic [Karafka::Routing::Topic]
-       # @return [Karafka::Routing::Topic] assigned topic
-       def topic=(topic)
-         @topic = topic
-         Controllers::Includer.call(self)
-         @topic
-       end
-
-       # Creates a callback that will be executed after receiving a message but before executing
-       # the backend for processing
-       # @param method_name [Symbol, String] method name or nil if we plan to provide a block
-       # @yield A block with code that should be executed before scheduling
-       # @example Define a block after_received callback
-       #   after_received do
-       #     # logic here
-       #   end
-       #
-       # @example Define a method name after_received callback
-       #   after_received :method_name
-       def after_received(method_name = nil, &block)
-         set_callback :after_received, :before, method_name ? method_name : block
-       end
-     end
-
-     # @return [Karafka::Routing::Topic] topic to which a given controller is subscribed
-     def topic
-       self.class.topic
-     end
-
-     # Creates a lazy loaded params batch object
-     # @note Until first params usage, it won't parse data at all
-     # @param messages [Array<Kafka::FetchedMessage>, Array<Hash>] messages with raw
-     #   content (from Kafka) or messages inside a hash (from a backend, etc.)
-     # @return [Karafka::Params::ParamsBatch] lazy loaded params batch
-     def params_batch=(messages)
-       @params_batch = Karafka::Params::ParamsBatch.new(messages, topic.parser)
-     end
-
-     # Executes the default controller flow, runs callbacks and if not halted
-     # will call the process method of a proper backend
-     def call
-       run_callbacks :after_received do
-         process
-       end
-     end
-
-     private
-
-     # Method that will perform business logic on data received from Kafka
-     # @note This method needs to be implemented in a subclass. We stub it here as a failover
-     #   if someone forgets about it or makes a typo
-     def perform
-       raise NotImplementedError, 'Implement this in a subclass'
-     end
-   end
- end
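
The after_received mechanics above rely on ActiveSupport::Callbacks. A minimal standalone sketch of the same mechanism (not Karafka's actual code; the terminator configuration is an assumption made so the example halts the chain on a false return, as the comments describe):

    require 'active_support'
    require 'active_support/callbacks'

    class MiniController
      include ActiveSupport::Callbacks

      # Halt the chain when a callback returns false, mirroring the
      # documented after_received behavior
      define_callbacks :after_received,
                       terminator: ->(_target, result_lambda) { result_lambda.call == false }

      set_callback :after_received, :before, :check

      def call
        run_callbacks :after_received do
          process
        end
      end

      private

      def check
        false # returning false stops the chain, so #process never runs
      end

      def process
        puts 'processing'
      end
    end

    MiniController.new.call # prints nothing because #check returned false
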
data/lib/karafka/connection/messages_consumer.rb DELETED
@@ -1,106 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   module Connection
-     # Class used as a wrapper around Ruby-Kafka to simplify additional
-     # features that we provide/might provide in the future
-     class MessagesConsumer
-       # Creates a queue consumer that will pull the data from Kafka
-       # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
-       #   we create a client
-       # @return [Karafka::Connection::MessagesConsumer] group consumer that can subscribe to
-       #   multiple topics
-       def initialize(consumer_group)
-         @consumer_group = consumer_group
-       end
-
-       # Opens a connection, gets messages and calls a block for each of the incoming messages
-       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-       # @note This will yield raw messages - no preprocessing or reformatting.
-       def fetch_loop
-         send(
-           consumer_group.batch_consuming ? :consume_each_batch : :consume_each_message
-         ) { |messages| yield(messages) }
-       rescue Kafka::ProcessingError => e
-         # If there was an error during processing, we have to log it, pause the current
-         # partition and process other things
-         Karafka.monitor.notice_error(self.class, e.cause)
-         pause(e.topic, e.partition)
-         retry
-       # This is on purpose - see the notes for this method
-       # rubocop:disable RescueException
-       rescue Exception => e
-         # rubocop:enable RescueException
-         Karafka.monitor.notice_error(self.class, e)
-         retry
-       end
-
-       # Gracefully stops topic consumption
-       def stop
-         @kafka_consumer&.stop
-         @kafka_consumer = nil
-       end
-
-       private
-
-       attr_reader :consumer_group
-
-       # Pauses processing of a given topic partition
-       # @param topic [String] topic that we want to pause
-       # @param partition [Integer] number of the partition that we want to pause
-       def pause(topic, partition)
-         settings = ConfigAdapter.pausing(consumer_group)
-         return false unless settings[:timeout].positive?
-         kafka_consumer.pause(topic, partition, settings)
-         true
-       end
-
-       # Consumes messages from Kafka in batches
-       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-       def consume_each_batch
-         kafka_consumer.each_batch(
-           ConfigAdapter.consuming(consumer_group)
-         ) do |batch|
-           yield(batch.messages)
-         end
-       end
-
-       # Consumes messages from Kafka one by one
-       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-       def consume_each_message
-         kafka_consumer.each_message(
-           ConfigAdapter.consuming(consumer_group)
-         ) do |message|
-           # always yield an array of messages, so we have a consistent API (always a batch)
-           yield([message])
-         end
-       end
-
-       # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
-       #   that is set up to consume from topics of a given consumer group
-       def kafka_consumer
-         @kafka_consumer ||= kafka.consumer(
-           ConfigAdapter.consumer(consumer_group)
-         ).tap do |consumer|
-           consumer_group.topics.each do |topic|
-             consumer.subscribe(*ConfigAdapter.subscription(topic))
-           end
-         end
-       rescue Kafka::ConnectionError
-         # If we did not wait, this would totally spam the log file with failed
-         # attempts if Kafka is down
-         sleep(consumer_group.reconnect_timeout)
-         # We don't log and just reraise - this will be logged
-         # down the road
-         raise
-       end
-
-       # @return [Kafka] returns a Kafka instance
-       # @note We don't cache it internally because we cache kafka_consumer, which uses the
-       #   kafka object instance
-       def kafka
-         Kafka.new(ConfigAdapter.client(consumer_group))
-       end
-     end
-   end
- end
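
The pause-on-error flow in fetch_loop above maps onto ruby-kafka's public consumer API. A minimal sketch of the same pattern outside Karafka (broker address, group id, topic name and the 10-second timeout are all illustrative assumptions):

    require 'kafka'

    kafka = Kafka.new(['localhost:9092'])
    consumer = kafka.consumer(group_id: 'example-group')
    consumer.subscribe('example-topic')

    begin
      consumer.each_batch do |batch|
        batch.messages.each { |message| puts message.value }
      end
    rescue Kafka::ProcessingError => e
      # Pause only the failing partition so other partitions keep flowing,
      # then re-enter the consumption loop
      consumer.pause(e.topic, e.partition, timeout: 10)
      retry
    end
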
data/lib/karafka/connection/messages_processor.rb DELETED
@@ -1,59 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   module Connection
-     # Module that consumes messages for which we listen
-     module MessagesProcessor
-       class << self
-         # Processes messages (does something with them)
-         # It will either schedule or run a proper controller action for the messages
-         # @note This should be looped to obtain constant listening
-         # @note We catch all the errors here, to make sure that no failure
-         #   for a given consumption will affect other consumed messages
-         #   If we didn't catch them, they would propagate up until killing the Celluloid actor
-         # @param group_id [String] group_id of the group from which a given message came
-         # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages fetched from kafka
-         def process(group_id, kafka_messages)
-           # @note We always get messages by topic and partition, so we can take the topic from
-           #   the first one and it will be valid for all the messages
-           #   We map from the incoming topic name, as it might be namespaced, etc.
-           # @see topic_mapper internal docs
-           mapped_topic_name = Karafka::App.config.topic_mapper.incoming(kafka_messages[0].topic)
-           topic = Routing::Router.find("#{group_id}_#{mapped_topic_name}")
-
-           # Depending on the case (persisted or not) we might use a new controller instance per
-           # batch, or use the same instance for all of them (for implementing buffering, etc.)
-           controller = Persistence.fetch(topic, kafka_messages[0].partition, :controller) do
-             topic.controller.new
-           end
-
-           handler = topic.batch_processing ? :process_batch : :process_each
-           send(handler, controller, kafka_messages)
-         end
-
-         private
-
-         # Processes a whole batch in one request (all at once)
-         # @param controller [Karafka::BaseController] base controller descendant
-         # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages from kafka
-         def process_batch(controller, kafka_messages)
-           controller.params_batch = kafka_messages
-           Karafka.monitor.notice(self, kafka_messages)
-           controller.call
-         end
-
-         # Processes messages one by one (like with standard http requests)
-         # @param controller [Karafka::BaseController] base controller descendant
-         # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages from kafka
-         def process_each(controller, kafka_messages)
-           kafka_messages.each do |kafka_message|
-             # @note This is a simple trick - we just process one after another, but in order
-             #   not to handle both cases (single vs batch) everywhere, we just "fake" batching
-             #   with a single message each time
-             process_batch(controller, [kafka_message])
-           end
-         end
-       end
-     end
-   end
- end
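
The "fake batching" trick noted in process_each above keeps a single code path for both consumption modes: the single-message mode simply wraps each message in a one-element array. A tiny self-contained illustration (hypothetical method names, not Karafka's API):

    def handle_batch(messages)
      messages.each { |message| puts "handling #{message}" }
    end

    def handle_each(messages)
      # Wrap every message in a one-element array so the batch path
      # is the only processing path that exists
      messages.each { |message| handle_batch([message]) }
    end

    handle_each(%w[a b c]) # each message flows through handle_batch
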
data/lib/karafka/controllers/includer.rb DELETED
@@ -1,51 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   # Additional functionalities for controllers
-   module Controllers
-     # Module used to inject functionalities into a given controller class, based on the
-     # controller topic and its settings
-     # We don't need all the behaviors in all the cases, so it is not worth having
-     # everything in all the cases all the time
-     module Includer
-       class << self
-         # @param controller_class [Class] controller class that will get some functionalities
-         #   based on the topic under which it operates
-         def call(controller_class)
-           topic = controller_class.topic
-
-           bind_backend(controller_class, topic)
-           bind_params(controller_class, topic)
-           bind_responders(controller_class, topic)
-         end
-
-         private
-
-         # Figures out the backend for a given controller class, based on the topic backend,
-         # and includes it into the controller class
-         # @param controller_class [Class] controller class
-         # @param topic [Karafka::Routing::Topic] topic of a controller class
-         def bind_backend(controller_class, topic)
-           backend = Kernel.const_get("::Karafka::Backends::#{topic.backend.to_s.capitalize}")
-           controller_class.include backend
-         end
-
-         # Adds single #params support for non batch processed topics
-         # @param controller_class [Class] controller class
-         # @param topic [Karafka::Routing::Topic] topic of a controller class
-         def bind_params(controller_class, topic)
-           return if topic.batch_processing
-           controller_class.include SingleParams
-         end
-
-         # Adds responders support for topics and controllers with responders defined for them
-         # @param controller_class [Class] controller class
-         # @param topic [Karafka::Routing::Topic] topic of a controller class
-         def bind_responders(controller_class, topic)
-           return unless topic.responder
-           controller_class.include Responders
-         end
-       end
-     end
-   end
- end
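
The Includer above composes a class at boot time: it resolves feature modules by name and mixes in only those the topic's settings call for. A standalone sketch of the same pattern (all names here are hypothetical, chosen to mirror the bind_* methods):

    # Feature module that only single-message topics receive
    module SingleParams
      def params
        params_batch.first
      end
    end

    class ExampleConsumer
      attr_accessor :params_batch
    end

    # Resolve the module from a string at runtime, as bind_backend does
    # with Kernel.const_get, then mix it in conditionally
    batch_processing = false
    ExampleConsumer.include(Kernel.const_get('::SingleParams')) unless batch_processing

    consumer = ExampleConsumer.new
    consumer.params_batch = [{ 'id' => 1 }]
    puts consumer.params # => {"id"=>1}
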
data/lib/karafka/controllers/responders.rb DELETED
@@ -1,19 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   module Controllers
-     # Feature that allows us to use the responders flow in a controller
-     module Responders
-       # Responds with given data using a given responder. This allows us to have a similar way
-       # of defining flows as in synchronous protocols
-       # @param data Anything we want to pass to the responder, based on which we want to
-       #   trigger further Kafka responding
-       def respond_with(*data)
-         Karafka.monitor.notice(self.class, data: data)
-         # @note we build a new instance of the responder each time, as long running (persisted)
-         #   controllers can respond multiple times during their lifecycle
-         topic.responder.new(topic.parser).call(*data)
-       end
-     end
-   end
- end
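
Note the fresh-instance-per-call detail in respond_with above: because a persisted controller can live across many batches, instantiating the responder on every call keeps responses from sharing state. A minimal illustration of that design choice (hypothetical responder class, not Karafka's API):

    class GreetingResponder
      def call(*data)
        puts "delivering #{data.inspect}"
      end
    end

    def respond_with(*data)
      # A new responder per call - no state carried over between responses
      GreetingResponder.new.call(*data)
    end

    respond_with('hello')
    respond_with('world')
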
data/lib/karafka/logger.rb DELETED
@@ -1,53 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   # Default logger for the event delegator
-   # @note It uses ::Logger features - providing basic logging
-   class Logger < ::Logger
-     include Singleton
-
-     # Map containing information about the log level for a given environment
-     ENV_MAP = {
-       'production' => ::Logger::ERROR,
-       'test' => ::Logger::ERROR,
-       'development' => ::Logger::INFO,
-       'debug' => ::Logger::DEBUG,
-       default: ::Logger::INFO
-     }.freeze
-
-     # Creates a new instance of the logger, ensuring that it has a place to write to
-     def initialize(*_args)
-       ensure_dir_exists
-       super(target)
-       self.level = ENV_MAP[Karafka.env] || ENV_MAP[:default]
-     end
-
-     private
-
-     # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
-     #   to which we will be writing logs
-     #   We use this approach to log to a file and to STDOUT at the same time
-     def target
-       Karafka::Helpers::MultiDelegator
-         .delegate(:write, :close)
-         .to(STDOUT, file)
-     end
-
-     # Makes sure the log directory exists
-     def ensure_dir_exists
-       dir = File.dirname(log_path)
-       FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
-     end
-
-     # @return [Pathname] path to the file to which we should log
-     def log_path
-       @log_path ||= Karafka::App.root.join("log/#{Karafka.env}.log")
-     end
-
-     # @return [File] file to which we want to write our logs
-     # @note File is being opened in append mode ('a')
-     def file
-       @file ||= File.open(log_path, 'a')
-     end
-   end
- end
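
The dual STDOUT-and-file output above works because ::Logger only needs a target that responds to #write and #close. A self-contained re-implementation sketch of the multi-delegator idea (the MultiIO class and log path are illustrative, not Karafka's helper):

    require 'logger'

    # Forwards #write and #close to every target, so one Logger can feed
    # several IO objects at once
    class MultiIO
      def initialize(*targets)
        @targets = targets
      end

      def write(*args)
        @targets.each { |target| target.write(*args) }
      end

      def close
        @targets.each(&:close)
      end
    end

    log_file = File.open('example.log', 'a')
    logger = Logger.new(MultiIO.new($stdout, log_file))
    logger.info('written to both STDOUT and example.log')
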
data/lib/karafka/monitor.rb DELETED
@@ -1,98 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   # Monitor is used to hook up external monitoring services to monitor how Karafka works
-   # It provides a standardized API for checking incoming messages/enqueueing, etc.
-   # By default it implements logging functionalities but can be replaced with any more
-   # sophisticated logging/monitoring system like Errbit, Airbrake, NewRelic
-   # @note This class acts as a singleton because we are only permitted to have a single
-   #   monitor per running process (just as with the logger)
-   #   Keep in mind that if you create your own monitor object, you will also have to
-   #   implement the logging functionality (or just inherit, call super and do whatever you want)
-   class Monitor
-     include Singleton
-
-     # This method is executed in many important places in the code (during data flow), like
-     # the moment before #perform_async, etc. For the full list just grep for 'monitor.notice'
-     # @param caller_class [Class] class of the object that executed this call
-     # @param options [Hash] hash with options that we passed to notice. It differs based
-     #   on who is calling and when
-     # @note We don't provide the name of the method in which this was called, because we can
-     #   take it directly from Ruby (see the #caller_label method of this class for details)
-     # @example Notice about consuming with controller_class
-     #   Karafka.monitor.notice(self.class, controller_class: controller_class)
-     # @example Notice about terminating with a signal
-     #   Karafka.monitor.notice(self.class, signal: signal)
-     def notice(caller_class, options = {})
-       logger.info("#{caller_class}##{caller_label} with #{options}")
-     end
-
-     # This method is executed when we want to notify about an error that happened somewhere
-     # in the system
-     # @param caller_class [Class] class of the object that executed this call
-     # @param e [Exception] exception that was raised
-     # @note We don't provide the name of the method in which this was called, because we can
-     #   take it directly from Ruby (see the #caller_label method of this class for details)
-     # @example Notify about an error
-     #   Karafka.monitor.notice_error(self.class, e)
-     def notice_error(caller_class, e)
-       caller_exceptions_map.each do |level, types|
-         next unless types.include?(caller_class)
-
-         return logger.public_send(level, e)
-       end
-
-       logger.info(e)
-     end
-
-     private
-
-     # @return [Hash] hash containing information on which notification level we should
-     #   use for exceptions that happen in certain parts of Karafka
-     # @note Keep in mind that any class not handled here will be logged with info
-     # @note Those are not maps of exception classes but of the classes that were the callers
-     #   when a particular exception happened
-     def caller_exceptions_map
-       @caller_exceptions_map ||= {
-         error: [
-           Karafka::Connection::MessagesConsumer,
-           Karafka::Connection::Listener,
-           Karafka::Params::Params
-         ],
-         fatal: [
-           Karafka::Fetcher
-         ]
-       }
-     end
-
-     # @return [String] label of the method that invoked #notice or #notice_error
-     # @example Check the label of the method that invoked #notice
-     #   caller_label #=> 'fetch'
-     # @example Check the label of the method that invoked #notice in a block
-     #   caller_label #=> 'block in fetch'
-     # @example Check the label of the method that invoked #notice_error
-     #   caller_label #=> 'rescue in target'
-     def caller_label
-       # We need to calculate ancestors because if someone inherits
-       # from this class, the caller chain is longer
-       index = self.class.ancestors.index(Karafka::Monitor)
-       # caller_locations differs in result depending on whether this is a subclass of
-       # Karafka::Monitor, the basic Karafka::Monitor itself or a super call from a subclass.
-       # So to cover all the cases we need to differentiate.
-       # @see https://github.com/karafka/karafka/issues/128
-       # @note It won't work if the caller class of caller_label is defined using
-       #   define_method
-       super_execution = caller_locations(1, 2)[0].label == caller_locations(1, 2)[1].label
-
-       scope = super_execution ? 1 : nil
-       scope ||= index.positive? ? 0 : 1
-
-       caller_locations(index + 1, 2)[scope].label
-     end
-
-     # @return [Logger] logger instance
-     def logger
-       Karafka.logger
-     end
-   end
- end
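
The caller_label trick above is plain Ruby introspection: caller_locations exposes the label of the calling frame, so the monitor can log the invoking method's name without being told it. A tiny standalone sketch (method names are illustrative):

    def notice
      # Frame 1 is our direct caller; its label is the method name
      puts "called from ##{caller_locations(1, 1)[0].label}"
    end

    def fetch
      notice
    end

    fetch # prints something like: called from #fetch
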