karafka 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +5 -5
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +3 -1
  4. data/CHANGELOG.md +90 -3
  5. data/CONTRIBUTING.md +5 -6
  6. data/Gemfile +1 -1
  7. data/Gemfile.lock +59 -64
  8. data/README.md +28 -57
  9. data/bin/karafka +13 -1
  10. data/config/errors.yml +6 -0
  11. data/karafka.gemspec +10 -9
  12. data/lib/karafka.rb +19 -10
  13. data/lib/karafka/app.rb +8 -15
  14. data/lib/karafka/attributes_map.rb +4 -4
  15. data/lib/karafka/backends/inline.rb +2 -3
  16. data/lib/karafka/base_consumer.rb +68 -0
  17. data/lib/karafka/base_responder.rb +41 -17
  18. data/lib/karafka/callbacks.rb +30 -0
  19. data/lib/karafka/callbacks/config.rb +22 -0
  20. data/lib/karafka/callbacks/dsl.rb +16 -0
  21. data/lib/karafka/cli/base.rb +2 -0
  22. data/lib/karafka/cli/flow.rb +1 -1
  23. data/lib/karafka/cli/info.rb +1 -2
  24. data/lib/karafka/cli/install.rb +2 -3
  25. data/lib/karafka/cli/server.rb +9 -12
  26. data/lib/karafka/connection/client.rb +117 -0
  27. data/lib/karafka/connection/config_adapter.rb +30 -14
  28. data/lib/karafka/connection/delegator.rb +46 -0
  29. data/lib/karafka/connection/listener.rb +22 -20
  30. data/lib/karafka/consumers/callbacks.rb +54 -0
  31. data/lib/karafka/consumers/includer.rb +51 -0
  32. data/lib/karafka/consumers/responders.rb +24 -0
  33. data/lib/karafka/{controllers → consumers}/single_params.rb +3 -3
  34. data/lib/karafka/errors.rb +19 -2
  35. data/lib/karafka/fetcher.rb +30 -28
  36. data/lib/karafka/helpers/class_matcher.rb +8 -8
  37. data/lib/karafka/helpers/config_retriever.rb +2 -2
  38. data/lib/karafka/instrumentation/listener.rb +112 -0
  39. data/lib/karafka/instrumentation/logger.rb +55 -0
  40. data/lib/karafka/instrumentation/monitor.rb +64 -0
  41. data/lib/karafka/loader.rb +0 -1
  42. data/lib/karafka/params/dsl.rb +156 -0
  43. data/lib/karafka/params/params_batch.rb +7 -2
  44. data/lib/karafka/patches/dry_configurable.rb +7 -7
  45. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  46. data/lib/karafka/persistence/client.rb +25 -0
  47. data/lib/karafka/persistence/consumer.rb +38 -0
  48. data/lib/karafka/persistence/topic.rb +29 -0
  49. data/lib/karafka/process.rb +6 -5
  50. data/lib/karafka/responders/builder.rb +15 -14
  51. data/lib/karafka/responders/topic.rb +8 -1
  52. data/lib/karafka/routing/builder.rb +2 -2
  53. data/lib/karafka/routing/consumer_group.rb +1 -1
  54. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  55. data/lib/karafka/routing/router.rb +1 -1
  56. data/lib/karafka/routing/topic.rb +5 -11
  57. data/lib/karafka/routing/{mapper.rb → topic_mapper.rb} +2 -2
  58. data/lib/karafka/schemas/config.rb +4 -5
  59. data/lib/karafka/schemas/consumer_group.rb +45 -24
  60. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  61. data/lib/karafka/schemas/responder_usage.rb +1 -0
  62. data/lib/karafka/server.rb +39 -20
  63. data/lib/karafka/setup/config.rb +74 -51
  64. data/lib/karafka/setup/configurators/base.rb +6 -12
  65. data/lib/karafka/setup/configurators/params.rb +25 -0
  66. data/lib/karafka/setup/configurators/water_drop.rb +15 -14
  67. data/lib/karafka/setup/dsl.rb +22 -0
  68. data/lib/karafka/templates/{application_controller.rb.example → application_consumer.rb.example} +2 -3
  69. data/lib/karafka/templates/karafka.rb.example +18 -5
  70. data/lib/karafka/version.rb +1 -1
  71. metadata +87 -63
  72. data/.github/ISSUE_TEMPLATE.md +0 -2
  73. data/Rakefile +0 -7
  74. data/lib/karafka/base_controller.rb +0 -118
  75. data/lib/karafka/connection/messages_consumer.rb +0 -106
  76. data/lib/karafka/connection/messages_processor.rb +0 -59
  77. data/lib/karafka/controllers/includer.rb +0 -51
  78. data/lib/karafka/controllers/responders.rb +0 -19
  79. data/lib/karafka/logger.rb +0 -53
  80. data/lib/karafka/monitor.rb +0 -98
  81. data/lib/karafka/params/params.rb +0 -101
  82. data/lib/karafka/persistence.rb +0 -18
  83. data/lib/karafka/setup/configurators/celluloid.rb +0 -22
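The headline change visible in this listing is the controller → consumer rename (entries 16 and 34-37 added, 74 and 77-78 removed, entry 68 renamed). Judging by those files, a 1.0-style class would be rewritten roughly like this for 1.2, with #perform becoming #consume (class and model names below are illustrative, not from this diff):

# Karafka 1.0 style (base_controller.rb, removed in this diff)
class EventsController < Karafka::BaseController
  def perform
    params_batch.each { |params| Event.create!(params) }
  end
end

# Karafka 1.2 style (base_consumer.rb, added in this diff)
class EventsConsumer < Karafka::BaseConsumer
  def consume
    params_batch.each { |params| Event.create!(params) }
  end
end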
data/.github/ISSUE_TEMPLATE.md DELETED
@@ -1,2 +0,0 @@
- <!-- Love karafka? Please consider supporting our collective:
- 👉 https://opencollective.com/karafka/donate -->
data/Rakefile DELETED
@@ -1,7 +0,0 @@
- # frozen_string_literal: true
-
- require 'rspec'
- require 'rspec/core/rake_task'
-
- RSpec::Core::RakeTask.new(:spec)
- task default: :spec
data/lib/karafka/base_controller.rb DELETED
@@ -1,118 +0,0 @@
- # frozen_string_literal: true
-
- # Karafka module namespace
- module Karafka
-   # Base controller from which all Karafka controllers should inherit
-   # Similar to Rails controllers, we can define after_received callbacks
-   # that will be executed
-   #
-   # Note that if after_received returns false, the chain will be stopped and
-   # the perform method won't be executed
-   #
-   # @example Create a simple controller
-   #   class ExamplesController < Karafka::BaseController
-   #     def perform
-   #       # some logic here
-   #     end
-   #   end
-   #
-   # @example Create a controller with a block after_received
-   #   class ExampleController < Karafka::BaseController
-   #     after_received do
-   #       # Here we should have some checking logic
-   #       # If false is returned, won't schedule a perform action
-   #     end
-   #
-   #     def perform
-   #       # some logic here
-   #     end
-   #   end
-   #
-   # @example Create a controller with a method after_received
-   #   class ExampleController < Karafka::BaseController
-   #     after_received :after_received_method
-   #
-   #     def perform
-   #       # some logic here
-   #     end
-   #
-   #     private
-   #
-   #     def after_received_method
-   #       # Here we should have some checking logic
-   #       # If false is returned, won't schedule a perform action
-   #     end
-   #   end
-   class BaseController
-     extend ActiveSupport::DescendantsTracker
-     include ActiveSupport::Callbacks
-
-     # The call method is wrapped with a set of callbacks
-     # We won't run perform at the backend if any of the callbacks
-     # returns false
-     # @see http://api.rubyonrails.org/classes/ActiveSupport/Callbacks/ClassMethods.html#method-i-get_callbacks
-     define_callbacks :after_received
-
-     attr_accessor :params_batch
-
-     class << self
-       attr_reader :topic
-
-       # Assigns a topic to a controller and builds up proper controller functionalities,
-       # so it can cooperate with the topic settings
-       # @param topic [Karafka::Routing::Topic]
-       # @return [Karafka::Routing::Topic] assigned topic
-       def topic=(topic)
-         @topic = topic
-         Controllers::Includer.call(self)
-         @topic
-       end
-
-       # Creates a callback that will be executed after receiving a message but before
-       # executing the backend for processing
-       # @param method_name [Symbol, String] method name or nil if we plan to provide a block
-       # @yield A block with code that should be executed before scheduling
-       # @example Define a block after_received callback
-       #   after_received do
-       #     # logic here
-       #   end
-       #
-       # @example Define a method name after_received callback
-       #   after_received :method_name
-       def after_received(method_name = nil, &block)
-         set_callback :after_received, :before, method_name ? method_name : block
-       end
-     end
-
-     # @return [Karafka::Routing::Topic] topic to which a given controller is subscribed
-     def topic
-       self.class.topic
-     end
-
-     # Creates a lazily loaded params batch object
-     # @note Until first params usage, it won't parse data at all
-     # @param messages [Array<Kafka::FetchedMessage>, Array<Hash>] messages with raw
-     #   content (from Kafka) or messages inside a hash (from a backend, etc.)
-     # @return [Karafka::Params::ParamsBatch] lazily loaded params batch
-     def params_batch=(messages)
-       @params_batch = Karafka::Params::ParamsBatch.new(messages, topic.parser)
-     end
-
-     # Executes the default controller flow, runs callbacks and, if not halted,
-     # calls the process method of a proper backend
-     def call
-       run_callbacks :after_received do
-         process
-       end
-     end
-
-     private
-
-     # Method that will perform business logic on data received from Kafka
-     # @note This method needs to be implemented in a subclass. We stub it here as a failover
-     #   in case someone forgets about it or makes a typo
-     def perform
-       raise NotImplementedError, 'Implement this in a subclass'
-     end
-   end
- end
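The callback machinery this class relies on is plain ActiveSupport::Callbacks; a minimal standalone sketch of the same pattern (class and callback names here are invented for illustration):

require 'active_support'
require 'active_support/callbacks'

class Handler
  include ActiveSupport::Callbacks
  # Declare a callback chain named :call, mirroring define_callbacks :after_received
  define_callbacks :call

  # Mirrors BaseController.after_received: register either a method name or a block
  def self.before_call(method_name = nil, &block)
    set_callback :call, :before, method_name || block
  end

  before_call { puts 'precondition check' }

  def call
    # Runs all registered :before filters, then the block
    run_callbacks :call do
      puts 'processing'
    end
  end
end

Handler.new.call
# => precondition check
# => processing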
data/lib/karafka/connection/messages_consumer.rb DELETED
@@ -1,106 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   module Connection
-     # Class used as a wrapper around Ruby-Kafka to simplify additional
-     # features that we provide/might provide in the future
-     class MessagesConsumer
-       # Creates a queue consumer that will pull the data from Kafka
-       # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
-       #   we create a client
-       # @return [Karafka::Connection::MessagesConsumer] group consumer that can subscribe to
-       #   multiple topics
-       def initialize(consumer_group)
-         @consumer_group = consumer_group
-       end
-
-       # Opens a connection, gets messages and calls a block for each of the incoming messages
-       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-       # @note This will yield raw messages - no preprocessing or reformatting.
-       def fetch_loop
-         send(
-           consumer_group.batch_consuming ? :consume_each_batch : :consume_each_message
-         ) { |messages| yield(messages) }
-       rescue Kafka::ProcessingError => e
-         # If there was an error during processing, we have to log it, pause the current
-         # partition and process other things
-         Karafka.monitor.notice_error(self.class, e.cause)
-         pause(e.topic, e.partition)
-         retry
-       # This is on purpose - see the notes for this method
-       # rubocop:disable RescueException
-       rescue Exception => e
-         # rubocop:enable RescueException
-         Karafka.monitor.notice_error(self.class, e)
-         retry
-       end
-
-       # Gracefully stops topic consumption
-       def stop
-         @kafka_consumer&.stop
-         @kafka_consumer = nil
-       end
-
-       private
-
-       attr_reader :consumer_group
-
-       # Pauses processing of a given topic partition
-       # @param topic [String] topic that we want to pause
-       # @param partition [Integer] number of the partition that we want to pause
-       def pause(topic, partition)
-         settings = ConfigAdapter.pausing(consumer_group)
-         return false unless settings[:timeout].positive?
-         kafka_consumer.pause(topic, partition, settings)
-         true
-       end
-
-       # Consumes messages from Kafka in batches
-       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-       def consume_each_batch
-         kafka_consumer.each_batch(
-           ConfigAdapter.consuming(consumer_group)
-         ) do |batch|
-           yield(batch.messages)
-         end
-       end
-
-       # Consumes messages from Kafka one by one
-       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-       def consume_each_message
-         kafka_consumer.each_message(
-           ConfigAdapter.consuming(consumer_group)
-         ) do |message|
-           # always yield an array of messages, so we have a consistent API (always a batch)
-           yield([message])
-         end
-       end
-
-       # @return [Kafka::Consumer] a ready-to-consume Kafka consumer
-       #   that is set up to consume from topics of a given consumer group
-       def kafka_consumer
-         @kafka_consumer ||= kafka.consumer(
-           ConfigAdapter.consumer(consumer_group)
-         ).tap do |consumer|
-           consumer_group.topics.each do |topic|
-             consumer.subscribe(*ConfigAdapter.subscription(topic))
-           end
-         end
-       rescue Kafka::ConnectionError
-         # If we did not wait, it would totally spam the log file with failed
-         # attempts if Kafka is down
-         sleep(consumer_group.reconnect_timeout)
-         # We don't log and just re-raise - this will be logged
-         # down the road
-         raise
-       end
-
-       # @return [Kafka] a Kafka client instance
-       # @note We don't cache it internally because we cache kafka_consumer, which uses the
-       #   kafka object instance
-       def kafka
-         Kafka.new(ConfigAdapter.client(consumer_group))
-       end
-     end
-   end
- end
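The pause-and-retry flow above maps onto plain ruby-kafka calls; a condensed sketch of the same pattern without the Karafka wrappers (broker address, group and topic names are placeholders):

require 'kafka' # ruby-kafka gem

kafka = Kafka.new(['localhost:9092'], client_id: 'example')
consumer = kafka.consumer(group_id: 'example-group')
consumer.subscribe('example-topic')

begin
  consumer.each_batch do |batch|
    batch.messages.each { |message| puts message.value }
  end
rescue Kafka::ProcessingError => e
  # ruby-kafka wraps errors raised in the block and exposes topic/partition,
  # so we can pause just the failing partition for 10s and keep fetching the rest
  consumer.pause(e.topic, e.partition, timeout: 10)
  retry
end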
data/lib/karafka/connection/messages_processor.rb DELETED
@@ -1,59 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   module Connection
-     # Class that consumes messages for which we listen
-     module MessagesProcessor
-       class << self
-         # Processes messages (does something with them)
-         # It will either schedule or run a proper controller action for messages
-         # @note This should be looped to obtain constant listening
-         # @note We catch all the errors here, to make sure that no failure
-         #   for a given consumption will affect other consumed messages
-         #   If we didn't catch them, they would propagate up until killing the Celluloid actor
-         # @param group_id [String] group_id of the group from which a given message came
-         # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages fetched from kafka
-         def process(group_id, kafka_messages)
-           # @note We always get messages by topic and partition, so we can take the topic from
-           #   the first one and it will be valid for all the messages
-           #   We map from the incoming topic name, as it might be namespaced, etc.
-           # @see topic_mapper internal docs
-           mapped_topic_name = Karafka::App.config.topic_mapper.incoming(kafka_messages[0].topic)
-           topic = Routing::Router.find("#{group_id}_#{mapped_topic_name}")
-
-           # Depending on the case (persisted or not) we might use a new controller instance per
-           # batch, or use the same instance for all of them (for implementing buffering, etc.)
-           controller = Persistence.fetch(topic, kafka_messages[0].partition, :controller) do
-             topic.controller.new
-           end
-
-           handler = topic.batch_processing ? :process_batch : :process_each
-           send(handler, controller, kafka_messages)
-         end
-
-         private
-
-         # Processes a whole batch in one request (all at once)
-         # @param controller [Karafka::BaseController] base controller descendant
-         # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages from kafka
-         def process_batch(controller, kafka_messages)
-           controller.params_batch = kafka_messages
-           Karafka.monitor.notice(self, kafka_messages)
-           controller.call
-         end
-
-         # Processes messages one by one (like with standard http requests)
-         # @param controller [Karafka::BaseController] base controller descendant
-         # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages from kafka
-         def process_each(controller, kafka_messages)
-           kafka_messages.each do |kafka_message|
-             # @note This is a simple trick - we just process one after another, but in order
-             #   not to handle both cases (single vs batch) everywhere, we just "fake" batching
-             #   with a single message for each
-             process_batch(controller, [kafka_message])
-           end
-         end
-       end
-     end
-   end
- end
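Persistence.fetch above hands back one controller instance per topic/partition pair; a minimal sketch of that caching idea using a plain nested Hash (the real Persistence module, deleted later in this diff, is more involved):

# Hypothetical stand-in for Karafka::Persistence - one instance per topic/partition
CONTROLLERS = Hash.new { |hash, key| hash[key] = {} }

def fetch_controller(topic, partition)
  # Reuse a cached instance when present; build and memoize it otherwise
  CONTROLLERS[topic.id][partition] ||= topic.controller.new
end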
data/lib/karafka/controllers/includer.rb DELETED
@@ -1,51 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   # Additional functionalities for controllers
-   module Controllers
-     # Module used to inject functionalities into a given controller class, based on the
-     # controller topic and its settings
-     # We don't need all the behaviors in all the cases, so it is not worth including
-     # everything everywhere all the time
-     module Includer
-       class << self
-         # @param controller_class [Class] controller class that will get some functionalities
-         #   based on the topic under which it operates
-         def call(controller_class)
-           topic = controller_class.topic
-
-           bind_backend(controller_class, topic)
-           bind_params(controller_class, topic)
-           bind_responders(controller_class, topic)
-         end
-
-         private
-
-         # Figures out the backend for a given controller class, based on the topic backend,
-         # and includes it into the controller class
-         # @param controller_class [Class] controller class
-         # @param topic [Karafka::Routing::Topic] topic of a controller class
-         def bind_backend(controller_class, topic)
-           backend = Kernel.const_get("::Karafka::Backends::#{topic.backend.to_s.capitalize}")
-           controller_class.include backend
-         end
-
-         # Adds single #params support for non batch processed topics
-         # @param controller_class [Class] controller class
-         # @param topic [Karafka::Routing::Topic] topic of a controller class
-         def bind_params(controller_class, topic)
-           return if topic.batch_processing
-           controller_class.include SingleParams
-         end
-
-         # Adds responders support for topics and controllers with responders defined for them
-         # @param controller_class [Class] controller class
-         # @param topic [Karafka::Routing::Topic] topic of a controller class
-         def bind_responders(controller_class, topic)
-           return unless topic.responder
-           controller_class.include Responders
-         end
-       end
-     end
-   end
- end
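The Kernel.const_get trick above resolves a module from a setting at runtime and mixes it in; a standalone sketch of that pattern with a made-up backend module:

module Backends
  # Made-up backend that processes messages in the consuming thread
  module Inline
    def process
      puts 'processing inline'
    end
  end
end

# Resolve the module by name and mix it in, as the Includer does
def bind_backend(klass, backend_name)
  klass.include Kernel.const_get("::Backends::#{backend_name.to_s.capitalize}")
end

class MyController; end
bind_backend(MyController, :inline)
MyController.new.process # => processing inline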
data/lib/karafka/controllers/responders.rb DELETED
@@ -1,19 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   module Controllers
-     # Feature that allows us to use the responders flow in a controller
-     module Responders
-       # Responds with given data using the given responder. This allows us to define flows
-       # in a way similar to synchronous protocols
-       # @param data Anything we want to pass to the responder, based on which we want to
-       #   trigger further Kafka responding
-       def respond_with(*data)
-         Karafka.monitor.notice(self.class, data: data)
-         # @note we build a new instance of the responder each time, as long-running (persisted)
-         #   controllers can respond multiple times during their lifecycle
-         topic.responder.new(topic.parser).call(*data)
-       end
-     end
-   end
- end
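For context, respond_with pairs with a responder class registered on the topic; a hypothetical pairing showing the typical wiring (class and topic names invented):

# Responder side - declares which topic(s) it may deliver to
class PingResponder < Karafka::BaseResponder
  topic :pong

  def respond(data)
    respond_to :pong, data
  end
end

# Controller side - respond_with hands the data to the topic's responder
class PingController < Karafka::BaseController
  def perform
    respond_with params[:data]
  end
end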
data/lib/karafka/logger.rb DELETED
@@ -1,53 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   # Default logger for the event delegator
-   # @note It uses ::Logger features - providing basic logging
-   class Logger < ::Logger
-     include Singleton
-
-     # Map containing information about the log level for a given environment
-     ENV_MAP = {
-       'production' => ::Logger::ERROR,
-       'test' => ::Logger::ERROR,
-       'development' => ::Logger::INFO,
-       'debug' => ::Logger::DEBUG,
-       default: ::Logger::INFO
-     }.freeze
-
-     # Creates a new instance of logger, ensuring that it has a place to write to
-     def initialize(*_args)
-       ensure_dir_exists
-       super(target)
-       self.level = ENV_MAP[Karafka.env] || ENV_MAP[:default]
-     end
-
-     private
-
-     # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
-     #   to which we will be writing logs
-     #   We use this approach to log stuff to a file and to STDOUT at the same time
-     def target
-       Karafka::Helpers::MultiDelegator
-         .delegate(:write, :close)
-         .to(STDOUT, file)
-     end
-
-     # Makes sure the log directory exists
-     def ensure_dir_exists
-       dir = File.dirname(log_path)
-       FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
-     end
-
-     # @return [Pathname] path to the file to which we should log
-     def log_path
-       @log_path ||= Karafka::App.root.join("log/#{Karafka.env}.log")
-     end
-
-     # @return [File] file to which we want to write our logs
-     # @note File is opened in append mode ('a')
-     def file
-       @file ||= File.open(log_path, 'a')
-     end
-   end
- end
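The Helpers::MultiDelegator target above tees every write to STDOUT and a log file; a self-contained version of the same idea (MultiIO is a stand-in name, and the log path is a placeholder):

require 'fileutils'
require 'logger'

# Minimal stand-in for Karafka::Helpers::MultiDelegator - forwards writes to many IOs
class MultiIO
  def initialize(*targets)
    @targets = targets
  end

  def write(*args)
    @targets.each { |target| target.write(*args) }
  end

  def close
    @targets.each(&:close)
  end
end

FileUtils.mkdir_p('log')
logger = Logger.new(MultiIO.new($stdout, File.open('log/development.log', 'a')))
logger.info('logged to both STDOUT and the file')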
data/lib/karafka/monitor.rb DELETED
@@ -1,98 +0,0 @@
- # frozen_string_literal: true
-
- module Karafka
-   # Monitor is used to hook up external monitoring services to monitor how Karafka works
-   # It provides a standardized API for checking incoming messages/enqueueing, etc.
-   # By default it implements logging functionalities but can be replaced with any more
-   # sophisticated logging/monitoring system like Errbit, Airbrake, NewRelic
-   # @note This class acts as a singleton because we are only permitted to have a single
-   #   monitor per running process (just as with the logger)
-   #   Keep in mind that if you create your own monitor object, you will also have to
-   #   implement the logging functionality (or just inherit, call super and do whatever you want)
-   class Monitor
-     include Singleton
-
-     # This method is executed in many important places in the code (during data flow), like
-     # the moment before #perform_async, etc. For the full list just grep for 'monitor.notice'
-     # @param caller_class [Class] class of the object that executed this call
-     # @param options [Hash] hash with options that we passed to notice. It differs based
-     #   on who is calling and when
-     # @note We don't provide the name of the method in which this was called, because we can
-     #   take it directly from Ruby (see the #caller_label method of this class for details)
-     # @example Notice about consuming with controller_class
-     #   Karafka.monitor.notice(self.class, controller_class: controller_class)
-     # @example Notice about terminating with a signal
-     #   Karafka.monitor.notice(self.class, signal: signal)
-     def notice(caller_class, options = {})
-       logger.info("#{caller_class}##{caller_label} with #{options}")
-     end
-
-     # This method is executed when we want to notify about an error that happened somewhere
-     # in the system
-     # @param caller_class [Class] class of the object that executed this call
-     # @param e [Exception] exception that was raised
-     # @note We don't provide the name of the method in which this was called, because we can
-     #   take it directly from Ruby (see the #caller_label method of this class for details)
-     # @example Notify about an error
-     #   Karafka.monitor.notice_error(self.class, e)
-     def notice_error(caller_class, e)
-       caller_exceptions_map.each do |level, types|
-         next unless types.include?(caller_class)
-
-         return logger.public_send(level, e)
-       end
-
-       logger.info(e)
-     end
-
-     private
-
-     # @return [Hash] Hash containing information on which notification level we should use
-     #   for exceptions that happen in certain parts of Karafka
-     # @note Keep in mind that any class not handled here will be logged with info
-     # @note Those are not maps of exception classes but of the classes that were the callers
-     #   of this particular exception
-     def caller_exceptions_map
-       @caller_exceptions_map ||= {
-         error: [
-           Karafka::Connection::MessagesConsumer,
-           Karafka::Connection::Listener,
-           Karafka::Params::Params
-         ],
-         fatal: [
-           Karafka::Fetcher
-         ]
-       }
-     end
-
-     # @return [String] label of the method that invoked #notice or #notice_error
-     # @example Check label of the method that invoked #notice
-     #   caller_label #=> 'fetch'
-     # @example Check label of the method that invoked #notice in a block
-     #   caller_label #=> 'block in fetch'
-     # @example Check label of the method that invoked #notice_error
-     #   caller_label #=> 'rescue in target'
-     def caller_label
-       # We need to calculate the ancestors because if someone inherits
-       # from this class, the caller chain is longer
-       index = self.class.ancestors.index(Karafka::Monitor)
-       # caller_locations differs in result depending on whether it is a subclass of
-       # Karafka::Monitor, the basic Karafka::Monitor itself or a super for a subclass.
-       # So to cover all the cases we need to differentiate.
-       # @see https://github.com/karafka/karafka/issues/128
-       # @note It won't work if the monitor caller_label caller class is defined using
-       #   define_method
-       super_execution = caller_locations(1, 2)[0].label == caller_locations(1, 2)[1].label
-
-       scope = super_execution ? 1 : nil
-       scope ||= index.positive? ? 0 : 1
-
-       caller_locations(index + 1, 2)[scope].label
-     end
-
-     # @return [Logger] logger instance
-     def logger
-       Karafka.logger
-     end
-   end
- end
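As the class comment suggests, swapping in your own monitor in 1.0 meant inheriting, calling super and forwarding wherever you like; a sketch under that assumption (the Airbrake call is a hypothetical integration):

class AppMonitor < Karafka::Monitor
  def notice_error(caller_class, e)
    super # keep the default logging behavior
    # Airbrake.notify(e) # hypothetical error-tracker hookup
  end
end

It would then be assigned during setup, e.g. config.monitor = AppMonitor.instance; in 1.2 this class is gone and the instrumentation/monitor.rb added above takes its place.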