karafka 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.console_irbrc +13 -0
  3. data/.github/ISSUE_TEMPLATE.md +2 -0
  4. data/.gitignore +68 -0
  5. data/.rspec +1 -0
  6. data/.ruby-gemset +1 -0
  7. data/.ruby-version +1 -0
  8. data/.travis.yml +17 -0
  9. data/CHANGELOG.md +371 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +42 -0
  12. data/Gemfile +12 -0
  13. data/Gemfile.lock +111 -0
  14. data/MIT-LICENCE +18 -0
  15. data/README.md +95 -0
  16. data/bin/karafka +19 -0
  17. data/config/errors.yml +6 -0
  18. data/karafka.gemspec +35 -0
  19. data/lib/karafka.rb +68 -0
  20. data/lib/karafka/app.rb +52 -0
  21. data/lib/karafka/attributes_map.rb +67 -0
  22. data/lib/karafka/backends/inline.rb +17 -0
  23. data/lib/karafka/base_controller.rb +60 -0
  24. data/lib/karafka/base_responder.rb +185 -0
  25. data/lib/karafka/cli.rb +54 -0
  26. data/lib/karafka/cli/base.rb +78 -0
  27. data/lib/karafka/cli/console.rb +29 -0
  28. data/lib/karafka/cli/flow.rb +46 -0
  29. data/lib/karafka/cli/info.rb +29 -0
  30. data/lib/karafka/cli/install.rb +43 -0
  31. data/lib/karafka/cli/server.rb +67 -0
  32. data/lib/karafka/connection/config_adapter.rb +112 -0
  33. data/lib/karafka/connection/consumer.rb +121 -0
  34. data/lib/karafka/connection/listener.rb +51 -0
  35. data/lib/karafka/connection/processor.rb +61 -0
  36. data/lib/karafka/controllers/callbacks.rb +54 -0
  37. data/lib/karafka/controllers/includer.rb +51 -0
  38. data/lib/karafka/controllers/responders.rb +19 -0
  39. data/lib/karafka/controllers/single_params.rb +15 -0
  40. data/lib/karafka/errors.rb +43 -0
  41. data/lib/karafka/fetcher.rb +48 -0
  42. data/lib/karafka/helpers/class_matcher.rb +78 -0
  43. data/lib/karafka/helpers/config_retriever.rb +46 -0
  44. data/lib/karafka/helpers/multi_delegator.rb +33 -0
  45. data/lib/karafka/loader.rb +29 -0
  46. data/lib/karafka/logger.rb +53 -0
  47. data/lib/karafka/monitor.rb +98 -0
  48. data/lib/karafka/params/params.rb +128 -0
  49. data/lib/karafka/params/params_batch.rb +41 -0
  50. data/lib/karafka/parsers/json.rb +38 -0
  51. data/lib/karafka/patches/dry_configurable.rb +31 -0
  52. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  53. data/lib/karafka/persistence/consumer.rb +25 -0
  54. data/lib/karafka/persistence/controller.rb +38 -0
  55. data/lib/karafka/process.rb +63 -0
  56. data/lib/karafka/responders/builder.rb +35 -0
  57. data/lib/karafka/responders/topic.rb +57 -0
  58. data/lib/karafka/routing/builder.rb +61 -0
  59. data/lib/karafka/routing/consumer_group.rb +61 -0
  60. data/lib/karafka/routing/consumer_mapper.rb +33 -0
  61. data/lib/karafka/routing/proxy.rb +37 -0
  62. data/lib/karafka/routing/router.rb +29 -0
  63. data/lib/karafka/routing/topic.rb +66 -0
  64. data/lib/karafka/routing/topic_mapper.rb +55 -0
  65. data/lib/karafka/schemas/config.rb +21 -0
  66. data/lib/karafka/schemas/consumer_group.rb +65 -0
  67. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  68. data/lib/karafka/schemas/responder_usage.rb +39 -0
  69. data/lib/karafka/schemas/server_cli_options.rb +43 -0
  70. data/lib/karafka/server.rb +62 -0
  71. data/lib/karafka/setup/config.rb +163 -0
  72. data/lib/karafka/setup/configurators/base.rb +35 -0
  73. data/lib/karafka/setup/configurators/water_drop.rb +29 -0
  74. data/lib/karafka/status.rb +25 -0
  75. data/lib/karafka/templates/application_controller.rb.example +7 -0
  76. data/lib/karafka/templates/application_responder.rb.example +11 -0
  77. data/lib/karafka/templates/karafka.rb.example +41 -0
  78. data/lib/karafka/version.rb +7 -0
  79. data/log/.gitkeep +0 -0
  80. metadata +267 -0
@@ -0,0 +1,52 @@
1
# frozen_string_literal: true

module Karafka
  # App class
  class App
    class << self
      # Runs the configuration block against Setup::Config and marks the
      # application as initialized afterwards
      # @param [Block] block configuration block
      def setup(&block)
        Setup::Config.setup(&block)
        initialize!
      end

      # Validates the configuration and bootstraps all the internal components
      # @note We need to know details about consumers in order to set up the
      #   components, that's why they aren't set up during the standard setup
      # @raise [Karafka::Errors::InvalidConfiguration] raised when the
      #   configuration doesn't match the ConfigurationSchema
      def boot!
        Setup::Config.validate!
        Setup::Config.setup_components
      end

      # @return [Karafka::Config] config instance
      def config
        Setup::Config.config
      end

      # @return [Karafka::Routing::Builder] consumers builder instance
      def consumer_groups
        Routing::Builder.instance
      end

      # Expose every Status reader (running?, etc.) directly on the App class
      Status.instance_methods(false).each do |status_method|
        define_method(status_method) do
          Status.instance.public_send(status_method)
        end
      end

      # Methods that should be delegated to the Karafka module
      %i[root env logger monitor].each do |karafka_method|
        define_method(karafka_method) do
          Karafka.public_send(karafka_method)
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
# frozen_string_literal: true

module Karafka
  # Both Karafka and Ruby-Kafka contain a lot of settings that can be applied on
  # multiple levels. In Karafka that is on the consumer group and on the topic level.
  # In Ruby-Kafka it is on the consumer, subscription and consumption levels.
  # To keep the management of those settings orderly, this module describes which
  # setting goes where and which layer (both in Karafka and Ruby-Kafka) is
  # responsible for setting it and passing it forward
  # @note Settings presented here cover all the settings that are being used across Karafka
  module AttributesMap
    class << self
      # Maps settings to their place in ruby-kafka
      # @note Every setting not listed here will be passed to the Kafka.new
      #   method invocation; the entries in this hash are just the edge cases
      # @return [Hash] hash with proper sections on what to proxy where in Ruby-Kafka
      def config_adapter
        {
          consumer: %i[
            session_timeout offset_commit_interval offset_commit_threshold
            offset_retention_time heartbeat_interval
          ],
          subscription: %i[start_from_beginning max_bytes_per_partition],
          consuming: %i[min_bytes max_wait_time],
          pausing: %i[pause_timeout],
          # Options that live under the kafka config namespace but are never used
          # directly with the kafka api - from the Karafka user perspective they
          # are still kafka-related. They should not be proxied anywhere
          ignored: %i[reconnect_timeout automatically_mark_as_consumed]
        }
      end

      # @return [Array<Symbol>] properties that can be set on a per topic level
      def topic
        karafka_topic_settings = %i[
          backend
          name
          parser
          responder
          batch_consuming
          persistent
        ]
        (config_adapter[:subscription] + karafka_topic_settings).uniq
      end

      # @return [Array<Symbol>] properties that can be set on a per consumer group level
      # @note Note that there are settings directly extracted from the config kafka namespace
      #   It was done that way so the same setting keys don't have to be repeated over and
      #   over again. Thanks to this solution, if any new setting becomes available in
      #   ruby-kafka, we only need to add it to our configuration class and it will be
      #   handled automatically.
      def consumer_group
        # @note We don't ignore the config_adapter[:ignored] values as they should be
        #   ignored only when proxying details to ruby-kafka. We use the ignored fields
        #   internally in karafka
        ignored_settings = config_adapter[:subscription]
        defined_settings = config_adapter.values.flatten
        karafka_settings = %i[batch_fetching]
        # This is a dirty and bad hack on dry-configurable to get the keys before
        # the values are set
        dynamically_proxied = Karafka::Setup::Config
                              ._settings
                              .find { |setting| setting.name == :kafka }
                              .value
                              .instance_variable_get('@klass').settings

        (defined_settings + dynamically_proxied).uniq + karafka_settings - ignored_settings
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
# frozen_string_literal: true

module Karafka
  # Namespace for all the different backends Karafka supports
  module Backends
    # Backend that executes the consumption right away, without any scheduling
    module Inline
      private

      # Runs the consume logic immediately (no enqueuing), notifying the
      # monitor about the params batch that is being processed
      def process
        Karafka.monitor.notice(self.class, params_batch)
        consume
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
# frozen_string_literal: true

# Karafka module namespace
module Karafka
  # Base controller from which all Karafka controllers should inherit
  class BaseController
    extend ActiveSupport::DescendantsTracker

    class << self
      attr_reader :topic

      # Assigns a topic to a controller and builds up the proper controller
      # functionalities, so it can cooperate with the topic settings
      # @param topic [Karafka::Routing::Topic]
      # @return [Karafka::Routing::Topic] assigned topic
      def topic=(topic)
        @topic = topic
        Controllers::Includer.call(self)
      end
    end

    # @return [Karafka::Routing::Topic] topic to which a given controller is subscribed
    def topic
      self.class.topic
    end

    # Builds a lazily evaluated params batch object
    # @note Data won't be parsed until the first params usage
    # @param messages [Array<Kafka::FetchedMessage>, Array<Hash>] messages with raw
    #   content (straight from Kafka) or messages inside a hash (from a backend, etc)
    # @return [Karafka::Params::ParamsBatch] lazy loaded params batch
    def params_batch=(messages)
      @params_batch = Karafka::Params::ParamsBatch.new(messages, topic.parser)
    end

    # Executes the default controller flow
    def call
      process
    end

    private

    # Private, as it should be accessible only from the inside of a controller
    attr_reader :params_batch

    # @return [Karafka::Connection::Consumer] messages consumer that can be used
    #   to manually commit an offset or pause / stop the consumer based on the
    #   business logic
    def consumer
      Persistence::Consumer.read
    end

    # Performs the business logic on the data received from Kafka (consumes it)
    # @note This method needs to be implemented in a subclass. We stub it here as
    #   a failover in case someone forgets about it or makes a typo
    def consume
      raise NotImplementedError, 'Implement this in a subclass'
    end
  end
end
@@ -0,0 +1,185 @@
1
# frozen_string_literal: true

module Karafka
  # Base responder from which all Karafka responders should inherit
  # Similar to the Rails responders concept. It allows us to design a flow from
  # one app to another by isolating what responses should be sent (and where)
  # based on a given action
  # It differs from Rails responders in how it works: in a std http request we can
  # have one response, here we can have an unlimited number of them
  #
  # It has a simple API for defining where we should respond (and whether it is required)
  #
  # @example Basic usage (each registered topic is required to be used by default)
  #   class Responder < BaseResponder
  #     topic :new_action
  #
  #     def respond(data)
  #       respond_to :new_action, data
  #     end
  #   end
  #
  # @example Responding to a topic with extra options
  #   class Responder < BaseResponder
  #     topic :new_action
  #
  #     def respond(data)
  #       respond_to :new_action, data, partition_key: 'thing'
  #     end
  #   end
  #
  # @example Marking a topic as not required (we won't have to use it)
  #   class Responder < BaseResponder
  #     topic :required_topic
  #     topic :new_action, required: false
  #
  #     def respond(data)
  #       respond_to :required_topic, data
  #     end
  #   end
  #
  # @example Topic used multiple times
  #   class Responder < BaseResponder
  #     topic :required_topic, multiple_usage: true
  #
  #     def respond(data)
  #       data.each do |subset|
  #         respond_to :required_topic, subset
  #       end
  #     end
  #   end
  #
  # @example Accept multiple arguments to a respond method
  #   class Responder < BaseResponder
  #     topic :users_actions
  #     topic :articles_viewed
  #
  #     def respond(user, article)
  #       respond_to :users_actions, user
  #       respond_to :articles_viewed, article
  #     end
  #   end
  class BaseResponder
    # Definitions of all the topics that we want to be able to use in this responder
    class_attribute :topics

    attr_reader :messages_buffer

    class << self
      # Registers a topic as one to which we will be able to respond
      # @param topic_name [Symbol, String] name of topic to which we want to respond
      # @param options [Hash] hash with optional configuration details
      def topic(topic_name, options = {})
        self.topics ||= {}
        registered_topic = Responders::Topic.new(topic_name, options.merge(registered: true))
        self.topics[registered_topic.name] = registered_topic
      end

      # A simple alias for easier standalone responder usage.
      # Instead of building it with new.call it allows (when using the JSON parser)
      # running it directly from the class level
      # @param data Anything that we want to respond with
      # @example Send user data with a responder (uses default Karafka::Parsers::Json parser)
      #   UsersCreatedResponder.call(@created_user)
      def call(*data)
        # Guard against responders without any registered topics - initialize
        # with an empty hash so we don't have to handle a nil case
        self.topics ||= {}
        new.call(*data)
      end
    end

    # Creates a responder object
    # @param parser_class [Class] parser class that we can use to generate an
    #   appropriate string, or nothing if we want to default to Karafka::Parsers::Json
    # @return [Karafka::BaseResponder] base responder descendant responder
    def initialize(parser_class = Karafka::Parsers::Json)
      @parser_class = parser_class
      @messages_buffer = {}
    end

    # Performs the respond and validates that all the response requirements were met
    # @param data Anything that we want to respond with
    # @note We know that validators should be executed also before sending data to
    #   topics, however the implementation gets way more complicated then, that's why
    #   we check after everything was sent using the responder
    # @example Send user data with a responder (uses default Karafka::Parsers::Json parser)
    #   UsersCreatedResponder.new.call(@created_user)
    # @example Send user data with a responder using a non default Parser
    #   UsersCreatedResponder.new(MyParser).call(@created_user)
    def call(*data)
      respond(*data)
      validate!
      deliver!
    end

    private

    # Checks that all the topics requirements were met. It will fail if we didn't
    # send a message to a registered required topic, etc.
    def validate!
      registered_topics = self.class.topics.map do |name, responder_topic|
        registered_usage = messages_buffer[name]&.count || 0
        responder_topic.to_h.merge!(usage_count: registered_usage)
      end

      used_topics = messages_buffer.map do |name, buffered|
        responder_topic = self.class.topics[name] || Responders::Topic.new(name, registered: false)
        responder_topic.to_h.merge!(usage_count: buffered.count)
      end

      result = Karafka::Schemas::ResponderUsage.call(
        registered_topics: registered_topics,
        used_topics: used_topics
      )

      return if result.success?

      raise Karafka::Errors::InvalidResponderUsage, result.errors
    end

    # Takes all the messages from the buffer and delivers them one by one
    # @note This method runs after the validation, so we're sure that what we send
    #   is legit and it will go to proper topics
    def deliver!
      messages_buffer.each do |topic, data_elements|
        # We map this topic name, so it will match a namespaced/etc topic in Kafka
        # @note By default the topic stays unchanged (when the default mapper is used)
        mapped_topic = Karafka::App.config.topic_mapper.outgoing(topic)

        data_elements.each do |data, options|
          delivery_options = options.merge(topic: mapped_topic)
          producer(delivery_options).call(data, delivery_options)
        end
      end
    end

    # Needs to be implemented in a subclass. It should handle responding on the
    # registered topics
    # @raise [NotImplementedError] This method needs to be implemented in a subclass
    def respond(*_data)
      raise NotImplementedError, 'Implement this in a subclass'
    end

    # Allows us to respond to a single topic with a given data. It can be used as
    # many times as needed, especially when we have a 1:n flow
    # @param topic [Symbol, String] topic to which we want to respond
    # @param data [String, Object] string or object that we want to send
    # @param options [Hash] options for waterdrop (e.g. partition_key)
    # @note respond_to does not accept multiple data arguments.
    def respond_to(topic, data, options = {})
      Karafka.monitor.notice(self.class, topic: topic, data: data, options: options)

      buffer_key = topic.to_s
      (messages_buffer[buffer_key] ||= []) << [@parser_class.generate(data), options]
    end

    # @param options [Hash] options for waterdrop
    # @return [Class] WaterDrop producer (sync or async based on the settings)
    def producer(options)
      options[:async] ? WaterDrop::AsyncProducer : WaterDrop::SyncProducer
    end
  end
end
@@ -0,0 +1,54 @@
1
# frozen_string_literal: true

module Karafka
  # Karafka framework Cli
  # If you want to add/modify a command that belongs to the CLI, please review all
  # the commands available in the cli/ directory inside the Karafka source code.
  #
  # @note The whole Cli is built using Thor
  # @see https://github.com/erikhuda/thor
  class Cli < Thor
    package_name 'Karafka'

    class << self
      # Loads all the Cli commands into the Thor framework
      # This method has to run before Karafka::Cli.start, otherwise no Cli
      # commands will be available
      def prepare
        cli_commands.each { |command| command.bind_to(self) }
      end

      private

      # @return [Array<Class>] Array with Cli action classes that can be used as commands
      def cli_commands
        constants
          .map { |const_name| const_get(const_name) }
          .select { |const| const.instance_of?(Class) && const < Cli::Base }
      end
    end
  end
end

# This is kind of a trick - since we don't have autoload and other magic stuff
# like Rails does, this method instead allows us to replace the currently running
# console with a new one via Kernel.exec. It will start a console with the new
# code loaded. Yes, we know it is not turbofast, however it is turbo convenient
# and small
#
# Also - KARAFKA_CONSOLE is used to detect that we're executing the irb session,
# so this method is only available when the Karafka console is running
#
# We skip this because this should exist and be valid only in the console
# :nocov:
if ENV['KARAFKA_CONSOLE']
  # Reloads Karafka irb console session
  def reload!
    puts "Reloading...\n"
    Kernel.exec Karafka::Cli::Console.command
  end
end
# :nocov:
@@ -0,0 +1,78 @@
1
# frozen_string_literal: true

module Karafka
  class Cli < Thor
    # Base class for all the commands that we want to define
    # This base class provides a nicer interface on top of Thor and allows for an
    # easier separation of single independent commands
    # In order to define a new command you need to:
    # - specify its desc
    # - implement a call method
    #
    # @example Create a dummy command
    #   class Dummy < Base
    #     self.desc = 'Dummy command'
    #
    #     def call
    #       puts "I'm doing nothing!"
    #     end
    #   end
    class Base
      include Thor::Shell

      # Reference that we can use to call other cli methods via this object
      attr_reader :cli

      # @param cli [Karafka::Cli] current Karafka Cli instance
      def initialize(cli)
        @cli = cli
      end

      # This method should implement the proper cli action in a subclass
      def call
        raise NotImplementedError, 'Implement this in a subclass'
      end

      class << self
        # Collects options that will be set for the Thor cli
        # @see https://github.com/erikhuda/thor
        # @param option Single option details
        def option(*option)
          (@options ||= []) << option
        end

        # Stores the description of a given cli command
        # @param desc [String] Description of a given cli command
        def desc(desc)
          @desc ||= desc
        end

        # Binds a given Cli command into the Karafka Cli
        # This method is a wrapper over the way Thor defines its commands
        # @param cli_class [Karafka::Cli] Karafka cli_class
        def bind_to(cli_class)
          cli_class.desc name, @desc

          (@options || []).each { |single_option| cli_class.option(*single_option) }

          context = self

          cli_class.send :define_method, name do |*args|
            context.new(self).call(*args)
          end
        end

        private

        # @return [String] downcased current class name that we use to define the
        #   name of a given Cli command
        # @example for Karafka::Cli::Install
        #   name #=> 'install'
        def name
          to_s.split('::').last.downcase
        end
      end
    end
  end
end