karafka 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +5 -5
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +3 -1
  4. data/CHANGELOG.md +90 -3
  5. data/CONTRIBUTING.md +5 -6
  6. data/Gemfile +1 -1
  7. data/Gemfile.lock +59 -64
  8. data/README.md +28 -57
  9. data/bin/karafka +13 -1
  10. data/config/errors.yml +6 -0
  11. data/karafka.gemspec +10 -9
  12. data/lib/karafka.rb +19 -10
  13. data/lib/karafka/app.rb +8 -15
  14. data/lib/karafka/attributes_map.rb +4 -4
  15. data/lib/karafka/backends/inline.rb +2 -3
  16. data/lib/karafka/base_consumer.rb +68 -0
  17. data/lib/karafka/base_responder.rb +41 -17
  18. data/lib/karafka/callbacks.rb +30 -0
  19. data/lib/karafka/callbacks/config.rb +22 -0
  20. data/lib/karafka/callbacks/dsl.rb +16 -0
  21. data/lib/karafka/cli/base.rb +2 -0
  22. data/lib/karafka/cli/flow.rb +1 -1
  23. data/lib/karafka/cli/info.rb +1 -2
  24. data/lib/karafka/cli/install.rb +2 -3
  25. data/lib/karafka/cli/server.rb +9 -12
  26. data/lib/karafka/connection/client.rb +117 -0
  27. data/lib/karafka/connection/config_adapter.rb +30 -14
  28. data/lib/karafka/connection/delegator.rb +46 -0
  29. data/lib/karafka/connection/listener.rb +22 -20
  30. data/lib/karafka/consumers/callbacks.rb +54 -0
  31. data/lib/karafka/consumers/includer.rb +51 -0
  32. data/lib/karafka/consumers/responders.rb +24 -0
  33. data/lib/karafka/{controllers → consumers}/single_params.rb +3 -3
  34. data/lib/karafka/errors.rb +19 -2
  35. data/lib/karafka/fetcher.rb +30 -28
  36. data/lib/karafka/helpers/class_matcher.rb +8 -8
  37. data/lib/karafka/helpers/config_retriever.rb +2 -2
  38. data/lib/karafka/instrumentation/listener.rb +112 -0
  39. data/lib/karafka/instrumentation/logger.rb +55 -0
  40. data/lib/karafka/instrumentation/monitor.rb +64 -0
  41. data/lib/karafka/loader.rb +0 -1
  42. data/lib/karafka/params/dsl.rb +156 -0
  43. data/lib/karafka/params/params_batch.rb +7 -2
  44. data/lib/karafka/patches/dry_configurable.rb +7 -7
  45. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  46. data/lib/karafka/persistence/client.rb +25 -0
  47. data/lib/karafka/persistence/consumer.rb +38 -0
  48. data/lib/karafka/persistence/topic.rb +29 -0
  49. data/lib/karafka/process.rb +6 -5
  50. data/lib/karafka/responders/builder.rb +15 -14
  51. data/lib/karafka/responders/topic.rb +8 -1
  52. data/lib/karafka/routing/builder.rb +2 -2
  53. data/lib/karafka/routing/consumer_group.rb +1 -1
  54. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  55. data/lib/karafka/routing/router.rb +1 -1
  56. data/lib/karafka/routing/topic.rb +5 -11
  57. data/lib/karafka/routing/{mapper.rb → topic_mapper.rb} +2 -2
  58. data/lib/karafka/schemas/config.rb +4 -5
  59. data/lib/karafka/schemas/consumer_group.rb +45 -24
  60. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  61. data/lib/karafka/schemas/responder_usage.rb +1 -0
  62. data/lib/karafka/server.rb +39 -20
  63. data/lib/karafka/setup/config.rb +74 -51
  64. data/lib/karafka/setup/configurators/base.rb +6 -12
  65. data/lib/karafka/setup/configurators/params.rb +25 -0
  66. data/lib/karafka/setup/configurators/water_drop.rb +15 -14
  67. data/lib/karafka/setup/dsl.rb +22 -0
  68. data/lib/karafka/templates/{application_controller.rb.example → application_consumer.rb.example} +2 -3
  69. data/lib/karafka/templates/karafka.rb.example +18 -5
  70. data/lib/karafka/version.rb +1 -1
  71. metadata +87 -63
  72. data/.github/ISSUE_TEMPLATE.md +0 -2
  73. data/Rakefile +0 -7
  74. data/lib/karafka/base_controller.rb +0 -118
  75. data/lib/karafka/connection/messages_consumer.rb +0 -106
  76. data/lib/karafka/connection/messages_processor.rb +0 -59
  77. data/lib/karafka/controllers/includer.rb +0 -51
  78. data/lib/karafka/controllers/responders.rb +0 -19
  79. data/lib/karafka/logger.rb +0 -53
  80. data/lib/karafka/monitor.rb +0 -98
  81. data/lib/karafka/params/params.rb +0 -101
  82. data/lib/karafka/persistence.rb +0 -18
  83. data/lib/karafka/setup/configurators/celluloid.rb +0 -22
data/bin/karafka CHANGED
@@ -1,7 +1,19 @@
 #!/usr/bin/env ruby

 require 'karafka'
-require Karafka.boot_file.to_s
+
+# If there is a boot file, we need to require it as we expect it to contain
+# Karafka app setup, routes, etc
+if File.exist?(Karafka.boot_file)
+  require Karafka.boot_file.to_s
+else
+  # However when it is unavailable, we still want to be able to run help command
+  # and install command as they don't require configured app itself to run
+  raise(
+    Karafka::Errors::MissingBootFile,
+    Karafka.boot_file
+  ) unless %w[-h install].include?(ARGV[0])
+end

 Karafka::Cli.prepare
 Karafka::Cli.start
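
For reference, the boot file that bin/karafka now guards against is the karafka.rb generated by `karafka install` (see data/lib/karafka/templates/karafka.rb.example in the file list). A minimal 1.2-style boot file might look like the sketch below; the app class, topic, and consumer names are illustrative:

# karafka.rb
# frozen_string_literal: true

class KarafkaApp < Karafka::App
  setup do |config|
    # Broker addresses must match the kafka:// schema validated via config/errors.yml below
    config.kafka.seed_brokers = %w[kafka://127.0.0.1:9092]
    config.client_id = 'example_app'
  end

  consumer_groups.draw do
    topic :example do
      consumer ExampleConsumer
    end
  end
end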
data/config/errors.yml ADDED
@@ -0,0 +1,6 @@
+en:
+  errors:
+    broker_schema?: >
+      has an invalid format.
+      Expected schema, host and port number.
+      Example: kafka://127.0.0.1:9092 or kafka+ssl://127.0.0.1:9092
data/karafka.gemspec CHANGED
@@ -1,6 +1,6 @@
 # frozen_string_literal: true

-lib = File.expand_path('../lib', __FILE__)
+lib = File.expand_path('lib', __dir__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

 require 'karafka/version'
@@ -16,17 +16,18 @@ Gem::Specification.new do |spec|
   spec.description = 'Framework used to simplify Apache Kafka based Ruby applications development'
   spec.license = 'MIT'

-  spec.add_dependency 'ruby-kafka', '>= 0.4'
-  spec.add_dependency 'celluloid'
-  spec.add_dependency 'envlogic', '~> 1.0'
-  spec.add_dependency 'waterdrop', '>= 0.4'
-  spec.add_dependency 'rake', '>= 11.3'
-  spec.add_dependency 'thor', '~> 0.19'
-  spec.add_dependency 'activesupport', '>= 5.0'
-  spec.add_dependency 'dry-validation', '~> 0.11'
+  spec.add_dependency 'activesupport', '>= 4.0'
   spec.add_dependency 'dry-configurable', '~> 0.7'
+  spec.add_dependency 'dry-inflector', '~> 0.1.1'
+  spec.add_dependency 'dry-monitor', '~> 0.1'
+  spec.add_dependency 'dry-validation', '~> 0.11'
+  spec.add_dependency 'envlogic', '~> 1.0'
   spec.add_dependency 'multi_json', '>= 1.12'
+  spec.add_dependency 'rake', '>= 11.3'
   spec.add_dependency 'require_all', '>= 1.4'
+  spec.add_dependency 'ruby-kafka', '>= 0.5.3'
+  spec.add_dependency 'thor', '~> 0.19'
+  spec.add_dependency 'waterdrop', '~> 1.2'

   spec.required_ruby_version = '>= 2.3.0'

data/lib/karafka.rb CHANGED
@@ -2,8 +2,6 @@

 %w[
   English
-  bundler
-  celluloid/current
   waterdrop
   kafka
   envlogic
@@ -13,11 +11,9 @@
   require_all
   dry-configurable
   dry-validation
+  dry/inflector
+  dry/monitor/notifications
   active_support/callbacks
-  active_support/core_ext/class/subclasses
-  active_support/core_ext/hash/indifferent_access
-  active_support/descendants_tracker
-  active_support/inflector
   karafka/loader
 ].each(&method(:require))

@@ -31,14 +27,14 @@ module Karafka
      @logger ||= App.config.logger
    end

-    # @return [::Karafka::Monitor] monitor that we want to use. Will use dummy monitor by default
+    # @return [::Karafka::Monitor] monitor that we want to use
    def monitor
      @monitor ||= App.config.monitor
    end

    # @return [String] root path of this gem
    def gem_root
-      Pathname.new(File.expand_path('../..', __FILE__))
+      Pathname.new(File.expand_path('..', __dir__))
    end

    # @return [String] Karafka app root path (user application path)
@@ -48,13 +44,13 @@ module Karafka

    # @return [String] path to Karafka gem root core
    def core_root
-      Pathname.new(File.expand_path('../karafka', __FILE__))
+      Pathname.new(File.expand_path('karafka', __dir__))
    end

    # @return [String] path to a default file that contains booting procedure etc
    # @note By default it is a file called 'karafka.rb' but it can be specified as you wish if you
    #   have Karafka that is merged into a Sinatra/Rails app and karafka.rb is taken.
-    #   It will be used for console/controllers/etc
+    #   It will be used for console/consumers/etc
    # @example Standard only-Karafka case
    #   Karafka.boot_file #=> '/home/app_path/karafka.rb'
    # @example Non standard case
@@ -66,4 +62,17 @@ module Karafka
  end
end

+%w[
+  callbacks
+  callbacks/*
+  setup/dsl
+  setup/config
+  status
+  schemas/config
+  schemas/consumer_group_topic
+  schemas/consumer_group
+].each { |path| require_all File.join(Karafka.core_root, path + '.rb') }
+
Karafka::Loader.load!(Karafka.core_root)
+Kafka::Consumer.prepend(Karafka::Patches::RubyKafka)
+Dry::Configurable::Config.prepend(Karafka::Patches::DryConfigurable)
data/lib/karafka/app.rb CHANGED
@@ -3,14 +3,10 @@
 module Karafka
   # App class
   class App
-    class << self
-      # Sets up the whole configuration
-      # @param [Block] block configuration block
-      def setup(&block)
-        Setup::Config.setup(&block)
-        initialize!
-      end
+    extend Setup::Dsl
+    extend Callbacks::Dsl

+    class << self
       # Sets up all the internal components and bootstrap whole app
       # We need to know details about consumers in order to setup components,
       # that's why we don't setup them after std setup is done
@@ -19,11 +15,7 @@ module Karafka
       def boot!
         Setup::Config.validate!
         Setup::Config.setup_components
-      end
-
-      # @return [Karafka::Config] config instance
-      def config
-        Setup::Config.config
+        Callbacks.after_init(Karafka::App.config)
       end

       # @return [Karafka::Routing::Builder] consumers builder instance
@@ -33,7 +25,7 @@ module Karafka

     Status.instance_methods(false).each do |delegated|
       define_method(delegated) do
-        Status.instance.public_send(delegated)
+        Status.instance.send(delegated)
       end
     end

@@ -41,10 +33,11 @@ module Karafka
     %i[
       root
       env
-      logger monitor
+      logger
+      monitor
     ].each do |delegated|
       define_method(delegated) do
-        Karafka.public_send(delegated)
+        Karafka.send(delegated)
       end
     end
   end
data/lib/karafka/attributes_map.rb CHANGED
@@ -21,12 +21,12 @@ module Karafka
         offset_retention_time heartbeat_interval
       ],
       subscription: %i[start_from_beginning max_bytes_per_partition],
-      consuming: %i[min_bytes max_wait_time],
+      consuming: %i[min_bytes max_bytes max_wait_time],
       pausing: %i[pause_timeout],
       # All the options that are under kafka config namespace, but are not used
       # directly with kafka api, but from the Karafka user perspective, they are
       # still related to kafka. They should not be proxied anywhere
-      ignored: %i[reconnect_timeout]
+      ignored: %i[reconnect_timeout automatically_mark_as_consumed]
     }
   end

@@ -37,7 +37,7 @@
       name
       parser
       responder
-      batch_processing
+      batch_consuming
       persistent
     ]).uniq
   end
@@ -52,7 +52,7 @@
     # only when proxying details to ruby-kafka. We use ignored fields internally in karafka
     ignored_settings = config_adapter[:subscription]
     defined_settings = config_adapter.values.flatten
-    karafka_settings = %i[batch_consuming]
+    karafka_settings = %i[batch_fetching]
     # This is a dirty and bad hack of dry-configurable to get keys before setting values
     dynamically_proxied = Karafka::Setup::Config
       ._settings
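
The batch_processing → batch_consuming and batch_consuming → batch_fetching renames above surface directly in user configuration. A rough sketch of where each knob now lives (setting names are taken from the diff; the app class, topic, and consumer names are illustrative):

class KarafkaApp < Karafka::App
  setup do |config|
    # Root-level: whether messages are fetched from Kafka in batches
    config.batch_fetching = true
  end

  consumer_groups.draw do
    topic :events do
      consumer EventsConsumer
      # Topic-level: whether #consume receives the whole batch or one message at a time
      batch_consuming true
    end
  end
end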
data/lib/karafka/backends/inline.rb CHANGED
@@ -7,10 +7,9 @@ module Karafka
   module Inline
     private

-    # Executes perform code immediately (without enqueuing)
+    # Executes consume code immediately (without enqueuing)
     def process
-      Karafka.monitor.notice(self.class, params_batch)
-      perform
+      Karafka.monitor.instrument('backends.inline.process', caller: self) { consume }
     end
   end
 end
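
With the switch from Karafka.monitor.notice to dry-monitor style instrumentation (see data/lib/karafka/instrumentation/monitor.rb in the file list), the backend now emits a subscribable event. A sketch of hooking into it, assuming the 1.2 monitor exposes dry-monitor's subscribe API and passes the consumer as the caller payload shown above:

# karafka.rb, after setup
Karafka.monitor.subscribe('backends.inline.process') do |event|
  # The payload carries the :caller consumer instance, as passed in #process above
  Karafka.logger.debug "Processed a batch in #{event.payload[:caller].class}"
end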
data/lib/karafka/base_consumer.rb ADDED
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+# Karafka module namespace
+module Karafka
+  # Base consumer from which all Karafka consumers should inherit
+  class BaseConsumer
+    extend ActiveSupport::DescendantsTracker
+    extend Forwardable
+
+    # Allows us to mark messages as consumed for non-automatic mode without having
+    # to use the consumer client directly. We do it this way because most people should
+    # not mess with the client instance directly (just in case)
+    def_delegator :client, :mark_as_consumed
+
+    private :mark_as_consumed
+
+    class << self
+      attr_reader :topic
+
+      # Assigns a topic to a consumer and builds up proper consumer functionalities
+      # so that it can cooperate with the topic settings
+      # @param topic [Karafka::Routing::Topic]
+      # @return [Karafka::Routing::Topic] assigned topic
+      def topic=(topic)
+        @topic = topic
+        Consumers::Includer.call(self)
+      end
+    end
+
+    # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
+    def topic
+      self.class.topic
+    end
+
+    # Creates lazy loaded params batch object
+    # @note Until first params usage, it won't parse data at all
+    # @param messages [Array<Kafka::FetchedMessage>, Array<Hash>] messages with raw
+    #   content (from Kafka) or messages inside a hash (from a backend, etc)
+    # @return [Karafka::Params::ParamsBatch] lazy loaded params batch
+    def params_batch=(messages)
+      @params_batch = Karafka::Params::ParamsBatch.new(messages, topic.parser)
+    end
+
+    # Executes the default consumer flow.
+    def call
+      process
+    end
+
+    private
+
+    # We make it private as it should be accessible only from the inside of a consumer
+    attr_reader :params_batch
+
+    # @return [Karafka::Connection::Client] messages consuming client that can be used to
+    #   commit offsets manually or pause / stop the consumer based on the business logic
+    def client
+      Persistence::Client.read
+    end
+
+    # Method that will perform business logic on data received from Kafka (it will consume
+    # the data)
+    # @note This method needs to be implemented in a subclass. We stub it here as a failover
+    #   if someone forgets about it or makes a typo
+    def consume
+      raise NotImplementedError, 'Implement this in a subclass'
+    end
+  end
+end
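
Since #consume is the only method a subclass must implement, a minimal consumer built on this class could look like the sketch below. The consumer name is illustrative; params_batch and mark_as_consumed come from the class above, and the argument to mark_as_consumed is an assumption (a single params message from the batch):

class ExampleConsumer < Karafka::BaseConsumer
  def consume
    # params_batch is lazy: messages are parsed only when iterated
    params_batch.each do |params|
      Karafka.logger.info "Consumed: #{params}"
    end

    # Relevant when automatically_mark_as_consumed (see attributes_map above) is disabled
    mark_as_consumed params_batch.to_a.last
  end
end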
data/lib/karafka/base_responder.rb CHANGED
@@ -62,6 +62,11 @@ module Karafka
     # Definitions of all topics that we want to be able to use in this responder should go here
     class_attribute :topics

+    # Schema that we can use to control and/or require some additional details upon options
+    # that are being passed to the producer. This can be in particular useful if we want to make
+    # sure that for example partition_key is always present.
+    class_attribute :options_schema
+
     attr_reader :messages_buffer

     class << self
@@ -92,7 +97,7 @@ module Karafka
     # @param parser_class [Class] parser class that we can use to generate appropriate string
     #   or nothing if we want to default to Karafka::Parsers::Json
     # @return [Karafka::BaseResponder] base responder descendant responder
-    def initialize(parser_class = Karafka::Parsers::Json)
+    def initialize(parser_class = Karafka::App.config.parser)
       @parser_class = parser_class
       @messages_buffer = {}
     end
@@ -108,7 +113,8 @@ module Karafka
     #   UsersCreatedResponder.new(MyParser).call(@created_user)
     def call(*data)
       respond(*data)
-      validate!
+      validate_usage!
+      validate_options!
       deliver!
     end

@@ -116,7 +122,7 @@ module Karafka

     # Checks if we met all the topics requirements. It will fail if we didn't send a message to
     # a registered required topic, etc.
-    def validate!
+    def validate_usage!
       registered_topics = self.class.topics.map do |name, topic|
         topic.to_h.merge!(
           usage_count: messages_buffer[name]&.count || 0
@@ -138,21 +144,26 @@ module Karafka
       raise Karafka::Errors::InvalidResponderUsage, result.errors
     end

+    # Checks if we met all the options requirements before sending them to the producer.
+    def validate_options!
+      return true unless self.class.options_schema
+
+      messages_buffer.each_value do |messages_set|
+        messages_set.each do |message_data|
+          result = self.class.options_schema.call(message_data.last)
+          next if result.success?
+          raise Karafka::Errors::InvalidResponderMessageOptions, result.errors
+        end
+      end
+    end
+
     # Takes all the messages from the buffer and delivers them one by one
     # @note This method is executed after the validation, so we're sure that
     #   what we send is legit and it will go to proper topics
     def deliver!
-      messages_buffer.each do |topic, data_elements|
-        # We map this topic name, so it will match namespaced/etc topic in Kafka
-        # @note By default will not change topic (if default mapper used)
-        mapped_topic = Karafka::App.config.topic_mapper.outgoing(topic)
-
-        data_elements.each do |(data, options)|
-          ::WaterDrop::Message.new(
-            mapped_topic,
-            data,
-            options
-          ).send!
+      messages_buffer.each_value do |data_elements|
+        data_elements.each do |data, options|
+          producer(options).call(data, options)
         end
       end
     end
@@ -171,10 +182,23 @@ module Karafka
     # @param options [Hash] options for waterdrop (e.g. partition_key)
     # @note Respond to does not accept multiple data arguments.
     def respond_to(topic, data, options = {})
-      Karafka.monitor.notice(self.class, topic: topic, data: data, options: options)
+      # We normalize the format to string, as WaterDrop and Ruby-Kafka support only
+      # string topics
+      topic = topic.to_s
+
+      messages_buffer[topic] ||= []
+      messages_buffer[topic] << [
+        @parser_class.generate(data),
+        # We map this topic name, so it will match namespaced/etc topic in Kafka
+        # @note By default will not change topic (if default mapper used)
+        options.merge(topic: Karafka::App.config.topic_mapper.outgoing(topic))
+      ]
+    end

-      messages_buffer[topic.to_s] ||= []
-      messages_buffer[topic.to_s] << [@parser_class.generate(data), options]
+    # @param options [Hash] options for waterdrop
+    # @return [Class] WaterDrop producer (sync or async based on the settings)
+    def producer(options)
+      options[:async] ? WaterDrop::AsyncProducer : WaterDrop::SyncProducer
     end
   end
 end
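
Because validate_options! runs each buffered message's options hash (the .last element checked above) through options_schema when one is set, a responder can enforce producer options declaratively. A sketch using the dry-validation 0.x schema API; the responder, topic, and field names are illustrative:

class OrdersResponder < Karafka::BaseResponder
  topic :orders_created

  # Hypothetical requirement: every outgoing message must carry a partition_key
  self.options_schema = Dry::Validation.Schema do
    required(:partition_key).filled(:str?)
  end

  def respond(order)
    respond_to :orders_created, order, partition_key: order['id'].to_s
  end
end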
data/lib/karafka/callbacks.rb ADDED
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Additional callbacks that are used to trigger some things in given places during the
+  # system lifecycle
+  # @note Those callbacks aren't the same as consumer callbacks as they are not related to the
+  #   lifecycle of particular message fetches but rather to the internal flow process.
+  #   They cannot be defined on a consumer callback level because for some of those,
+  #   there aren't consumers in the memory yet and/or they aren't per consumer thread
+  module Callbacks
+    # Types of system callbacks that we have that are not related to consumers
+    TYPES = %i[
+      after_init
+      before_fetch_loop
+    ].freeze
+
+    class << self
+      TYPES.each do |callback_type|
+        # Executes given callbacks set at a given moment with provided arguments
+        define_method callback_type do |*args|
+          Karafka::App
+            .config
+            .callbacks
+            .send(callback_type)
+            .each { |callback| callback.call(*args) }
+        end
+      end
+    end
+  end
+end
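
App extends Callbacks::Dsl (files 18-20 in the list above), so these hooks are registered from the app class: after_init fires once from App.boot! with the app config, before_fetch_loop fires per listener before its fetch loop starts. A sketch, assuming the DSL exposes one registration method per callback type and that before_fetch_loop receives the consumer group and client:

class KarafkaApp < Karafka::App
  # Fired by App.boot!, as seen in app.rb above: Callbacks.after_init(Karafka::App.config)
  after_init do |config|
    Karafka.logger.info "Karafka booted with client_id=#{config.client_id}"
  end

  # Fired in each listener thread right before messages start being fetched
  before_fetch_loop do |consumer_group, client|
    Karafka.logger.debug "Starting fetch loop for #{consumer_group.id}"
  end
end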
data/lib/karafka/callbacks/config.rb ADDED
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Callbacks
+    # Additional configuration required to store procs that we will execute upon callback trigger
+    module Config
+      # Builds up internal callback accumulators
+      # @param klass [Class] Class that we extend with callback config
+      def self.extended(klass)
+        # option internal [Hash] - optional - internal karafka configuration settings that should
+        # never be changed by users directly
+        klass.setting :callbacks do
+          Callbacks::TYPES.each do |callback_type|
+            # option [Array<Proc>] an array of blocks that will be executed at a given moment
+            # depending on the callback type
+            setting callback_type, []
+          end
+        end
+      end
+    end
+  end
+end