karafka 1.0.0 → 1.2.0

Files changed (83)
  1. checksums.yaml +5 -5
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +3 -1
  4. data/CHANGELOG.md +90 -3
  5. data/CONTRIBUTING.md +5 -6
  6. data/Gemfile +1 -1
  7. data/Gemfile.lock +59 -64
  8. data/README.md +28 -57
  9. data/bin/karafka +13 -1
  10. data/config/errors.yml +6 -0
  11. data/karafka.gemspec +10 -9
  12. data/lib/karafka.rb +19 -10
  13. data/lib/karafka/app.rb +8 -15
  14. data/lib/karafka/attributes_map.rb +4 -4
  15. data/lib/karafka/backends/inline.rb +2 -3
  16. data/lib/karafka/base_consumer.rb +68 -0
  17. data/lib/karafka/base_responder.rb +41 -17
  18. data/lib/karafka/callbacks.rb +30 -0
  19. data/lib/karafka/callbacks/config.rb +22 -0
  20. data/lib/karafka/callbacks/dsl.rb +16 -0
  21. data/lib/karafka/cli/base.rb +2 -0
  22. data/lib/karafka/cli/flow.rb +1 -1
  23. data/lib/karafka/cli/info.rb +1 -2
  24. data/lib/karafka/cli/install.rb +2 -3
  25. data/lib/karafka/cli/server.rb +9 -12
  26. data/lib/karafka/connection/client.rb +117 -0
  27. data/lib/karafka/connection/config_adapter.rb +30 -14
  28. data/lib/karafka/connection/delegator.rb +46 -0
  29. data/lib/karafka/connection/listener.rb +22 -20
  30. data/lib/karafka/consumers/callbacks.rb +54 -0
  31. data/lib/karafka/consumers/includer.rb +51 -0
  32. data/lib/karafka/consumers/responders.rb +24 -0
  33. data/lib/karafka/{controllers → consumers}/single_params.rb +3 -3
  34. data/lib/karafka/errors.rb +19 -2
  35. data/lib/karafka/fetcher.rb +30 -28
  36. data/lib/karafka/helpers/class_matcher.rb +8 -8
  37. data/lib/karafka/helpers/config_retriever.rb +2 -2
  38. data/lib/karafka/instrumentation/listener.rb +112 -0
  39. data/lib/karafka/instrumentation/logger.rb +55 -0
  40. data/lib/karafka/instrumentation/monitor.rb +64 -0
  41. data/lib/karafka/loader.rb +0 -1
  42. data/lib/karafka/params/dsl.rb +156 -0
  43. data/lib/karafka/params/params_batch.rb +7 -2
  44. data/lib/karafka/patches/dry_configurable.rb +7 -7
  45. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  46. data/lib/karafka/persistence/client.rb +25 -0
  47. data/lib/karafka/persistence/consumer.rb +38 -0
  48. data/lib/karafka/persistence/topic.rb +29 -0
  49. data/lib/karafka/process.rb +6 -5
  50. data/lib/karafka/responders/builder.rb +15 -14
  51. data/lib/karafka/responders/topic.rb +8 -1
  52. data/lib/karafka/routing/builder.rb +2 -2
  53. data/lib/karafka/routing/consumer_group.rb +1 -1
  54. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  55. data/lib/karafka/routing/router.rb +1 -1
  56. data/lib/karafka/routing/topic.rb +5 -11
  57. data/lib/karafka/routing/{mapper.rb → topic_mapper.rb} +2 -2
  58. data/lib/karafka/schemas/config.rb +4 -5
  59. data/lib/karafka/schemas/consumer_group.rb +45 -24
  60. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  61. data/lib/karafka/schemas/responder_usage.rb +1 -0
  62. data/lib/karafka/server.rb +39 -20
  63. data/lib/karafka/setup/config.rb +74 -51
  64. data/lib/karafka/setup/configurators/base.rb +6 -12
  65. data/lib/karafka/setup/configurators/params.rb +25 -0
  66. data/lib/karafka/setup/configurators/water_drop.rb +15 -14
  67. data/lib/karafka/setup/dsl.rb +22 -0
  68. data/lib/karafka/templates/{application_controller.rb.example → application_consumer.rb.example} +2 -3
  69. data/lib/karafka/templates/karafka.rb.example +18 -5
  70. data/lib/karafka/version.rb +1 -1
  71. metadata +87 -63
  72. data/.github/ISSUE_TEMPLATE.md +0 -2
  73. data/Rakefile +0 -7
  74. data/lib/karafka/base_controller.rb +0 -118
  75. data/lib/karafka/connection/messages_consumer.rb +0 -106
  76. data/lib/karafka/connection/messages_processor.rb +0 -59
  77. data/lib/karafka/controllers/includer.rb +0 -51
  78. data/lib/karafka/controllers/responders.rb +0 -19
  79. data/lib/karafka/logger.rb +0 -53
  80. data/lib/karafka/monitor.rb +0 -98
  81. data/lib/karafka/params/params.rb +0 -101
  82. data/lib/karafka/persistence.rb +0 -18
  83. data/lib/karafka/setup/configurators/celluloid.rb +0 -22
data/bin/karafka CHANGED
@@ -1,7 +1,19 @@
 #!/usr/bin/env ruby
 
 require 'karafka'
-require Karafka.boot_file.to_s
+
+# If there is a boot file, we need to require it as we expect it to contain
+# Karafka app setup, routes, etc
+if File.exist?(Karafka.boot_file)
+  require Karafka.boot_file.to_s
+else
+  # However when it is unavailable, we still want to be able to run help command
+  # and install command as they don't require configured app itself to run
+  raise(
+    Karafka::Errors::MissingBootFile,
+    Karafka.boot_file
+  ) unless %w[-h install].include?(ARGV[0])
+end
 
 Karafka::Cli.prepare
 Karafka::Cli.start
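
The net effect of this guard is worth spelling out. A hedged illustration of the resulting CLI behavior, assuming the default karafka.rb boot file location (the error class itself is added to lib/karafka/errors.rb, per the file list above):

# In a directory without karafka.rb:
#   karafka install  -> allowed; should generate karafka.rb from the bundled template
#   karafka -h       -> allowed; prints help
#   karafka server   -> raises Karafka::Errors::MissingBootFile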
data/config/errors.yml ADDED
@@ -0,0 +1,6 @@
+en:
+  errors:
+    broker_schema?: >
+      has an invalid format.
+      Expected schema, host and port number.
+      Example: kafka://127.0.0.1:9092 or kafka+ssl://127.0.0.1:9092
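
This copy backs the new broker_schema? predicate used when validating broker addresses in the config schema (schemas/config.rb in the file list). A hedged sketch of a setup that this validation should reject, with setting names assumed from the 1.2 configuration API:

Karafka::App.setup do |config|
  # Missing the kafka:// scheme, so broker_schema? should fail at boot
  # with the message above; 'kafka://127.0.0.1:9092' would pass.
  config.kafka.seed_brokers = ['127.0.0.1:9092']
end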
data/karafka.gemspec CHANGED
@@ -1,6 +1,6 @@
 # frozen_string_literal: true
 
-lib = File.expand_path('../lib', __FILE__)
+lib = File.expand_path('lib', __dir__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 
 require 'karafka/version'
@@ -16,17 +16,18 @@ Gem::Specification.new do |spec|
   spec.description = 'Framework used to simplify Apache Kafka based Ruby applications development'
   spec.license = 'MIT'
 
-  spec.add_dependency 'ruby-kafka', '>= 0.4'
-  spec.add_dependency 'celluloid'
-  spec.add_dependency 'envlogic', '~> 1.0'
-  spec.add_dependency 'waterdrop', '>= 0.4'
-  spec.add_dependency 'rake', '>= 11.3'
-  spec.add_dependency 'thor', '~> 0.19'
-  spec.add_dependency 'activesupport', '>= 5.0'
-  spec.add_dependency 'dry-validation', '~> 0.11'
+  spec.add_dependency 'activesupport', '>= 4.0'
   spec.add_dependency 'dry-configurable', '~> 0.7'
+  spec.add_dependency 'dry-inflector', '~> 0.1.1'
+  spec.add_dependency 'dry-monitor', '~> 0.1'
+  spec.add_dependency 'dry-validation', '~> 0.11'
+  spec.add_dependency 'envlogic', '~> 1.0'
   spec.add_dependency 'multi_json', '>= 1.12'
+  spec.add_dependency 'rake', '>= 11.3'
   spec.add_dependency 'require_all', '>= 1.4'
+  spec.add_dependency 'ruby-kafka', '>= 0.5.3'
+  spec.add_dependency 'thor', '~> 0.19'
+  spec.add_dependency 'waterdrop', '~> 1.2'
 
   spec.required_ruby_version = '>= 2.3.0'
 
data/lib/karafka.rb CHANGED
@@ -2,8 +2,6 @@
 
 %w[
   English
-  bundler
-  celluloid/current
   waterdrop
   kafka
   envlogic
@@ -13,11 +11,9 @@
   require_all
   dry-configurable
   dry-validation
+  dry/inflector
+  dry/monitor/notifications
   active_support/callbacks
-  active_support/core_ext/class/subclasses
-  active_support/core_ext/hash/indifferent_access
-  active_support/descendants_tracker
-  active_support/inflector
   karafka/loader
 ].each(&method(:require))
 
@@ -31,14 +27,14 @@ module Karafka
       @logger ||= App.config.logger
     end
 
-    # @return [::Karafka::Monitor] monitor that we want to use. Will use dummy monitor by default
+    # @return [::Karafka::Monitor] monitor that we want to use
    def monitor
       @monitor ||= App.config.monitor
    end
 
    # @return [String] root path of this gem
    def gem_root
-      Pathname.new(File.expand_path('../..', __FILE__))
+      Pathname.new(File.expand_path('..', __dir__))
    end
 
    # @return [String] Karafka app root path (user application path)
@@ -48,13 +44,13 @@ module Karafka
 
    # @return [String] path to Karafka gem root core
    def core_root
-      Pathname.new(File.expand_path('../karafka', __FILE__))
+      Pathname.new(File.expand_path('karafka', __dir__))
    end
 
    # @return [String] path to a default file that contains booting procedure etc
    # @note By default it is a file called 'karafka.rb' but it can be specified as you wish if you
    #   have Karafka that is merged into a Sinatra/Rails app and karafka.rb is taken.
-    #   It will be used for console/controllers/etc
+    #   It will be used for console/consumers/etc
    # @example Standard only-Karafka case
    #   Karafka.boot_file #=> '/home/app_path/karafka.rb'
    # @example Non standard case
@@ -66,4 +62,17 @@ module Karafka
   end
 end
 
+%w[
+  callbacks
+  callbacks/*
+  setup/dsl
+  setup/config
+  status
+  schemas/config
+  schemas/consumer_group_topic
+  schemas/consumer_group
+].each { |path| require_all File.join(Karafka.core_root, path + '.rb') }
+
 Karafka::Loader.load!(Karafka.core_root)
+Kafka::Consumer.prepend(Karafka::Patches::RubyKafka)
+Dry::Configurable::Config.prepend(Karafka::Patches::DryConfigurable)
data/lib/karafka/app.rb CHANGED
@@ -3,14 +3,10 @@
 module Karafka
   # App class
   class App
-    class << self
-      # Sets up the whole configuration
-      # @param [Block] block configuration block
-      def setup(&block)
-        Setup::Config.setup(&block)
-        initialize!
-      end
+    extend Setup::Dsl
+    extend Callbacks::Dsl
 
+    class << self
       # Sets up all the internal components and bootstrap whole app
       # We need to know details about consumers in order to setup components,
       # that's why we don't setup them after std setup is done
@@ -19,11 +15,7 @@ module Karafka
      def boot!
        Setup::Config.validate!
        Setup::Config.setup_components
-      end
-
-      # @return [Karafka::Config] config instance
-      def config
-        Setup::Config.config
+        Callbacks.after_init(Karafka::App.config)
      end
 
      # @return [Karafka::Routing::Builder] consumers builder instance
@@ -33,7 +25,7 @@ module Karafka
 
    Status.instance_methods(false).each do |delegated|
      define_method(delegated) do
-        Status.instance.public_send(delegated)
+        Status.instance.send(delegated)
      end
    end
 
@@ -41,10 +33,11 @@ module Karafka
    %i[
      root
      env
-      logger monitor
+      logger
+      monitor
    ].each do |delegated|
      define_method(delegated) do
-        Karafka.public_send(delegated)
+        Karafka.send(delegated)
      end
    end
  end
data/lib/karafka/attributes_map.rb CHANGED
@@ -21,12 +21,12 @@ module Karafka
            offset_retention_time heartbeat_interval
          ],
          subscription: %i[start_from_beginning max_bytes_per_partition],
-          consuming: %i[min_bytes max_wait_time],
+          consuming: %i[min_bytes max_bytes max_wait_time],
          pausing: %i[pause_timeout],
          # All the options that are under kafka config namespace, but are not used
          # directly with kafka api, but from the Karafka user perspective, they are
          # still related to kafka. They should not be proxied anywhere
-          ignored: %i[reconnect_timeout]
+          ignored: %i[reconnect_timeout automatically_mark_as_consumed]
        }
      end
 
@@ -37,7 +37,7 @@ module Karafka
        name
        parser
        responder
-        batch_processing
+        batch_consuming
        persistent
      ]).uniq
    end
@@ -52,7 +52,7 @@ module Karafka
      # only when proxying details go ruby-kafka. We use ignored fields internally in karafka
      ignored_settings = config_adapter[:subscription]
      defined_settings = config_adapter.values.flatten
-      karafka_settings = %i[batch_consuming]
+      karafka_settings = %i[batch_fetching]
      # This is a dirty and bad hack of dry-configurable to get keys before setting values
      dynamically_proxied = Karafka::Setup::Config
                            ._settings
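
These renames follow the 1.2 terminology split between fetching (getting messages from Kafka) and consuming (processing them): the topic-level batch_processing flag becomes batch_consuming, while the old consumer-group-level batch_consuming becomes batch_fetching. A hedged routing sketch with the new names (group, topic, and consumer class are illustrative):

Karafka::App.consumer_groups.draw do
  consumer_group :example_group do
    batch_fetching true        # formerly batch_consuming
    topic :users do
      consumer UsersConsumer
      batch_consuming true     # formerly batch_processing
    end
  end
end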
data/lib/karafka/backends/inline.rb CHANGED
@@ -7,10 +7,9 @@ module Karafka
    module Inline
      private
 
-      # Executes perform code immediately (without enqueuing)
+      # Executes consume code immediately (without enqueuing)
      def process
-        Karafka.monitor.notice(self.class, params_batch)
-        perform
+        Karafka.monitor.instrument('backends.inline.process', caller: self) { consume }
      end
    end
  end
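
The inline backend now goes through the dry-monitor based instrumentation instead of the old Monitor#notice, so each batch's processing can be observed from outside. A hedged sketch of subscribing to the event (the time and caller payload keys are assumed from dry-monitor's default instrumentation payload):

Karafka.monitor.subscribe('backends.inline.process') do |event|
  Karafka.logger.info "#{event[:caller].class} consumed a batch in #{event[:time]}ms"
end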
data/lib/karafka/base_consumer.rb ADDED
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+# Karafka module namespace
+module Karafka
+  # Base consumer from which all Karafka consumers should inherit
+  class BaseConsumer
+    extend ActiveSupport::DescendantsTracker
+    extend Forwardable
+
+    # Allows us to mark messages as consumed for non-automatic mode without having
+    # to use consumer client directly. We do this that way, because most of the people should not
+    # mess with the client instance directly (just in case)
+    def_delegator :client, :mark_as_consumed
+
+    private :mark_as_consumed
+
+    class << self
+      attr_reader :topic
+
+      # Assigns a topic to a consumer and builds up proper consumer functionalities
+      # so that it can cooperate with the topic settings
+      # @param topic [Karafka::Routing::Topic]
+      # @return [Karafka::Routing::Topic] assigned topic
+      def topic=(topic)
+        @topic = topic
+        Consumers::Includer.call(self)
+      end
+    end
+
+    # @return [Karafka::Routing::Topic] topic to which a given consumer is subscribed
+    def topic
+      self.class.topic
+    end
+
+    # Creates lazy loaded params batch object
+    # @note Until first params usage, it won't parse data at all
+    # @param messages [Array<Kafka::FetchedMessage>, Array<Hash>] messages with raw
+    #   content (from Kafka) or messages inside a hash (from backend, etc)
+    # @return [Karafka::Params::ParamsBatch] lazy loaded params batch
+    def params_batch=(messages)
+      @params_batch = Karafka::Params::ParamsBatch.new(messages, topic.parser)
+    end
+
+    # Executes the default consumer flow.
+    def call
+      process
+    end
+
+    private
+
+    # We make it private as it should be accessible only from the inside of a consumer
+    attr_reader :params_batch
+
+    # @return [Karafka::Connection::Client] messages consuming client that can be used to
+    #   commit manually offset or pause / stop consumer based on the business logic
+    def client
+      Persistence::Client.read
+    end
+
+    # Method that will perform business logic on data received from Kafka (it will consume
+    # the data)
+    # @note This method needs to be implemented in a subclass. We stub it here as a failover if
+    #   someone forgets about it or makes a typo
+    def consume
+      raise NotImplementedError, 'Implement this in a subclass'
+    end
+  end
+end
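
This class replaces the removed Karafka::BaseController (see data/lib/karafka/base_controller.rb near the bottom of the file list): subclasses implement #consume instead of #perform. A minimal, hedged consumer sketch (the payload shape is illustrative):

class UsersConsumer < Karafka::BaseConsumer
  def consume
    # params_batch lazy-parses each message with the topic's parser
    params_batch.each do |params|
      Karafka.logger.info "Got user payload: #{params['user']}"
    end
    # With automatically_mark_as_consumed disabled, offsets could be
    # committed by hand via the private delegator:
    # mark_as_consumed params_batch.to_a.last
  end
end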
data/lib/karafka/base_responder.rb CHANGED
@@ -62,6 +62,11 @@ module Karafka
    # Definitions of all topics that we want to be able to use in this responder should go here
    class_attribute :topics
 
+    # Schema that we can use to control and/or require some additional details upon options
+    # that are being passed to the producer. This can be in particular useful if we want to make
+    # sure that for example partition_key is always present.
+    class_attribute :options_schema
+
    attr_reader :messages_buffer
 
    class << self
@@ -92,7 +97,7 @@ module Karafka
    # @param parser_class [Class] parser class that we can use to generate appropriate string
    #   or nothing if we want to default to Karafka::Parsers::Json
    # @return [Karafka::BaseResponder] base responder descendant responder
-    def initialize(parser_class = Karafka::Parsers::Json)
+    def initialize(parser_class = Karafka::App.config.parser)
      @parser_class = parser_class
      @messages_buffer = {}
    end
@@ -108,7 +113,8 @@ module Karafka
    #   UsersCreatedResponder.new(MyParser).call(@created_user)
    def call(*data)
      respond(*data)
-      validate!
+      validate_usage!
+      validate_options!
      deliver!
    end
 
@@ -116,7 +122,7 @@ module Karafka
 
    # Checks if we met all the topics requirements. It will fail if we didn't send a message to
    #   a registered required topic, etc.
-    def validate!
+    def validate_usage!
      registered_topics = self.class.topics.map do |name, topic|
        topic.to_h.merge!(
          usage_count: messages_buffer[name]&.count || 0
@@ -138,21 +144,26 @@ module Karafka
      raise Karafka::Errors::InvalidResponderUsage, result.errors
    end
 
+    # Checks if we met all the options requirements before sending them to the producer.
+    def validate_options!
+      return true unless self.class.options_schema
+
+      messages_buffer.each_value do |messages_set|
+        messages_set.each do |message_data|
+          result = self.class.options_schema.call(message_data.last)
+          next if result.success?
+          raise Karafka::Errors::InvalidResponderMessageOptions, result.errors
+        end
+      end
+    end
+
    # Takes all the messages from the buffer and delivers them one by one
    # @note This method is executed after the validation, so we're sure that
    #   what we send is legit and it will go to proper topics
    def deliver!
-      messages_buffer.each do |topic, data_elements|
-        # We map this topic name, so it will match namespaced/etc topic in Kafka
-        # @note By default will not change topic (if default mapper used)
-        mapped_topic = Karafka::App.config.topic_mapper.outgoing(topic)
-
-        data_elements.each do |(data, options)|
-          ::WaterDrop::Message.new(
-            mapped_topic,
-            data,
-            options
-          ).send!
+      messages_buffer.each_value do |data_elements|
+        data_elements.each do |data, options|
+          producer(options).call(data, options)
        end
      end
    end
@@ -171,10 +182,23 @@ module Karafka
    # @param options [Hash] options for waterdrop (e.g. partition_key)
    # @note Respond to does not accept multiple data arguments.
    def respond_to(topic, data, options = {})
-      Karafka.monitor.notice(self.class, topic: topic, data: data, options: options)
+      # We normalize the format to string, as WaterDrop and Ruby-Kafka support only
+      # string topics
+      topic = topic.to_s
+
+      messages_buffer[topic] ||= []
+      messages_buffer[topic] << [
+        @parser_class.generate(data),
+        # We map this topic name, so it will match namespaced/etc topic in Kafka
+        # @note By default will not change topic (if default mapper used)
+        options.merge(topic: Karafka::App.config.topic_mapper.outgoing(topic))
+      ]
+    end
 
-      messages_buffer[topic.to_s] ||= []
-      messages_buffer[topic.to_s] << [@parser_class.generate(data), options]
+    # @param options [Hash] options for waterdrop
+    # @return [Class] WaterDrop producer (sync or async based on the settings)
+    def producer(options)
+      options[:async] ? WaterDrop::AsyncProducer : WaterDrop::SyncProducer
    end
  end
 end
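
Two additions stand out here: options_schema lets a responder validate per-message delivery options before anything is sent, and #producer picks WaterDrop::AsyncProducer or WaterDrop::SyncProducer based on the async option. A hedged sketch of a responder using both (topic name and payload are illustrative; the schema uses the dry-validation 0.11 API the gemspec pins):

class UsersCreatedResponder < Karafka::BaseResponder
  topic :users_created

  # Reject deliveries that don't carry a partition key
  self.options_schema = Dry::Validation.Schema do
    required(:partition_key).filled(:str?)
  end

  def respond(user)
    # async: true should route this through WaterDrop::AsyncProducer
    respond_to :users_created, user, partition_key: user[:id].to_s, async: true
  end
end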
data/lib/karafka/callbacks.rb ADDED
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Additional callbacks that are used to trigger some things in given places during the
+  # system lifecycle
+  # @note Those callbacks aren't the same as consumer callbacks as they are not related to the
+  #   lifecycle of particular messages fetches but rather to the internal flow process.
+  #   They cannot be defined on a consumer callback level because for some of those,
+  #   there aren't consumers in the memory yet and/or they aren't per consumer thread
+  module Callbacks
+    # Types of system callbacks that we have that are not related to consumers
+    TYPES = %i[
+      after_init
+      before_fetch_loop
+    ].freeze
+
+    class << self
+      TYPES.each do |callback_type|
+        # Executes given callbacks set at a given moment with provided arguments
+        define_method callback_type do |*args|
+          Karafka::App
+            .config
+            .callbacks
+            .send(callback_type)
+            .each { |callback| callback.call(*args) }
+        end
+      end
+    end
+  end
+end
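
These hooks are registered through the new Callbacks::Dsl that Karafka::App extends (see the app.rb diff above) and executed by the dispatcher shown here. A hedged usage sketch, with the callback arguments assumed from the call sites (boot! passes the app config; the fetch loop should pass the consumer group and connection client):

class KarafkaApp < Karafka::App
  # Runs once after boot!; receives the app config
  after_init do |config|
    Karafka.logger.info "Initialized #{config.client_id}"
  end

  # Runs in each listener thread before its fetch loop starts
  before_fetch_loop do |consumer_group, _client|
    Karafka.logger.debug "Starting fetch loop for #{consumer_group.id}"
  end
end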
data/lib/karafka/callbacks/config.rb ADDED
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Callbacks
+    # Additional configuration required to store procs that we will execute upon callback trigger
+    module Config
+      # Builds up internal callback accumulators
+      # @param klass [Class] Class that we extend with callback config
+      def self.extended(klass)
+        # option internal [Hash] - optional - internal karafka configuration settings that should
+        # never be changed by users directly
+        klass.setting :callbacks do
+          Callbacks::TYPES.each do |callback_type|
+            # option [Array<Proc>] an array of blocks that will be executed at a given moment
+            # depending on the callback type
+            setting callback_type, []
+          end
+        end
+      end
+    end
+  end
+end
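
Since this module only builds empty Array accumulators under config.callbacks, everything the DSL registers should simply end up there. A hedged illustration of the storage side:

Karafka::App.after_init { |_config| Karafka.logger.info 'booted' }
Karafka::App.config.callbacks.after_init.size #=> 1 (assuming nothing else registered)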