karafka 1.1.2 → 1.2.0.beta1

Files changed (67)
  1. checksums.yaml +5 -5
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +1 -0
  4. data/CHANGELOG.md +34 -0
  5. data/Gemfile +1 -2
  6. data/Gemfile.lock +35 -22
  7. data/README.md +4 -3
  8. data/karafka.gemspec +5 -3
  9. data/lib/karafka.rb +4 -5
  10. data/lib/karafka/app.rb +8 -15
  11. data/lib/karafka/attributes_map.rb +1 -1
  12. data/lib/karafka/backends/inline.rb +1 -2
  13. data/lib/karafka/{base_controller.rb → base_consumer.rb} +19 -11
  14. data/lib/karafka/base_responder.rb +33 -14
  15. data/lib/karafka/callbacks.rb +30 -0
  16. data/lib/karafka/callbacks/config.rb +22 -0
  17. data/lib/karafka/callbacks/dsl.rb +16 -0
  18. data/lib/karafka/cli/install.rb +2 -3
  19. data/lib/karafka/cli/server.rb +0 -1
  20. data/lib/karafka/connection/{consumer.rb → client.rb} +25 -33
  21. data/lib/karafka/connection/config_adapter.rb +14 -6
  22. data/lib/karafka/connection/delegator.rb +46 -0
  23. data/lib/karafka/connection/listener.rb +22 -13
  24. data/lib/karafka/{controllers → consumers}/callbacks.rb +9 -9
  25. data/lib/karafka/consumers/includer.rb +51 -0
  26. data/lib/karafka/consumers/responders.rb +24 -0
  27. data/lib/karafka/{controllers → consumers}/single_params.rb +3 -3
  28. data/lib/karafka/errors.rb +10 -3
  29. data/lib/karafka/fetcher.rb +30 -34
  30. data/lib/karafka/helpers/class_matcher.rb +8 -8
  31. data/lib/karafka/helpers/config_retriever.rb +2 -2
  32. data/lib/karafka/instrumentation/listener.rb +97 -0
  33. data/lib/karafka/instrumentation/logger.rb +55 -0
  34. data/lib/karafka/instrumentation/monitor.rb +62 -0
  35. data/lib/karafka/loader.rb +0 -1
  36. data/lib/karafka/params/{params.rb → dsl.rb} +69 -44
  37. data/lib/karafka/params/params_batch.rb +2 -2
  38. data/lib/karafka/patches/dry_configurable.rb +6 -2
  39. data/lib/karafka/patches/ruby_kafka.rb +10 -10
  40. data/lib/karafka/persistence/client.rb +25 -0
  41. data/lib/karafka/persistence/consumer.rb +27 -14
  42. data/lib/karafka/persistence/topic.rb +29 -0
  43. data/lib/karafka/process.rb +5 -4
  44. data/lib/karafka/responders/builder.rb +15 -14
  45. data/lib/karafka/routing/builder.rb +1 -1
  46. data/lib/karafka/routing/consumer_mapper.rb +3 -2
  47. data/lib/karafka/routing/router.rb +1 -1
  48. data/lib/karafka/routing/topic.rb +5 -5
  49. data/lib/karafka/schemas/config.rb +3 -0
  50. data/lib/karafka/schemas/consumer_group.rb +14 -2
  51. data/lib/karafka/schemas/consumer_group_topic.rb +1 -1
  52. data/lib/karafka/server.rb +33 -5
  53. data/lib/karafka/setup/config.rb +45 -21
  54. data/lib/karafka/setup/configurators/base.rb +6 -12
  55. data/lib/karafka/setup/configurators/params.rb +25 -0
  56. data/lib/karafka/setup/configurators/water_drop.rb +6 -3
  57. data/lib/karafka/setup/dsl.rb +22 -0
  58. data/lib/karafka/templates/{application_controller.rb.example → application_consumer.rb.example} +2 -3
  59. data/lib/karafka/templates/karafka.rb.example +14 -3
  60. data/lib/karafka/version.rb +1 -1
  61. metadata +58 -23
  62. data/lib/karafka/connection/processor.rb +0 -61
  63. data/lib/karafka/controllers/includer.rb +0 -51
  64. data/lib/karafka/controllers/responders.rb +0 -19
  65. data/lib/karafka/logger.rb +0 -53
  66. data/lib/karafka/monitor.rb +0 -98
  67. data/lib/karafka/persistence/controller.rb +0 -38
data/lib/karafka/callbacks.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Additional callbacks that are used to trigger some things in given places during the
+  # system lifecycle
+  # @note Those callbacks aren't the same as consumer callbacks as they are not related to the
+  #   lifecycle of particular messages fetches but rather to the internal flow process.
+  #   They cannot be defined on a consumer callback level because for some of those,
+  #   there aren't consumers in the memory yet and/or they aren't per consumer thread
+  module Callbacks
+    # Types of system callbacks that we have that are not related to consumers
+    TYPES = %i[
+      after_init
+      before_fetch_loop
+    ].freeze
+
+    class << self
+      TYPES.each do |callback_type|
+        # Executes given callbacks set at a given moment with provided arguments
+        define_method callback_type do |*args|
+          Karafka::App
+            .config
+            .callbacks
+            .send(callback_type)
+            .each { |block| block.call(*args) }
+        end
+      end
+    end
+  end
+end
data/lib/karafka/callbacks/config.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Callbacks
+    # Additional configuration required to store procs that we will execute upon callback trigger
+    module Config
+      # Builds up internal callback accumulators
+      # @param klass [Class] Class that we extend with callback config
+      def self.extended(klass)
+        # option internal [Hash] - optional - internal karafka configuration settings that should
+        #   never be changed by users directly
+        klass.setting :callbacks do
+          Callbacks::TYPES.each do |callback_type|
+            # option [Array<Proc>] an array of blocks that will be executed at a given moment
+            #   depending on the callback type
+            setting callback_type, []
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/callbacks/dsl.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Callbacks
+    # App level dsl to define callbacks
+    module Dsl
+      Callbacks::TYPES.each do |callback_type|
+        # Allows us to define a block, that will be executed for a given moment
+        # @param [Block] block that should be executed after the initialization process
+        define_method callback_type do |&block|
+          config.callbacks.send(callback_type).push block
+        end
+      end
+    end
+  end
+end
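
Taken together, the three new files above form the app-level callback system: Callbacks holds the types and the trigger logic, Config stores registered blocks inside the app configuration, and Dsl exposes one class-level method per callback type. A minimal usage sketch (the class name and logged messages are hypothetical; the arguments passed to after_init are defined outside of this diff, so they are left unspecified):

class App < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
  end

  # pushed into config.callbacks.after_init and executed via
  # Karafka::Callbacks.after_init once the app is initialized
  after_init do |*args|
    Karafka.logger.info("Initialized with: #{args.inspect}")
  end

  # the Listener (further below in this diff) triggers this with the
  # consumer group and its connection client before entering the fetch loop
  before_fetch_loop do |consumer_group, client|
    Karafka.logger.info("Starting fetch loop for #{consumer_group.id}")
  end
end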
data/lib/karafka/cli/install.rb
@@ -9,8 +9,7 @@ module Karafka
 
       # Directories created by default
       INSTALL_DIRS = %w[
-        app/models
-        app/controllers
+        app/consumers
         app/responders
         config
         log
@@ -20,7 +19,7 @@ module Karafka
       # Where should we map proper files from templates
       INSTALL_FILES_MAP = {
         'karafka.rb.example' => Karafka.boot_file.basename,
-        'application_controller.rb.example' => 'app/controllers/application_controller.rb',
+        'application_consumer.rb.example' => 'app/consumers/application_consumer.rb',
         'application_responder.rb.example' => 'app/responders/application_responder.rb'
       }.freeze
 
data/lib/karafka/cli/server.rb
@@ -35,7 +35,6 @@ module Karafka
         # won't alarm or start new system process up until the current one is finished
         ObjectSpace.define_finalizer(self, proc { send(:clean) })
 
-        # After we fork, we can boot celluloid again
         Karafka::Server.run
       end
 
data/lib/karafka/connection/{consumer.rb → client.rb}
@@ -2,37 +2,50 @@
 
 module Karafka
   module Connection
-    # Class used as a wrapper around Ruby-Kafka to simplify additional
+    # Class used as a wrapper around Ruby-Kafka client to simplify additional
     # features that we provide/might provide in future and to hide the internal implementation
-    class Consumer
-      # Creates a queue consumer that will pull the data from Kafka
+    class Client
+      extend Forwardable
+
+      def_delegator :kafka_consumer, :seek
+
+      # Creates a queue consumer client that will pull the data from Kafka
       # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
       #   we create a client
-      # @return [Karafka::Connection::Consumer] group consumer that can subscribe to
+      # @return [Karafka::Connection::Client] group consumer that can subscribe to
       #   multiple topics
       def initialize(consumer_group)
        @consumer_group = consumer_group
-        Persistence::Consumer.write(self)
+        Persistence::Client.write(self)
      end
 
      # Opens connection, gets messages and calls a block for each of the incoming messages
      # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
      # @note This will yield with raw messages - no preprocessing or reformatting.
      def fetch_loop
-        send(
-          consumer_group.batch_fetching ? :consume_each_batch : :consume_each_message
-        ) { |messages| yield(messages) }
+        settings = ConfigAdapter.consuming(consumer_group)
+
+        if consumer_group.batch_fetching
+          kafka_consumer.each_batch(*settings) { |batch| yield(batch.messages) }
+        else
+          # always yield an array of messages, so we have consistent API (always a batch)
+          kafka_consumer.each_message(*settings) { |message| yield([message]) }
+        end
      rescue Kafka::ProcessingError => e
        # If there was an error during consumption, we have to log it, pause current partition
        # and process other things
-        Karafka.monitor.notice_error(self.class, e.cause)
+        Karafka.monitor.instrument(
+          'connection.client.fetch_loop.error',
+          caller: self,
+          error: e.cause
+        )
        pause(e.topic, e.partition)
        retry
        # This is on purpose - see the notes for this method
        # rubocop:disable RescueException
      rescue Exception => e
        # rubocop:enable RescueException
-        Karafka.monitor.notice_error(self.class, e)
+        Karafka.monitor.instrument('connection.client.fetch_loop.error', caller: self, error: e)
        retry
      end
 
@@ -70,32 +83,11 @@ module Karafka
 
      attr_reader :consumer_group
 
-      # Consumes messages from Kafka in batches
-      # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-      def consume_each_batch
-        kafka_consumer.each_batch(
-          ConfigAdapter.consuming(consumer_group)
-        ) do |batch|
-          yield(batch.messages)
-        end
-      end
-
-      # Consumes messages from Kafka one by one
-      # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-      def consume_each_message
-        kafka_consumer.each_message(
-          ConfigAdapter.consuming(consumer_group)
-        ) do |message|
-          # always yield an array of messages, so we have consistent API (always a batch)
-          yield([message])
-        end
-      end
-
      # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
      #   that is set up to consume from topics of a given consumer group
      def kafka_consumer
        @kafka_consumer ||= kafka.consumer(
-          ConfigAdapter.consumer(consumer_group)
+          *ConfigAdapter.consumer(consumer_group)
        ).tap do |consumer|
          consumer_group.topics.each do |topic|
            consumer.subscribe(*ConfigAdapter.subscription(topic))
@@ -114,7 +106,7 @@ module Karafka
      # @note We don't cache it internally because we cache kafka_consumer that uses kafka
      #   object instance
      def kafka
-        Kafka.new(ConfigAdapter.client(consumer_group))
+        Kafka.new(*ConfigAdapter.client(consumer_group))
      end
    end
  end
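
The renamed Client wrapper keeps a consistent batch-shaped API: whichever fetching mode is configured, the block always receives an array of Kafka::FetchedMessage objects. A rough usage sketch (normally only the Listener instantiates this class; the seek arguments follow ruby-kafka's Consumer#seek and the values are illustrative):

client = Karafka::Connection::Client.new(consumer_group)

client.fetch_loop do |kafka_messages|
  # always Array<Kafka::FetchedMessage>, in both batch_fetching modes
  kafka_messages.each { |message| puts message.value }
end

# #seek is forwarded straight to the underlying Kafka::Consumer
client.seek('example_topic', 0, 100)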
data/lib/karafka/connection/config_adapter.rb
@@ -14,7 +14,10 @@ module Karafka
      class << self
        # Builds all the configuration settings for Kafka.new method
        # @param _consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-        # @return [Hash] hash with all the settings required by Kafka.new method
+        # @return [Array<Hash>] Array with all the client arguments including hash with all
+        #   the settings required by Kafka.new method
+        # @note We return array, so we can inject any arguments we want, in case of changes in the
+        #   raw driver
        def client(_consumer_group)
          # This one is a default that takes all the settings except special
          # cases defined in the map
@@ -33,28 +36,33 @@ module Karafka
            settings[setting_name] = setting_value
          end
 
-          sanitize(settings)
+          settings_hash = sanitize(settings)
+
+          # Normalization for the way Kafka::Client accepts arguments from 0.5.3
+          [settings_hash.delete(:seed_brokers), settings_hash]
        end
 
        # Builds all the configuration settings for kafka#consumer method
        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-        # @return [Hash] hash with all the settings required by Kafka#consumer method
+        # @return [Array<Hash>] array with all the consumer arguments including hash with all
+        #   the settings required by Kafka#consumer
        def consumer(consumer_group)
          settings = { group_id: consumer_group.id }
          settings = fetch_for(:consumer, consumer_group, settings)
-          sanitize(settings)
+          [sanitize(settings)]
        end
 
        # Builds all the configuration settings for kafka consumer consume_each_batch and
        # consume_each_message methods
        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-        # @return [Hash] hash with all the settings required by
+        # @return [Array<Hash>] Array with all the arguments required by consuming method
+        #   including hash with all the settings required by
        #   Kafka::Consumer#consume_each_message and Kafka::Consumer#consume_each_batch method
        def consuming(consumer_group)
          settings = {
            automatically_mark_as_processed: consumer_group.automatically_mark_as_consumed
          }
-          sanitize(fetch_for(:consuming, consumer_group, settings))
+          [sanitize(fetch_for(:consuming, consumer_group, settings))]
        end
 
        # Builds all the configuration settings for kafka consumer#subscribe method
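
All three adapter methods now return arrays instead of bare hashes, so callers can splat them regardless of how many positional arguments the underlying driver expects. For client this matters because ruby-kafka 0.5.3 takes the seed brokers as a separate first argument. An illustration of the new return shape (the concrete values are hypothetical):

seed_brokers, settings = Karafka::Connection::ConfigAdapter.client(consumer_group)
seed_brokers # => ['kafka://localhost:9092']
settings     # => { client_id: 'example_app', ... }

# which is what makes the splat form used in Client possible:
Kafka.new(*Karafka::Connection::ConfigAdapter.client(consumer_group))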
data/lib/karafka/connection/delegator.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Class that delegates processing of messages for which we listen to a proper processor
+    module Delegator
+      class << self
+        # Delegates messages (does something with them)
+        # It will either schedule or run a proper processor action for messages
+        # @note This should be looped to obtain a constant delegating of new messages
+        # @note We catch all the errors here, to make sure that none failures
+        #   for a given consumption will affect other consumed messages
+        #   If we wouldn't catch it, it would propagate up until killing the thread
+        # @note It is a one huge method, because of performance reasons. It is much faster then
+        #   using send or invoking additional methods
+        # @param group_id [String] group_id of a group from which a given message came
+        # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages fetched from kafka
+        def call(group_id, kafka_messages)
+          # @note We always get messages by topic and partition so we can take topic from the
+          #   first one and it will be valid for all the messages
+          topic = Persistence::Topic.fetch(group_id, kafka_messages[0].topic)
+          consumer = Persistence::Consumer.fetch(topic, kafka_messages[0].partition)
+
+          Karafka.monitor.instrument(
+            'connection.delegator.call',
+            caller: self,
+            consumer: consumer,
+            kafka_messages: kafka_messages
+          ) do
+            # Depending on a case (persisted or not) we might use new consumer instance per
+            # each batch, or use the same one for all of them (for implementing buffering, etc.)
+            if topic.batch_consuming
+              consumer.params_batch = kafka_messages
+              consumer.call
+            else
+              kafka_messages.each do |kafka_message|
+                consumer.params_batch = [kafka_message]
+                consumer.call
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
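
From a consumer's perspective, the Delegator decides how often its flow runs: with batch_consuming enabled the whole fetched batch lands in params_batch at once, otherwise the consumer is called once per message with a single-element batch. A hypothetical consumer illustrating both cases (class name and body are made up):

class EventsConsumer < Karafka::BaseConsumer
  def consume
    # batch_consuming true  => params_batch wraps the whole kafka_messages array
    # batch_consuming false => one call per message, params_batch has 1 element
    params_batch.each { |params| Karafka.logger.info(params.inspect) }
  end
end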
data/lib/karafka/connection/listener.rb
@@ -7,8 +7,6 @@ module Karafka
    # @note Listener itself does nothing with the message - it will return to the block
    #   a raw Kafka::FetchedMessage
    class Listener
-      attr_reader :consumer_group
-
      # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group that holds details
      #   on what topics and with what settings should we listen
      # @return [Karafka::Connection::Listener] listener instance
@@ -16,6 +14,17 @@ module Karafka
        @consumer_group = consumer_group
      end
 
+      # Runs prefetch callbacks and executes the main listener fetch loop
+      def call
+        Karafka::Callbacks.before_fetch_loop(
+          @consumer_group,
+          client
+        )
+        fetch_loop
+      end
+
+      private
+
      # Opens connection, gets messages and calls a block for each of the incoming messages
      # @yieldparam [String] consumer group id
      # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
@@ -26,25 +35,25 @@ module Karafka
      # won't crash the whole cluster. Here we mostly focus on catchin the exceptions related to
      # Kafka connections / Internet connection issues / Etc. Business logic problems should not
      # propagate this far
-      def fetch_loop(block)
-        consumer.fetch_loop do |raw_messages|
-          block.call(consumer_group.id, raw_messages)
+      def fetch_loop
+        client.fetch_loop do |raw_messages|
+          # @note What happens here is a delegation of processing to a proper processor based
+          #   on the incoming messages characteristics
+          Karafka::Connection::Delegator.call(@consumer_group.id, raw_messages)
        end
      # This is on purpose - see the notes for this method
      # rubocop:disable RescueException
      rescue Exception => e
+        Karafka.monitor.instrument('connection.listener.fetch_loop.error', caller: self, error: e)
        # rubocop:enable RescueException
-        Karafka.monitor.notice_error(self.class, e)
-        @consumer&.stop
-        retry if @consumer
+        @client&.stop
+        retry if @client
      end
 
-      private
-
-      # @return [Karafka::Connection::Consumer] wrapped kafka consumer for a given topic
+      # @return [Karafka::Connection::Client] wrapped kafka consuming client for a given topic
      #   consumption
-      def consumer
-        @consumer ||= Consumer.new(consumer_group)
+      def client
+        @client ||= Client.new(@consumer_group)
      end
    end
  end
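
The Listener's public surface shrinks to #call, which fires the before_fetch_loop callbacks and then blocks inside the fetch loop, handing every batch to the Delegator. A minimal sketch of driving listeners (the real orchestration lives in Karafka::Fetcher, whose diff is not shown here; the thread handling below is simplified for illustration):

listeners = Karafka::App.consumer_groups.map do |consumer_group|
  Karafka::Connection::Listener.new(consumer_group)
end

# each listener blocks in its fetch loop, so each gets its own thread
listeners.map { |listener| Thread.new { listener.call } }.each(&:join)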
data/lib/karafka/{controllers → consumers}/callbacks.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 module Karafka
-  module Controllers
+  module Consumers
    # Additional callbacks that can be used to trigger some actions on certain moments like
    # manual offset management, committing or anything else outside of a standard messages flow
    # They are not included by default, as we don't want to provide functionalities that are
@@ -10,7 +10,7 @@ module Karafka
    module Callbacks
      # Types of events on which we run callbacks
      TYPES = %i[
-        after_fetched
+        after_fetch
        after_poll
        before_poll
        before_stop
@@ -28,9 +28,9 @@ module Karafka
        end
      end
 
-      # @param controller_class [Class] controller class that we extend with callbacks
-      def self.included(controller_class)
-        controller_class.class_eval do
+      # @param consumer_class [Class] consumer class that we extend with callbacks
+      def self.included(consumer_class)
+        consumer_class.class_eval do
          extend ClassMethods
          include ActiveSupport::Callbacks
 
@@ -41,11 +41,11 @@ module Karafka
        end
      end
 
-      # Executes the default controller flow, runs callbacks and if not halted will call process
-      # method of a proper backend. This is here because it interacts with the default Karafka
-      # call flow and needs to be overwritten in order to support callbacks
+      # Executes the default consumer flow, runs callbacks and if not halted will call process
+      # method of a proper backend. It is here because it interacts with the default Karafka
+      # call flow and needs to be overwritten to support callbacks
      def call
-        run_callbacks :after_fetched do
+        run_callbacks :after_fetch do
          process
        end
      end
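
With the rename, consumer-level callbacks use after_fetch instead of after_fetched. A usage sketch (the consumer name and bodies are hypothetical):

class ExampleConsumer < Karafka::BaseConsumer
  include Karafka::Consumers::Callbacks

  # was after_fetched in 1.1; wraps the processing of each fetched params_batch
  after_fetch do
    Karafka.logger.debug('Messages fetched')
  end

  def consume; end
end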
data/lib/karafka/consumers/includer.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Additional functionalities for consumers
+  module Consumers
+    # Module used to inject functionalities into a given consumer class, based on the consumer
+    # topic and its settings
+    # We don't need all the behaviors in all the cases, so it is not worth having everything
+    # in all the cases all the time
+    module Includer
+      class << self
+        # @param consumer_class [Class] consumer class, that will get some functionalities
+        #   based on the topic under which it operates
+        def call(consumer_class)
+          topic = consumer_class.topic
+
+          bind_backend(consumer_class, topic)
+          bind_params(consumer_class, topic)
+          bind_responders(consumer_class, topic)
+        end
+
+        private
+
+        # Figures out backend for a given consumer class, based on the topic backend and
+        # includes it into the consumer class
+        # @param consumer_class [Class] consumer class
+        # @param topic [Karafka::Routing::Topic] topic of a consumer class
+        def bind_backend(consumer_class, topic)
+          backend = Kernel.const_get("::Karafka::Backends::#{topic.backend.to_s.capitalize}")
+          consumer_class.include backend
+        end
+
+        # Adds a single #params support for non batch processed topics
+        # @param consumer_class [Class] consumer class
+        # @param topic [Karafka::Routing::Topic] topic of a consumer class
+        def bind_params(consumer_class, topic)
+          return if topic.batch_consuming
+          consumer_class.include SingleParams
+        end
+
+        # Adds responders support for topics and consumers with responders defined for them
+        # @param consumer_class [Class] consumer class
+        # @param topic [Karafka::Routing::Topic] topic of a consumer class
+        def bind_responders(consumer_class, topic)
+          return unless topic.responder
+          consumer_class.include Responders
+        end
+      end
+    end
+  end
+end
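
The Includer wires all of the above together at routing time: a topic's settings decide which modules its consumer class receives. A hypothetical route showing which binding each setting triggers (all class and topic names are made up):

App.consumer_groups.draw do
  topic :orders do
    consumer OrdersConsumer     # Includer.call(OrdersConsumer) runs for this class
    backend :inline             # => includes Karafka::Backends::Inline
    batch_consuming false       # => includes Consumers::SingleParams (adds #params)
    responder OrdersResponder   # => includes Consumers::Responders (#respond_with)
  end
end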