karafka 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +5 -5
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +3 -1
  4. data/CHANGELOG.md +90 -3
  5. data/CONTRIBUTING.md +5 -6
  6. data/Gemfile +1 -1
  7. data/Gemfile.lock +59 -64
  8. data/README.md +28 -57
  9. data/bin/karafka +13 -1
  10. data/config/errors.yml +6 -0
  11. data/karafka.gemspec +10 -9
  12. data/lib/karafka.rb +19 -10
  13. data/lib/karafka/app.rb +8 -15
  14. data/lib/karafka/attributes_map.rb +4 -4
  15. data/lib/karafka/backends/inline.rb +2 -3
  16. data/lib/karafka/base_consumer.rb +68 -0
  17. data/lib/karafka/base_responder.rb +41 -17
  18. data/lib/karafka/callbacks.rb +30 -0
  19. data/lib/karafka/callbacks/config.rb +22 -0
  20. data/lib/karafka/callbacks/dsl.rb +16 -0
  21. data/lib/karafka/cli/base.rb +2 -0
  22. data/lib/karafka/cli/flow.rb +1 -1
  23. data/lib/karafka/cli/info.rb +1 -2
  24. data/lib/karafka/cli/install.rb +2 -3
  25. data/lib/karafka/cli/server.rb +9 -12
  26. data/lib/karafka/connection/client.rb +117 -0
  27. data/lib/karafka/connection/config_adapter.rb +30 -14
  28. data/lib/karafka/connection/delegator.rb +46 -0
  29. data/lib/karafka/connection/listener.rb +22 -20
  30. data/lib/karafka/consumers/callbacks.rb +54 -0
  31. data/lib/karafka/consumers/includer.rb +51 -0
  32. data/lib/karafka/consumers/responders.rb +24 -0
  33. data/lib/karafka/{controllers → consumers}/single_params.rb +3 -3
  34. data/lib/karafka/errors.rb +19 -2
  35. data/lib/karafka/fetcher.rb +30 -28
  36. data/lib/karafka/helpers/class_matcher.rb +8 -8
  37. data/lib/karafka/helpers/config_retriever.rb +2 -2
  38. data/lib/karafka/instrumentation/listener.rb +112 -0
  39. data/lib/karafka/instrumentation/logger.rb +55 -0
  40. data/lib/karafka/instrumentation/monitor.rb +64 -0
  41. data/lib/karafka/loader.rb +0 -1
  42. data/lib/karafka/params/dsl.rb +156 -0
  43. data/lib/karafka/params/params_batch.rb +7 -2
  44. data/lib/karafka/patches/dry_configurable.rb +7 -7
  45. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  46. data/lib/karafka/persistence/client.rb +25 -0
  47. data/lib/karafka/persistence/consumer.rb +38 -0
  48. data/lib/karafka/persistence/topic.rb +29 -0
  49. data/lib/karafka/process.rb +6 -5
  50. data/lib/karafka/responders/builder.rb +15 -14
  51. data/lib/karafka/responders/topic.rb +8 -1
  52. data/lib/karafka/routing/builder.rb +2 -2
  53. data/lib/karafka/routing/consumer_group.rb +1 -1
  54. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  55. data/lib/karafka/routing/router.rb +1 -1
  56. data/lib/karafka/routing/topic.rb +5 -11
  57. data/lib/karafka/routing/{mapper.rb → topic_mapper.rb} +2 -2
  58. data/lib/karafka/schemas/config.rb +4 -5
  59. data/lib/karafka/schemas/consumer_group.rb +45 -24
  60. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  61. data/lib/karafka/schemas/responder_usage.rb +1 -0
  62. data/lib/karafka/server.rb +39 -20
  63. data/lib/karafka/setup/config.rb +74 -51
  64. data/lib/karafka/setup/configurators/base.rb +6 -12
  65. data/lib/karafka/setup/configurators/params.rb +25 -0
  66. data/lib/karafka/setup/configurators/water_drop.rb +15 -14
  67. data/lib/karafka/setup/dsl.rb +22 -0
  68. data/lib/karafka/templates/{application_controller.rb.example → application_consumer.rb.example} +2 -3
  69. data/lib/karafka/templates/karafka.rb.example +18 -5
  70. data/lib/karafka/version.rb +1 -1
  71. metadata +87 -63
  72. data/.github/ISSUE_TEMPLATE.md +0 -2
  73. data/Rakefile +0 -7
  74. data/lib/karafka/base_controller.rb +0 -118
  75. data/lib/karafka/connection/messages_consumer.rb +0 -106
  76. data/lib/karafka/connection/messages_processor.rb +0 -59
  77. data/lib/karafka/controllers/includer.rb +0 -51
  78. data/lib/karafka/controllers/responders.rb +0 -19
  79. data/lib/karafka/logger.rb +0 -53
  80. data/lib/karafka/monitor.rb +0 -98
  81. data/lib/karafka/params/params.rb +0 -101
  82. data/lib/karafka/persistence.rb +0 -18
  83. data/lib/karafka/setup/configurators/celluloid.rb +0 -22

data/lib/karafka/loader.rb
@@ -5,7 +5,6 @@ module Karafka
   module Loader
     # Order in which we want to load app files
     DIRS = %w[
-      config/initializers
       lib
       app
     ].freeze
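
With config/initializers gone from DIRS, files under that directory are no longer auto-loaded. A minimal sketch of requiring them explicitly from karafka.rb, assuming the directory layout stays the same (the glob below is an assumption, not part of this diff):

    # Require former config/initializers files by hand (sketch, assumed layout)
    Dir[File.join(Karafka.root.to_s, 'config', 'initializers', '**', '*.rb')].sort.each do |file|
      require file
    end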

data/lib/karafka/params/dsl.rb
@@ -0,0 +1,156 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Params namespace encapsulating all the logic that is directly related to params handling
+  module Params
+    # Dsl for Karafka params. We don't provide the params class here as we want to allow users to
+    # use either a hash (default) or a Rails hash with indifferent access as a base for their params
+    #
+    # We do that because both of them have their own advantages and we don't want to force users
+    # to handle things differently if they already use any of those
+    #
+    # It provides lazy loading not only until the first usage, but also allows us to skip
+    # using parser until we execute our logic. That way we can operate with
+    # heavy-parsing data without slowing down the whole application.
+    module Dsl
+      # Params keys that are "ours" and internal. We use this list for additional backends
+      # that somehow operate on those keys
+      SYSTEM_KEYS = %w[
+        parser
+        value
+        partition
+        offset
+        key
+        create_time
+        receive_time
+        topic
+        parsed
+      ].freeze
+
+      # Params attributes that should be available via a method call invocation for Kafka
+      # client compatibility.
+      # Kafka passes internally a Kafka::FetchedMessage object and the ruby-kafka consumer
+      # uses those fields via method calls, so in order to be able to pass our params
+      # objects there, they have to have the same API.
+      METHOD_ATTRIBUTES = %w[
+        topic
+        partition
+        offset
+        key
+        create_time
+        receive_time
+      ].freeze
+
+      private_constant :METHOD_ATTRIBUTES
+
+      # Class methods required by params to work
+      module ClassMethods
+        # We allow building instances only via the #build method
+
+        # @param message [Kafka::FetchedMessage, Hash] message that we get out of Kafka
+        #   in case of building params inside the main Karafka process in
+        #   Karafka::Connection::Consumer, or a hash when we retrieve data that is already parsed
+        # @param parser [Class] parser class that we will use to unparse data
+        # @return [Karafka::Params::Params] Karafka params object that has not yet used its parser
+        #   for retrieving data that we've got from Kafka
+        # @example Build params instance from a hash
+        #   Karafka::Params::Params.build({ key: 'value' }) #=> params object
+        # @example Build params instance from a Kafka::FetchedMessage object
+        #   Karafka::Params::Params.build(message) #=> params object
+        def build(message, parser)
+          instance = new
+          instance['parser'] = parser
+
+          # A non Kafka fetched message can happen when we interchange data with an
+          # additional backend
+          if message.is_a?(Kafka::FetchedMessage)
+            instance.send(
+              :merge!,
+              'value' => message.value,
+              'partition' => message.partition,
+              'offset' => message.offset,
+              'key' => message.key,
+              'create_time' => message.create_time,
+              'receive_time' => Time.now,
+              # When we get raw messages, they might have a topic that was modified by a
+              # topic mapper. We need to "reverse" this change and map back to the non-modified
+              # format, so our internal flow is not corrupted with the mapping
+              'topic' => Karafka::App.config.topic_mapper.incoming(message.topic)
+            )
+          else
+            instance.send(:merge!, message)
+          end
+
+          instance
+        end
+      end
+
+      # @return [Karafka::Params::Params] this will trigger parser execution. If we decide to
+      #   retrieve data, the parser will be executed to parse data. The output of parsing will be
+      #   merged into the current object. This object will also be marked as already parsed, so
+      #   we won't parse it again.
+      def retrieve!
+        return self if self['parsed']
+        self['parsed'] = true
+
+        merge!(parse(delete('value')))
+      end
+
+      # Includes and extends the base params klass with everything that is needed by Karafka to
+      # fully work in any conditions.
+      # @param params_klass [Karafka::Params::Params] initialized params class that we will
+      #   use for a given Karafka process
+      def self.included(params_klass)
+        params_klass.extend(Dsl::ClassMethods)
+
+        METHOD_ATTRIBUTES.each do |attr|
+          # Defines a method call accessor to a particular hash field.
+          # @note Won't work for complex key names that contain spaces, etc
+          # @param key [Symbol] name of a field that we want to retrieve with a method call
+          # @example
+          #   key_attr_reader :example
+          #   params.example #=> 'my example value'
+          params_klass.send :define_method, attr do
+            self[attr]
+          end
+        end
+
+        params_klass.send :private, :merge!
+        params_klass.send :private, :parse
+      end
+
+      private
+
+      # Overwritten merge! method - it behaves differently for keys that are the same in our hash
+      # and in the other_hash - it will not replace keys that are the same in our hash
+      # and in the other one. This protects some important Karafka params keys that cannot be
+      # replaced with custom values from an incoming Kafka message
+      # @param other_hash [Hash] hash that we want to merge into the current one
+      # @return [Karafka::Params::Params] our parameters hash with merged values
+      # @example Merge with hash without same keys
+      #   new(a: 1, b: 2).merge!(c: 3) #=> { a: 1, b: 2, c: 3 }
+      # @example Merge with hash with same keys (symbol based)
+      #   new(a: 1).merge!(a: 2) #=> { a: 1 }
+      # @example Merge with hash with same keys (string based)
+      #   new(a: 1).merge!('a' => 2) #=> { a: 1 }
+      # @example Merge with hash with same keys (current string based)
+      #   new('a' => 1).merge!(a: 2) #=> { a: 1 }
+      def merge!(other_hash)
+        super(other_hash) { |_key, base_value, _new_value| base_value }
+      end
+
+      # @param value [String] raw data that we want to parse using the consumer parser
+      # @note If something goes wrong, it will return raw data in a hash with a message key
+      # @return [Hash] parsed data or a hash with a message key containing raw data if something
+      #   went wrong during parsing
+      def parse(value)
+        Karafka.monitor.instrument('params.params.parse', caller: self) do
+          self['parser'].parse(value)
+        end
+      rescue ::Karafka::Errors::ParserError => e
+        Karafka.monitor.instrument('params.params.parse.error', caller: self, error: e)
+        raise e
+      end
+    end
+  end
+end
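
How the new DSL behaves once mixed into the final params class, sketched with a hypothetical JsonParser (the parser class and the payload are illustrative; only #build and #retrieve! come from the diff above):

    require 'json'

    # Hypothetical parser following the contract used by #parse above
    class JsonParser
      def self.parse(value)
        JSON.parse(value)
      rescue JSON::ParserError => e
        raise ::Karafka::Errors::ParserError, e
      end
    end

    params = Karafka::Params::Params.build({ 'value' => '{"user_id":1}' }, JsonParser)
    params['parsed']  #=> nil - nothing parsed yet, the raw value is carried along
    params.retrieve!  # runs JsonParser.parse on the raw value and merges the result in
    params['user_id'] #=> 1
    params.retrieve!  # no-op - the 'parsed' flag prevents a second parse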

data/lib/karafka/params/params_batch.rb
@@ -4,7 +4,7 @@ module Karafka
   module Params
     # Params batch represents a set of messages received from Kafka.
     # @note Params internally are lazy loaded before first use. That way we can skip parsing
-    #   process if we have after_received that rejects some incoming messages without using params
+    #   process if we have after_fetch that rejects some incoming messages without using params
     #   It can also be used when handling really heavy data (in terms of parsing).
     class ParamsBatch
       include Enumerable
@@ -13,7 +13,7 @@ module Karafka
       # @param messages_batch [Array<Kafka::FetchedMessage>] messages batch
       # @param topic_parser [Class] topic parser for unparsing messages values
      def initialize(messages_batch, topic_parser)
-        @params_batch = messages_batch.map do |message|
+        @params_batch = messages_batch.map! do |message|
          Karafka::Params::Params.build(message, topic_parser)
        end
      end
@@ -32,6 +32,11 @@ module Karafka
        each(&:itself)
      end

+      # @return [Karafka::Params::Params] last element after the unparsing process
+      def last
+        @params_batch.last.retrieve!
+      end
+
      # @return [Array<Karafka::Params::Params>] pure array with params (not parsed)
      def to_a
        @params_batch
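
The switch from map to map! reuses the already allocated messages array instead of building a new one per batch, and #last lets callers read the newest message without parsing the whole set. A sketch of the lazy contract, assuming batch is a ParamsBatch built from fetched messages:

    batch.to_a              # pure array of params objects, none of them parsed
    batch.last              # parses and returns only the last params object
    batch.each do |params|  # per the lazy-loading note above, each params object
      puts params.offset    # is parsed right before being yielded
    end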

data/lib/karafka/patches/dry_configurable.rb
@@ -13,23 +13,23 @@ module Karafka
      def initialize(*args)
        super

-        @config.each do |key, _value|
-          rebuild(key)
-        end
+        @config.each_key(&method(:rebuild))
      end

      private

      # Method that rebuilds a given accessor, so when it contains a proc value, it will
-      #   evaluate it upon return
+      #   evaluate it upon return for blocks that don't require any arguments; otherwise
+      #   it will return the block
      # @param method_name [Symbol] name of an accessor that we want to rebuild
      def rebuild(method_name)
        define_singleton_method method_name do
-          super().is_a?(Proc) ? super().call : super()
+          value = super()
+          return value unless value.is_a?(Proc)
+          return value unless value.parameters.empty?
+          value.call
        end
      end
    end
  end
 end
-
-::Dry::Configurable::Config.prepend(Karafka::Patches::DryConfigurable)
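
The practical effect of the new arity check: zero-argument procs stay lazy config values that are evaluated on every read, while procs that expect arguments are handed back untouched. A sketch of both cases (AppConfig is illustrative, and the patch is assumed to be prepended to Dry::Configurable::Config, as the removed line used to do):

    require 'dry/configurable'

    class AppConfig
      extend Dry::Configurable

      setting :timestamp, -> { Time.now }           # zero arity: evaluated on read
      setting :mapper, ->(topic) { "ns.#{topic}" }  # expects an argument: returned as-is
    end

    AppConfig.config.timestamp #=> a fresh Time, built at each read
    AppConfig.config.mapper    #=> the proc itself, for the caller to invoke later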

data/lib/karafka/patches/ruby_kafka.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Patches
+    # Patches for Ruby Kafka gem
+    module RubyKafka
+      # This patch allows us to inject business logic in between fetches and before the
+      # consumer stops, so we can perform a stop commit or anything else that we need, since
+      # the ruby-kafka fetch loop does not allow that directly
+      # We don't want to use the poll ruby-kafka API as it brings many more problems that we
+      # would have to take care of. That way, nothing like that ever happens, but we get the
+      # control over the stopping process that we need (since we're the ones that initiate it
+      # for each thread)
+      def consumer_loop
+        super do
+          consumers = Karafka::Persistence::Consumer
+                      .all
+                      .values
+                      .flat_map(&:values)
+                      .select { |ctrl| ctrl.respond_to?(:run_callbacks) }
+
+          if Karafka::App.stopped?
+            consumers.each { |ctrl| ctrl.run_callbacks :before_stop }
+            Karafka::Persistence::Client.read.stop
+          else
+            consumers.each { |ctrl| ctrl.run_callbacks :before_poll }
+            yield
+            consumers.each { |ctrl| ctrl.run_callbacks :after_poll }
+          end
+        end
+      end
+    end
+  end
+end
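
Those run_callbacks hooks surface in consumers through the callbacks DSL added in 1.2 (data/lib/karafka/consumers/callbacks.rb in the file list above). A sketch of a consumer hooking into them (class name and callback bodies are illustrative):

    class EventsConsumer < ApplicationConsumer
      include Karafka::Consumers::Callbacks

      before_poll { @polls = (@polls || 0) + 1 }                # before every fetch
      after_poll  { Karafka.logger.debug("polls: #{@polls}") }  # after every fetch
      before_stop { Karafka.logger.info('consumer stopping') }  # once, on shutdown

      def consume
        # regular processing
      end
    end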

data/lib/karafka/persistence/client.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Persistence
+    # Persistence layer to store current thread messages consumer client for further use
+    class Client
+      # Thread.current key under which we store current thread messages consumer client
+      PERSISTENCE_SCOPE = :client
+
+      # @param client [Karafka::Connection::Client] messages consumer client of
+      #   a current thread
+      # @return [Karafka::Connection::Client] persisted messages consumer client
+      def self.write(client)
+        Thread.current[PERSISTENCE_SCOPE] = client
+      end
+
+      # @return [Karafka::Connection::Client] persisted messages consumer client
+      # @raise [Karafka::Errors::MissingClient] raised when there is no thread messages
+      #   consumer client but we try to use it anyway
+      def self.read
+        Thread.current[PERSISTENCE_SCOPE] || raise(Errors::MissingClient)
+      end
+    end
+  end
+end
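
The intended round trip, per listener thread (the write call happens inside Karafka's connection setup; it is shown here only to illustrate the contract, with client construction elided):

    Karafka::Persistence::Client.write(client) # stored once per listener thread
    Karafka::Persistence::Client.read          #=> the same client, later in that thread
    Karafka::Persistence::Client.read.stop     # how the consumer_loop patch above stops fetching

    # In a thread where nothing was written:
    Karafka::Persistence::Client.read          # raises Karafka::Errors::MissingClient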

data/lib/karafka/persistence/consumer.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Module used to provide a persistent cache layer for Karafka components that need to be
+  # shared inside of the same thread
+  module Persistence
+    # Class used to provide a persistent cache across batch requests for a given
+    # topic and partition to store some additional details when the persistent mode
+    # for a given topic is turned on
+    class Consumer
+      # Thread.current scope under which we store consumers data
+      PERSISTENCE_SCOPE = :consumers
+
+      class << self
+        # @return [Hash] current thread persistence scope hash with all the consumers
+        def all
+          # @note This does not need to be threadsafe (Hash) as it is always executed in a
+          #   current thread context
+          Thread.current[PERSISTENCE_SCOPE] ||= Hash.new { |hash, key| hash[key] = {} }
+        end
+
+        # Used to build (if block given) and/or fetch a current consumer instance that will be
+        # used to process messages from a given topic and partition
+        # @return [Karafka::BaseConsumer] base consumer descendant
+        # @param topic [Karafka::Routing::Topic] topic instance for which we might cache
+        # @param partition [Integer] number of partition for which we want to cache
+        def fetch(topic, partition)
+          # We always store a current instance for callback reasons
+          if topic.persistent
+            all[topic][partition] ||= topic.consumer.new
+          else
+            all[topic][partition] = topic.consumer.new
+          end
+        end
+      end
+    end
+  end
+end
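
What the cache means for consumer state, assuming topic is a routed Karafka::Routing::Topic (a sketch; the fetch calls are normally issued by Karafka's internals, not by user code):

    # With topic.persistent true:
    first  = Karafka::Persistence::Consumer.fetch(topic, 0)
    second = Karafka::Persistence::Consumer.fetch(topic, 0)
    first.equal?(second) #=> true - instance variables survive across batches

    # With topic.persistent false a fresh consumer is built on each fetch,
    # but it is still stored so the poll/stop callbacks above can reach it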

data/lib/karafka/persistence/topic.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Persistence
+    # Local cache for routing topics
+    # We use it in order not to build string instances and remap incoming topic upon each
+    # message / message batches received
+    class Topic
+      # Thread.current scope under which we store topics data
+      PERSISTENCE_SCOPE = :topics
+
+      # @param group_id [String] group id for which we fetch a topic representation
+      # @param raw_topic_name [String] raw topic name (before remapping) for which we fetch a
+      #   topic representation
+      # @return [Karafka::Routing::Topic] remapped topic representation that can be used further
+      #   on when working with given parameters
+      def self.fetch(group_id, raw_topic_name)
+        Thread.current[PERSISTENCE_SCOPE] ||= Hash.new { |hash, key| hash[key] = {} }
+
+        Thread.current[PERSISTENCE_SCOPE][group_id][raw_topic_name] ||= begin
+          # We map from incoming topic name, as it might be namespaced, etc.
+          # @see topic_mapper internal docs
+          mapped_topic_name = Karafka::App.config.topic_mapper.incoming(raw_topic_name)
+          Routing::Router.find("#{group_id}_#{mapped_topic_name}")
+        end
+      end
+    end
+  end
+end
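
A sketch of the per-thread memoization at work (group and topic names are illustrative):

    a = Karafka::Persistence::Topic.fetch('group_1', 'ns.events')
    b = Karafka::Persistence::Topic.fetch('group_1', 'ns.events')
    a.equal?(b) #=> true - remapping and router lookup ran only once for this thread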

data/lib/karafka/process.rb
@@ -1,14 +1,16 @@
 # frozen_string_literal: true

 module Karafka
-  # Class used to catch signals from ruby Signal class in order to manage Karafka shutdown
+  # Class used to catch signals from ruby Signal class in order to manage Karafka stop
   # @note There might be only one process - this class is a singleton
   class Process
     include Singleton

     # Signal types that we handle
     HANDLED_SIGNALS = %i[
-      SIGINT SIGQUIT SIGTERM
+      SIGINT
+      SIGQUIT
+      SIGTERM
     ].freeze

     HANDLED_SIGNALS.each do |signal|
@@ -27,8 +29,7 @@ module Karafka

     # Creates an instance of process and creates empty hash for callbacks
     def initialize
-      @callbacks = {}
-      HANDLED_SIGNALS.each { |signal| @callbacks[signal] = [] }
+      @callbacks = Hash.new { |hsh, key| hsh[key] = [] }
     end

     # Method catches all HANDLED_SIGNALS and performs appropriate callbacks (if defined)
@@ -56,7 +57,7 @@ module Karafka
     #   we have to spin up a new thread to do this
     def notice_signal(signal)
       Thread.new do
-        Karafka.monitor.notice(self.class, signal: signal)
+        Karafka.monitor.instrument('process.notice_signal', caller: self, signal: signal)
       end
     end
   end
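
Each entry in HANDLED_SIGNALS generates an on_sig* registrar (that is what the HANDLED_SIGNALS.each block visible in the first hunk defines), so supervision code can read like this sketch (handler bodies are illustrative; the real wiring lives in Karafka::Server):

    process = Karafka::Process.instance
    process.on_sigint  { Karafka::App.stop! } # on_sigquit / on_sigterm work the same way
    process.on_sigterm { Karafka::App.stop! }
    process.supervise # traps the signals; registered callbacks run when one arrives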

data/lib/karafka/responders/builder.rb
@@ -3,30 +3,31 @@
 module Karafka
   # Responders namespace encapsulates all the internal responder implementation parts
   module Responders
-    # Responders builder is used to find (based on the controller class name) a responder that
-    #   match the controller. This is used when user does not provide a responder inside routing
-    #   but he still names responder with the same convention (and namespaces) as controller
+    # Responders builder is used to find (based on the consumer class name) a responder
+    #   that matches the consumer. We use it when the user does not provide a responder inside
+    #   routing but still names the responder with the same convention (and namespaces) as the consumer
+    #
     # @example Matching responder exists
-    #   Karafka::Responder::Builder(NewEventsController).build #=> NewEventsResponder
+    #   Karafka::Responder::Builder(NewEventsConsumer).build #=> NewEventsResponder
     # @example Matching responder does not exist
-    #   Karafka::Responder::Builder(NewBuildsController).build #=> nil
+    #   Karafka::Responder::Builder(NewBuildsConsumer).build #=> nil
     class Builder
-      # @param controller_class [Karafka::BaseController, nil] descendant of
-      #   Karafka::BaseController
-      # @example Tries to find a responder that matches a given controller. If nothing found,
-      #   will return nil (nil is accepted, because it means that a given controller don't
+      # @param consumer_class [Karafka::BaseConsumer, nil] descendant of
+      #   Karafka::BaseConsumer
+      # @example Tries to find a responder that matches a given consumer. If nothing found,
+      #   will return nil (nil is accepted, because it means that a given consumer doesn't
       #   pipe stuff further on)
-      def initialize(controller_class)
-        @controller_class = controller_class
+      def initialize(consumer_class)
+        @consumer_class = consumer_class
      end

-      # Tries to figure out a responder based on a controller class name
+      # Tries to figure out a responder based on a consumer class name
       # @return [Class] Responder class (not an instance)
       # @return [nil] or nil if there's no matching responding class
       def build
         Helpers::ClassMatcher.new(
-          @controller_class,
-          from: 'Controller',
+          @consumer_class,
+          from: 'Consumer',
           to: 'Responder'
         ).match
       end

data/lib/karafka/responders/topic.rb
@@ -36,13 +36,20 @@ module Karafka
        @options[:registered] == true
      end

+      # @return [Boolean] do we want to use the async producer. Defaults to false as the sync
+      #   producer is safer and introduces fewer problems
+      def async?
+        @options.key?(:async) ? @options[:async] : false
+      end
+
      # @return [Hash] hash with this topic attributes and options
      def to_h
        {
          name: name,
          multiple_usage: multiple_usage?,
          required: required?,
-          registered: registered?
+          registered: registered?,
+          async: async?
        }
      end
    end
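
The new flag is set where responder topics are registered; a sketch with illustrative names (per this hunk, async: true opts a topic into async delivery, while the default stays sync):

    class EventsResponder < Karafka::BaseResponder
      topic :events_replayed, async: true # delivered via the async producer
      topic :events_archived              # no :async option - defaults to false (sync)

      def respond(event)
        respond_to :events_replayed, event
        respond_to :events_archived, event
      end
    end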