karafka 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. checksums.yaml +5 -5
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +1 -0
  4. data/CHANGELOG.md +46 -2
  5. data/CONTRIBUTING.md +5 -6
  6. data/Gemfile +1 -2
  7. data/Gemfile.lock +41 -29
  8. data/README.md +13 -19
  9. data/karafka.gemspec +6 -4
  10. data/lib/karafka.rb +17 -7
  11. data/lib/karafka/app.rb +8 -15
  12. data/lib/karafka/attributes_map.rb +1 -1
  13. data/lib/karafka/backends/inline.rb +1 -2
  14. data/lib/karafka/{base_controller.rb → base_consumer.rb} +19 -11
  15. data/lib/karafka/base_responder.rb +34 -15
  16. data/lib/karafka/callbacks.rb +30 -0
  17. data/lib/karafka/callbacks/config.rb +22 -0
  18. data/lib/karafka/callbacks/dsl.rb +16 -0
  19. data/lib/karafka/cli/install.rb +2 -3
  20. data/lib/karafka/cli/server.rb +0 -1
  21. data/lib/karafka/connection/{consumer.rb → client.rb} +32 -36
  22. data/lib/karafka/connection/config_adapter.rb +14 -6
  23. data/lib/karafka/connection/delegator.rb +46 -0
  24. data/lib/karafka/connection/listener.rb +22 -13
  25. data/lib/karafka/{controllers → consumers}/callbacks.rb +9 -9
  26. data/lib/karafka/consumers/includer.rb +51 -0
  27. data/lib/karafka/consumers/responders.rb +24 -0
  28. data/lib/karafka/{controllers → consumers}/single_params.rb +3 -3
  29. data/lib/karafka/errors.rb +10 -3
  30. data/lib/karafka/fetcher.rb +30 -34
  31. data/lib/karafka/helpers/class_matcher.rb +8 -8
  32. data/lib/karafka/helpers/config_retriever.rb +2 -2
  33. data/lib/karafka/instrumentation/listener.rb +112 -0
  34. data/lib/karafka/instrumentation/logger.rb +55 -0
  35. data/lib/karafka/instrumentation/monitor.rb +64 -0
  36. data/lib/karafka/loader.rb +0 -1
  37. data/lib/karafka/params/{params.rb → dsl.rb} +71 -43
  38. data/lib/karafka/params/params_batch.rb +7 -2
  39. data/lib/karafka/patches/dry_configurable.rb +6 -2
  40. data/lib/karafka/patches/ruby_kafka.rb +10 -10
  41. data/lib/karafka/persistence/client.rb +25 -0
  42. data/lib/karafka/persistence/consumer.rb +27 -14
  43. data/lib/karafka/persistence/topic.rb +29 -0
  44. data/lib/karafka/process.rb +5 -4
  45. data/lib/karafka/responders/builder.rb +15 -14
  46. data/lib/karafka/routing/builder.rb +1 -1
  47. data/lib/karafka/routing/consumer_mapper.rb +3 -2
  48. data/lib/karafka/routing/router.rb +1 -1
  49. data/lib/karafka/routing/topic.rb +5 -11
  50. data/lib/karafka/schemas/config.rb +3 -0
  51. data/lib/karafka/schemas/consumer_group.rb +15 -3
  52. data/lib/karafka/schemas/consumer_group_topic.rb +1 -1
  53. data/lib/karafka/server.rb +37 -5
  54. data/lib/karafka/setup/config.rb +47 -21
  55. data/lib/karafka/setup/configurators/base.rb +6 -12
  56. data/lib/karafka/setup/configurators/params.rb +25 -0
  57. data/lib/karafka/setup/configurators/water_drop.rb +6 -3
  58. data/lib/karafka/setup/dsl.rb +22 -0
  59. data/lib/karafka/templates/{application_controller.rb.example → application_consumer.rb.example} +2 -3
  60. data/lib/karafka/templates/karafka.rb.example +17 -4
  61. data/lib/karafka/version.rb +1 -1
  62. metadata +58 -24
  63. data/.github/ISSUE_TEMPLATE.md +0 -2
  64. data/lib/karafka/connection/processor.rb +0 -61
  65. data/lib/karafka/controllers/includer.rb +0 -51
  66. data/lib/karafka/controllers/responders.rb +0 -19
  67. data/lib/karafka/logger.rb +0 -53
  68. data/lib/karafka/monitor.rb +0 -98
  69. data/lib/karafka/persistence/controller.rb +0 -38
data/lib/karafka/instrumentation/logger.rb
@@ -0,0 +1,55 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Instrumentation
+     # Default logger for Event Delegator
+     # @note It uses ::Logger features - providing basic logging
+     class Logger < ::Logger
+       include Singleton
+
+       # Map containing information about log level for given environment
+       ENV_MAP = {
+         'production' => ::Logger::ERROR,
+         'test' => ::Logger::ERROR,
+         'development' => ::Logger::INFO,
+         'debug' => ::Logger::DEBUG,
+         'default' => ::Logger::INFO
+       }.freeze
+
+       # Creates a new instance of logger ensuring that it has a place to write to
+       def initialize(*_args)
+         ensure_dir_exists
+         super(target)
+         self.level = ENV_MAP[Karafka.env] || ENV_MAP['default']
+       end
+
+       private
+
+       # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
+       #   to which we will be writing logs
+       #   We use this approach to log stuff to file and to the STDOUT at the same time
+       def target
+         Karafka::Helpers::MultiDelegator
+           .delegate(:write, :close)
+           .to(STDOUT, file)
+       end
+
+       # Makes sure the log directory exists
+       def ensure_dir_exists
+         dir = File.dirname(log_path)
+         FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
+       end
+
+       # @return [Pathname] Path to a file to which we should log
+       def log_path
+         @log_path ||= Karafka::App.root.join("log/#{Karafka.env}.log")
+       end
+
+       # @return [File] file to which we want to write our logs
+       # @note File is being opened in append mode ('a')
+       def file
+         @file ||= File.open(log_path, 'a')
+       end
+     end
+   end
+ end
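This logger becomes the process-wide `Karafka.logger` (the new config defaults appear to wire `Karafka::Instrumentation::Logger.instance` in as the default `logger` setting). A minimal usage sketch under that assumption:

    # Writes both to STDOUT and to log/<env>.log, subject to the ENV_MAP level
    Karafka.logger.info('Consumer server started')
    Karafka.logger.debug('Dropped in production, where the level is ERROR')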
data/lib/karafka/instrumentation/monitor.rb
@@ -0,0 +1,64 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Namespace for all the things related to the Karafka instrumentation process
+   module Instrumentation
+     # Monitor is used to hook up external monitoring services to monitor how Karafka works
+     # It provides a standardized API for checking incoming messages/enqueueing etc
+     # Since it is a pub-sub based on dry-monitor, you can use as many subscribers/loggers at the
+     # same time, which means that you might have for example file logging and newrelic at the
+     # same time
+     # @note This class acts as a singleton because we are only permitted to have a single monitor
+     #   per running process (just as logger)
+     class Monitor < Dry::Monitor::Notifications
+       include Singleton
+
+       # List of events that we support in the system and to which a monitor client can hook up
+       # @note The non-error ones support timestamp benchmarking
+       # @note Depending on Karafka extensions and additional engines, this might not be the
+       #   complete list of all the events. Please use the #available_events on a fully loaded
+       #   Karafka system to determine all of the events you can use.
+       #   Last 4 events are from WaterDrop but for convenience we use the same monitor for the
+       #   whole Karafka ecosystem
+       BASE_EVENTS = %w[
+         params.params.parse
+         params.params.parse.error
+         connection.listener.fetch_loop.error
+         connection.client.fetch_loop.error
+         connection.delegator.call
+         fetcher.call.error
+         backends.inline.process
+         process.notice_signal
+         consumers.responders.respond_with
+         async_producer.call.error
+         async_producer.call.retry
+         sync_producer.call.error
+         sync_producer.call.retry
+         server.stop
+         server.stop.error
+       ].freeze
+
+       private_constant :BASE_EVENTS
+
+       # @return [Karafka::Instrumentation::Monitor] monitor instance for system instrumentation
+       def initialize
+         super(:karafka)
+         BASE_EVENTS.each(&method(:register_event))
+       end
+
+       # Allows us to subscribe to events with a code that will be yielded upon events
+       # @param event_name_or_listener [String, Object] name of the event we want to subscribe to
+       #   or a listener if we decide to go with object listener
+       def subscribe(event_name_or_listener)
+         return super unless event_name_or_listener.is_a?(String)
+         return super if available_events.include?(event_name_or_listener)
+         raise Errors::UnregisteredMonitorEvent, event_name_or_listener
+       end
+
+       # @return [Array<String>] names of available events to which we can subscribe
+       def available_events
+         __bus__.events.keys
+       end
+     end
+   end
+ end
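Because `#subscribe` validates event names against the registered list, a misspelled event fails fast instead of silently never firing. A short sketch of hooking up a subscriber (the payload keys `caller` and `error` match what the instrumenting code in `params/dsl.rb` below passes in):

    # React to every parser failure in the process
    Karafka.monitor.subscribe('params.params.parse.error') do |event|
      Karafka.logger.error("Parsing failed: #{event[:error]}")
    end

    # Typos are rejected up front
    Karafka.monitor.subscribe('params.params.prase') # raises Errors::UnregisteredMonitorEvent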
data/lib/karafka/loader.rb
@@ -5,7 +5,6 @@ module Karafka
    module Loader
      # Order in which we want to load app files
      DIRS = %w[
-       config/initializers
        lib
        app
      ].freeze
data/lib/karafka/params/dsl.rb (renamed from data/lib/karafka/params/params.rb)
@@ -3,18 +3,28 @@
  module Karafka
    # Params namespace encapsulating all the logic that is directly related to params handling
    module Params
-     # Class-wrapper for hash with indifferent access with additional lazy loading feature
+     # Dsl for Karafka params. We don't provide the params class here as we want to allow users
+     # to use either a hash (default) or a Rails hash with indifferent access as a base for
+     # their params
+     #
+     # We do that because both of them have their own advantages and we don't want to enforce
+     # users to handle things differently if they already use any of those
+     #
      # It provides lazy loading not only until the first usage, but also allows us to skip
      # using parser until we execute our logic. That way we can operate with
      # heavy-parsing data without slowing down the whole application.
-     class Params < HashWithIndifferentAccess
-       # Kafka::FetchedMessage attributes that we want to use inside of params
-       KAFKA_MESSAGE_ATTRIBUTES = %i[
+     module Dsl
+       # Params keys that are "ours" and internal. We use this list for additional backends
+       # that somehow operate on those keys
+       SYSTEM_KEYS = %w[
+         parser
          value
          partition
          offset
          key
          create_time
+         receive_time
+         topic
+         parsed
        ].freeze

        # Params attributes that should be available via a method call invocation for Kafka
@@ -22,15 +32,19 @@ module Karafka
        # Kafka passes internally Kafka::FetchedMessage object and the ruby-kafka consumer
        # uses those fields via method calls, so in order to be able to pass there our params
        # objects, they have to have the same API.
-       PARAMS_METHOD_ATTRIBUTES = %i[
+       METHOD_ATTRIBUTES = %w[
          topic
          partition
          offset
          key
          create_time
+         receive_time
        ].freeze

-       class << self
+       private_constant :METHOD_ATTRIBUTES
+
+       # Class methods required by params to work
+       module ClassMethods
          # We allow building instances only via the #build method

          # @param message [Kafka::FetchedMessage, Hash] message that we get out of Kafka
@@ -44,38 +58,30 @@ module Karafka
          # @example Build params instance from a Kafka::FetchedMessage object
          #   Karafka::Params::Params.build(message) #=> params object
          def build(message, parser)
-           # Hash case happens inside backends that interchange data
-           if message.is_a?(Hash)
-             new(parser: parser).send(:merge!, message)
-           else
-             # This happens inside Kafka::FetchedProcessor
-             new(
-               parser: parser,
-               parsed: false,
-               received_at: Time.now
-             ).tap do |instance|
-               KAFKA_MESSAGE_ATTRIBUTES.each do |attribute|
-                 instance[attribute] = message.send(attribute)
-               end
+           instance = new
+           instance['parser'] = parser

+           # Non kafka fetched message can happen when we interchange data with an
+           # additional backend
+           if message.is_a?(Kafka::FetchedMessage)
+             instance.send(
+               :merge!,
+               'value' => message.value,
+               'partition' => message.partition,
+               'offset' => message.offset,
+               'key' => message.key,
+               'create_time' => message.create_time,
+               'receive_time' => Time.now,
                # When we get raw messages, they might have a topic, that was modified by a
                # topic mapper. We need to "reverse" this change and map back to the non-modified
                # format, so our internal flow is not corrupted with the mapping
-               instance[:topic] = Karafka::App.config.topic_mapper.incoming(message.topic)
-             end
+               'topic' => Karafka::App.config.topic_mapper.incoming(message.topic)
+             )
+           else
+             instance.send(:merge!, message)
            end
-         end

-         # Defines a method call accessor to a particular hash field.
-         # @note Won't work for complex key names that contain spaces, etc
-         # @param key [Symbol] name of a field that we want to retrieve with a method call
-         # @example
-         #   key_attr_reader :example
-         #   params.example #=> 'my example value'
-         def key_attr_reader(key)
-           define_method key do
-             self[key]
-           end
+           instance
          end
        end

@@ -84,19 +90,42 @@ module Karafka
      # to the current object. This object will be also marked as already parsed, so we won't
      # parse it again.
      def retrieve!
-       return self if self[:parsed]
+       return self if self['parsed']
+       self['parsed'] = true

-       merge!(parse(delete(:value)))
+       merge!(parse(delete('value')))
      end

-     PARAMS_METHOD_ATTRIBUTES.each(&method(:key_attr_reader))
+     # Includes and extends the base params klass with everything that is needed by Karafka to
+     # fully work in any conditions.
+     # @param params_klass [Karafka::Params::Params] initialized params class that we will
+     #   use for a given Karafka process
+     def self.included(params_klass)
+       params_klass.extend(Dsl::ClassMethods)
+
+       METHOD_ATTRIBUTES.each do |attr|
+         # Defines a method call accessor to a particular hash field.
+         # @note Won't work for complex key names that contain spaces, etc
+         # @param key [Symbol] name of a field that we want to retrieve with a method call
+         # @example
+         #   key_attr_reader :example
+         #   params.example #=> 'my example value'
+         params_klass.send :define_method, attr do
+           self[attr]
+         end
+       end
+
+       params_klass.send :private, :merge!
+       params_klass.send :private, :parse
+     end

      private

      # Overwritten merge! method - it behaves differently for keys that are the same in our hash
      # and in the other_hash - it will not replace keys that are the same in our hash
-     # and in the other one
-     # @param other_hash [Hash, HashWithIndifferentAccess] hash that we want to merge into current
+     # and in the other one. This protects some important Karafka params keys that cannot be
+     # replaced with custom values from an incoming Kafka message
+     # @param other_hash [Hash] hash that we want to merge into current
      # @return [Karafka::Params::Params] our parameters hash with merged values
      # @example Merge with hash without same keys
      #   new(a: 1, b: 2).merge!(c: 3) #=> { a: 1, b: 2, c: 3 }
@@ -110,18 +139,17 @@ module Karafka
        super(other_hash) { |_key, base_value, _new_value| base_value }
      end

-     # @param value [String] Raw data that we want to parse using controller's parser
+     # @param value [String] Raw data that we want to parse using the consumer parser
      # @note If something goes wrong, it will return raw data in a hash with a message key
      # @return [Hash] parsed data or a hash with message key containing raw data if something
      #   went wrong during parsing
      def parse(value)
-       self[:parser].parse(value)
-     # We catch both of them, because for default JSON - we use JSON parser directly
+       Karafka.monitor.instrument('params.params.parse', caller: self) do
+         self['parser'].parse(value)
+       end
      rescue ::Karafka::Errors::ParserError => e
-       Karafka.monitor.notice_error(self.class, e)
+       Karafka.monitor.instrument('params.params.parse.error', caller: self, error: e)
        raise e
-     ensure
-       self[:parsed] = true
      end
    end
  end
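The DSL is meant to be mixed into whatever base hash class the app configures (the new `setup/configurators/params.rb` in the file list suggests the params class is built from a configurable base). An illustrative sketch of the wiring, assuming a plain `Hash` base:

    # Binding the DSL gives the class .build, #retrieve! and the method accessors
    class Karafka::Params::Params < Hash
      include Karafka::Params::Dsl
    end

    params = Karafka::Params::Params.build(kafka_message, parser)
    params['parsed'] # => nil - the parser has not run yet
    params.retrieve! # parses 'value' and marks the object as parsed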
data/lib/karafka/params/params_batch.rb
@@ -4,7 +4,7 @@ module Karafka
    module Params
      # Params batch represents a set of messages received from Kafka.
      # @note Params internally are lazy loaded before first use. That way we can skip the parsing
-     #   process if we have after_fetched that rejects some incoming messages without using params
+     #   process if we have after_fetch that rejects some incoming messages without using params
      #   It can also be used when handling really heavy data (in terms of parsing).
      class ParamsBatch
        include Enumerable
@@ -13,7 +13,7 @@ module Karafka
        # @param messages_batch [Array<Kafka::FetchedMessage>] messages batch
        # @param topic_parser [Class] topic parser for unparsing messages values
        def initialize(messages_batch, topic_parser)
-         @params_batch = messages_batch.map do |message|
+         @params_batch = messages_batch.map! do |message|
            Karafka::Params::Params.build(message, topic_parser)
          end
        end
@@ -32,6 +32,11 @@ module Karafka
          each(&:itself)
        end

+       # @return [Karafka::Params::Params] last element after the unparsing process
+       def last
+         @params_batch.last.retrieve!
+       end
+
        # @return [Array<Karafka::Params::Params>] pure array with params (not parsed)
        def to_a
          @params_batch
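The batch stays lazy end to end: `#to_a` hands back raw params, while the new `#last` (like iteration through `#parsed`) triggers `retrieve!` only where needed. Sketch:

    params_batch.to_a.last['parsed'] # => nil - still raw
    params_batch.last                # parses just the newest message and returns it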
data/lib/karafka/patches/dry_configurable.rb
@@ -19,11 +19,15 @@ module Karafka
      private

      # Method that rebuilds a given accessor, so when it contains a proc value, it will
-     #   evaluate it upon return
+     #   evaluate it upon return for blocks that don't require any arguments, otherwise
+     #   it will return the block
      # @param method_name [Symbol] name of an accessor that we want to rebuild
      def rebuild(method_name)
        define_singleton_method method_name do
-         super().is_a?(Proc) ? super().call : super()
+         value = super()
+         return value unless value.is_a?(Proc)
+         return value unless value.parameters.empty?
+         value.call
        end
      end
    end
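The effect on configured values, illustrated with hypothetical settings: a zero-arity proc is still resolved when the accessor is read, while a proc that declares parameters is now handed back as a callable.

    config.logger = -> { ::Logger.new(STDOUT) }      # no parameters
    config.mapper = ->(topic) { "prefix.#{topic}" }  # expects an argument

    config.logger # proc is called - returns the Logger instance
    config.mapper # proc itself is returned, to be called later with a topic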
data/lib/karafka/patches/ruby_kafka.rb
@@ -2,7 +2,7 @@

  module Karafka
    module Patches
-     # Batches for Ruby Kafka gem
+     # Patches for Ruby Kafka gem
      module RubyKafka
        # This patch allows us to inject business logic in between fetches and before the consumer
        # stop, so we can perform stop commit or anything else that we need since
@@ -13,19 +13,19 @@ module Karafka
        #   thread)
        def consumer_loop
          super do
-           controllers = Karafka::Persistence::Controller
-                         .all
-                         .values
-                         .flat_map(&:values)
-                         .select { |ctrl| ctrl.respond_to?(:run_callbacks) }
+           consumers = Karafka::Persistence::Consumer
+                       .all
+                       .values
+                       .flat_map(&:values)
+                       .select { |ctrl| ctrl.respond_to?(:run_callbacks) }

            if Karafka::App.stopped?
-             controllers.each { |ctrl| ctrl.run_callbacks :before_stop }
-             Karafka::Persistence::Consumer.read.stop
+             consumers.each { |ctrl| ctrl.run_callbacks :before_stop }
+             Karafka::Persistence::Client.read.stop
            else
-             controllers.each { |ctrl| ctrl.run_callbacks :before_poll }
+             consumers.each { |ctrl| ctrl.run_callbacks :before_poll }
              yield
-             controllers.each { |ctrl| ctrl.run_callbacks :after_poll }
+             consumers.each { |ctrl| ctrl.run_callbacks :after_poll }
            end
          end
        end
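This loop is what drives the consumer lifecycle callbacks: each poll cycle runs `before_poll`/`after_poll` on every persisted consumer, and shutdown runs `before_stop` before stopping the thread's client. A hedged consumer-side sketch, assuming the callbacks DSL from the renamed `consumers/callbacks.rb` exposes these hooks as class-level macros:

    class EventsConsumer < ApplicationConsumer
      include Karafka::Consumers::Callbacks

      # Executed by the patched consumer_loop before each poll
      before_poll do
        @polls = (@polls || 0) + 1
      end

      # Executed once when the server is stopping
      before_stop do
        Karafka.logger.info("Stopping after #{@polls} polls")
      end
    end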
data/lib/karafka/persistence/client.rb
@@ -0,0 +1,25 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Persistence
+     # Persistence layer to store current thread messages consumer client for further use
+     class Client
+       # Thread.current key under which we store current thread messages consumer client
+       PERSISTENCE_SCOPE = :client
+
+       # @param client [Karafka::Connection::Client] messages consumer client of
+       #   a current thread
+       # @return [Karafka::Connection::Client] persisted messages consumer client
+       def self.write(client)
+         Thread.current[PERSISTENCE_SCOPE] = client
+       end
+
+       # @return [Karafka::Connection::Client] persisted messages consumer client
+       # @raise [Karafka::Errors::MissingClient] raised when there is no thread messages
+       #   consumer client but we try to use it anyway
+       def self.read
+         Thread.current[PERSISTENCE_SCOPE] || raise(Errors::MissingClient)
+       end
+     end
+   end
+ end
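`write` happens once per listener thread when its connection client is built; `read` is used anywhere the current thread's client is needed - the `consumer_loop` patch above stops consumption through it. Sketch:

    # In the listener thread, right after the client is created
    Karafka::Persistence::Client.write(client)

    # Later in the same thread (raises Errors::MissingClient when nothing was written)
    Karafka::Persistence::Client.read.stop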
data/lib/karafka/persistence/consumer.rb
@@ -1,24 +1,37 @@
  # frozen_string_literal: true

  module Karafka
+   # Module used to provide a persistent cache layer for Karafka components that need to be
+   #   shared inside of the same thread
    module Persistence
-     # Persistence layer to store current thread messages consumer for further use
+     # Module used to provide a persistent cache across batch requests for a given
+     #   topic and partition to store some additional details when the persistent mode
+     #   for a given topic is turned on
      class Consumer
-       # Thread.current key under which we store current thread messages consumer
-       PERSISTENCE_SCOPE = :consumer
+       # Thread.current scope under which we store consumers data
+       PERSISTENCE_SCOPE = :consumers

-       # @param consumer [Karafka::Connection::Consumer] messages consumer of
-       #   a current thread
-       # @return [Karafka::Connection::Consumer] persisted messages consumer
-       def self.write(consumer)
-         Thread.current[PERSISTENCE_SCOPE] = consumer
-       end
+       class << self
+         # @return [Hash] current thread persistence scope hash with all the consumers
+         def all
+           # @note This does not need to be threadsafe (Hash) as it is always executed in a
+           #   current thread context
+           Thread.current[PERSISTENCE_SCOPE] ||= Hash.new { |hash, key| hash[key] = {} }
+         end

-       # @return [Karafka::Connection::Consumer] persisted messages consumer
-       # @raise [Karafka::Errors::MissingConsumer] raised when no thread messages consumer
-       #   but we try to use it anyway
-       def self.read
-         Thread.current[PERSISTENCE_SCOPE] || raise(Errors::MissingConsumer)
+         # Used to build (if block given) and/or fetch a current consumer instance that will be
+         #   used to process messages from a given topic and partition
+         # @return [Karafka::BaseConsumer] base consumer descendant
+         # @param topic [Karafka::Routing::Topic] topic instance for which we might cache
+         # @param partition [Integer] number of partition for which we want to cache
+         def fetch(topic, partition)
+           # We always store a current instance for callback reasons
+           if topic.persistent
+             all[topic][partition] ||= topic.consumer.new
+           else
+             all[topic][partition] = topic.consumer.new
+           end
+         end
        end
      end
    end
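The fetch semantics follow the topic's `persistent` flag: a persistent topic memoizes one consumer instance per partition (so instance variables survive between batches), while a non-persistent topic gets a fresh instance each time - though it is still stored, so the callback scan in the ruby-kafka patch can reach it. Sketch:

    first  = Karafka::Persistence::Consumer.fetch(topic, 0)
    second = Karafka::Persistence::Consumer.fetch(topic, 0)
    first.equal?(second) # => true when topic.persistent, false otherwise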