karafka 1.1.0 → 1.2.0

Files changed (69)
  1. checksums.yaml +5 -5
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +1 -0
  4. data/CHANGELOG.md +46 -2
  5. data/CONTRIBUTING.md +5 -6
  6. data/Gemfile +1 -2
  7. data/Gemfile.lock +41 -29
  8. data/README.md +13 -19
  9. data/karafka.gemspec +6 -4
  10. data/lib/karafka.rb +17 -7
  11. data/lib/karafka/app.rb +8 -15
  12. data/lib/karafka/attributes_map.rb +1 -1
  13. data/lib/karafka/backends/inline.rb +1 -2
  14. data/lib/karafka/{base_controller.rb → base_consumer.rb} +19 -11
  15. data/lib/karafka/base_responder.rb +34 -15
  16. data/lib/karafka/callbacks.rb +30 -0
  17. data/lib/karafka/callbacks/config.rb +22 -0
  18. data/lib/karafka/callbacks/dsl.rb +16 -0
  19. data/lib/karafka/cli/install.rb +2 -3
  20. data/lib/karafka/cli/server.rb +0 -1
  21. data/lib/karafka/connection/{consumer.rb → client.rb} +32 -36
  22. data/lib/karafka/connection/config_adapter.rb +14 -6
  23. data/lib/karafka/connection/delegator.rb +46 -0
  24. data/lib/karafka/connection/listener.rb +22 -13
  25. data/lib/karafka/{controllers → consumers}/callbacks.rb +9 -9
  26. data/lib/karafka/consumers/includer.rb +51 -0
  27. data/lib/karafka/consumers/responders.rb +24 -0
  28. data/lib/karafka/{controllers → consumers}/single_params.rb +3 -3
  29. data/lib/karafka/errors.rb +10 -3
  30. data/lib/karafka/fetcher.rb +30 -34
  31. data/lib/karafka/helpers/class_matcher.rb +8 -8
  32. data/lib/karafka/helpers/config_retriever.rb +2 -2
  33. data/lib/karafka/instrumentation/listener.rb +112 -0
  34. data/lib/karafka/instrumentation/logger.rb +55 -0
  35. data/lib/karafka/instrumentation/monitor.rb +64 -0
  36. data/lib/karafka/loader.rb +0 -1
  37. data/lib/karafka/params/{params.rb → dsl.rb} +71 -43
  38. data/lib/karafka/params/params_batch.rb +7 -2
  39. data/lib/karafka/patches/dry_configurable.rb +6 -2
  40. data/lib/karafka/patches/ruby_kafka.rb +10 -10
  41. data/lib/karafka/persistence/client.rb +25 -0
  42. data/lib/karafka/persistence/consumer.rb +27 -14
  43. data/lib/karafka/persistence/topic.rb +29 -0
  44. data/lib/karafka/process.rb +5 -4
  45. data/lib/karafka/responders/builder.rb +15 -14
  46. data/lib/karafka/routing/builder.rb +1 -1
  47. data/lib/karafka/routing/consumer_mapper.rb +3 -2
  48. data/lib/karafka/routing/router.rb +1 -1
  49. data/lib/karafka/routing/topic.rb +5 -11
  50. data/lib/karafka/schemas/config.rb +3 -0
  51. data/lib/karafka/schemas/consumer_group.rb +15 -3
  52. data/lib/karafka/schemas/consumer_group_topic.rb +1 -1
  53. data/lib/karafka/server.rb +37 -5
  54. data/lib/karafka/setup/config.rb +47 -21
  55. data/lib/karafka/setup/configurators/base.rb +6 -12
  56. data/lib/karafka/setup/configurators/params.rb +25 -0
  57. data/lib/karafka/setup/configurators/water_drop.rb +6 -3
  58. data/lib/karafka/setup/dsl.rb +22 -0
  59. data/lib/karafka/templates/{application_controller.rb.example → application_consumer.rb.example} +2 -3
  60. data/lib/karafka/templates/karafka.rb.example +17 -4
  61. data/lib/karafka/version.rb +1 -1
  62. metadata +58 -24
  63. data/.github/ISSUE_TEMPLATE.md +0 -2
  64. data/lib/karafka/connection/processor.rb +0 -61
  65. data/lib/karafka/controllers/includer.rb +0 -51
  66. data/lib/karafka/controllers/responders.rb +0 -19
  67. data/lib/karafka/logger.rb +0 -53
  68. data/lib/karafka/monitor.rb +0 -98
  69. data/lib/karafka/persistence/controller.rb +0 -38
data/lib/karafka/instrumentation/logger.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Instrumentation
+    # Default logger for Event Delegator
+    # @note It uses ::Logger features - providing basic logging
+    class Logger < ::Logger
+      include Singleton
+
+      # Map containing information about log level for given environment
+      ENV_MAP = {
+        'production' => ::Logger::ERROR,
+        'test' => ::Logger::ERROR,
+        'development' => ::Logger::INFO,
+        'debug' => ::Logger::DEBUG,
+        'default' => ::Logger::INFO
+      }.freeze
+
+      # Creates a new instance of logger ensuring that it has a place to write to
+      def initialize(*_args)
+        ensure_dir_exists
+        super(target)
+        self.level = ENV_MAP[Karafka.env] || ENV_MAP['default']
+      end
+
+      private
+
+      # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
+      #   to which we will be writing logs
+      #   We use this approach to log stuff to file and to the STDOUT at the same time
+      def target
+        Karafka::Helpers::MultiDelegator
+          .delegate(:write, :close)
+          .to(STDOUT, file)
+      end
+
+      # Makes sure the log directory exists
+      def ensure_dir_exists
+        dir = File.dirname(log_path)
+        FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
+      end
+
+      # @return [Pathname] Path to a file to which we should log
+      def log_path
+        @log_path ||= Karafka::App.root.join("log/#{Karafka.env}.log")
+      end
+
+      # @return [File] file to which we want to write our logs
+      # @note File is being opened in append mode ('a')
+      def file
+        @file ||= File.open(log_path, 'a')
+      end
+    end
+  end
+end
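
Note: the Karafka::Helpers::MultiDelegator used as the log target is not part of this changeset. As a rough sketch of the pattern it relies on (a hypothetical stand-in, not necessarily the gem's exact implementation), a multi delegator fans every delegated call out to all of its targets:

# Minimal multi delegator sketch: each delegated method is forwarded to all
# targets, which is what lets the logger hit STDOUT and the log file at once.
class MultiDelegator
  def initialize(*targets)
    @targets = targets
  end

  # Defines instance methods that forward the given calls to every target
  def self.delegate(*methods)
    methods.each do |name|
      define_method(name) do |*args|
        @targets.map { |target| target.send(name, *args) }
      end
    end
    self
  end

  class << self
    alias to new
  end
end

log_target = MultiDelegator.delegate(:write, :close).to(STDOUT, File.open('dev.log', 'a'))
log_target.write("hello\n") # lands on STDOUT and in dev.log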

data/lib/karafka/instrumentation/monitor.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Namespace for all the things related with Karafka instrumentation process
+  module Instrumentation
+    # Monitor is used to hookup external monitoring services to monitor how Karafka works
+    # It provides a standardized API for checking incoming messages/enqueueing etc
+    # Since it is a pub-sub based on dry-monitor, you can use as many subscribers/loggers at the
+    # same time, which means that you might have for example file logging and newrelic at the same
+    # time
+    # @note This class acts as a singleton because we are only permitted to have single monitor
+    #   per running process (just as logger)
+    class Monitor < Dry::Monitor::Notifications
+      include Singleton
+
+      # List of events that we support in the system and to which a monitor client can hook up
+      # @note The non-error ones support timestamp benchmarking
+      # @note Depending on Karafka extensions and additional engines, this might not be the
+      #   complete list of all the events. Please use the #available_events on fully loaded
+      #   Karafka system to determine all of the events you can use.
+      #   Last 4 events are from WaterDrop but for convenience we use the same monitor for the
+      #   whole karafka ecosystem
+      BASE_EVENTS = %w[
+        params.params.parse
+        params.params.parse.error
+        connection.listener.fetch_loop.error
+        connection.client.fetch_loop.error
+        connection.delegator.call
+        fetcher.call.error
+        backends.inline.process
+        process.notice_signal
+        consumers.responders.respond_with
+        async_producer.call.error
+        async_producer.call.retry
+        sync_producer.call.error
+        sync_producer.call.retry
+        server.stop
+        server.stop.error
+      ].freeze
+
+      private_constant :BASE_EVENTS
+
+      # @return [Karafka::Instrumentation::Monitor] monitor instance for system instrumentation
+      def initialize
+        super(:karafka)
+        BASE_EVENTS.each(&method(:register_event))
+      end
+
+      # Allows us to subscribe to events with a code that will be yielded upon events
+      # @param event_name_or_listener [String, Object] name of the event we want to subscribe to
+      #   or a listener if we decide to go with object listener
+      def subscribe(event_name_or_listener)
+        return super unless event_name_or_listener.is_a?(String)
+        return super if available_events.include?(event_name_or_listener)
+        raise Errors::UnregisteredMonitorEvent, event_name_or_listener
+      end
+
+      # @return [Array<String>] names of available events to which we can subscribe
+      def available_events
+        __bus__.events.keys
+      end
+    end
+  end
+end
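
Note: since the monitor is a dry-monitor notifications bus, hooking an external service up comes down to subscribing to one of the registered event names. A hedged usage sketch (block subscription per the dry-monitor API; the :caller and :error payload keys match the instrument calls visible in the params DSL changes below):

# Forward parser failures to any error tracker of choice
Karafka.monitor.subscribe('params.params.parse.error') do |event|
  puts "Parse error in #{event[:caller].class}: #{event[:error]}"
end

# Misspelled event names no longer fail silently:
Karafka.monitor.subscribe('params.params.prase') # raises Errors::UnregisteredMonitorEvent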

data/lib/karafka/loader.rb
@@ -5,7 +5,6 @@ module Karafka
   module Loader
     # Order in which we want to load app files
     DIRS = %w[
-      config/initializers
       lib
       app
     ].freeze

data/lib/karafka/params/{params.rb → dsl.rb}
@@ -3,18 +3,28 @@
 module Karafka
   # Params namespace encapsulating all the logic that is directly related to params handling
   module Params
-    # Class-wrapper for hash with indifferent access with additional lazy loading feature
+    # Dsl for Karafka params. We don't provide the params class here as we want to allow users to
+    # use either hash (default) or Rails hash with indifferent access as a base for their params
+    #
+    # We do that because both of them have their own advantages and we don't want to enforce users
+    # to handle things differently if they already use any of those
+    #
     # It provides lazy loading not only until the first usage, but also allows us to skip
     # using parser until we execute our logic. That way we can operate with
     # heavy-parsing data without slowing down the whole application.
-    class Params < HashWithIndifferentAccess
-      # Kafka::FetchedMessage attributes that we want to use inside of params
-      KAFKA_MESSAGE_ATTRIBUTES = %i[
+    module Dsl
+      # Params keys that are "our" and internal. We use this list for additional backends
+      # that somehow operate on those keys
+      SYSTEM_KEYS = %w[
+        parser
         value
         partition
         offset
         key
         create_time
+        receive_time
+        topic
+        parsed
       ].freeze

       # Params attributes that should be available via a method call invocation for Kafka
@@ -22,15 +32,19 @@ module Karafka
       # Kafka passes internally Kafka::FetchedMessage object and the ruby-kafka consumer
       # uses those fields via method calls, so in order to be able to pass there our params
       # objects, have to have same api.
-      PARAMS_METHOD_ATTRIBUTES = %i[
+      METHOD_ATTRIBUTES = %w[
         topic
         partition
         offset
         key
         create_time
+        receive_time
       ].freeze

-      class << self
+      private_constant :METHOD_ATTRIBUTES
+
+      # Class methods required by params to work
+      module ClassMethods
         # We allow building instances only via the #build method

         # @param message [Kafka::FetchedMessage, Hash] message that we get out of Kafka
@@ -44,38 +58,30 @@ module Karafka
         # @example Build params instance from a Kafka::FetchedMessage object
         #   Karafka::Params::Params.build(message) #=> params object
         def build(message, parser)
-          # Hash case happens inside backends that interchange data
-          if message.is_a?(Hash)
-            new(parser: parser).send(:merge!, message)
-          else
-            # This happens inside Kafka::FetchedProcessor
-            new(
-              parser: parser,
-              parsed: false,
-              received_at: Time.now
-            ).tap do |instance|
-              KAFKA_MESSAGE_ATTRIBUTES.each do |attribute|
-                instance[attribute] = message.send(attribute)
-              end
+          instance = new
+          instance['parser'] = parser

+          # Non kafka fetched message can happen when we interchange data with an
+          # additional backend
+          if message.is_a?(Kafka::FetchedMessage)
+            instance.send(
+              :merge!,
+              'value' => message.value,
+              'partition' => message.partition,
+              'offset' => message.offset,
+              'key' => message.key,
+              'create_time' => message.create_time,
+              'receive_time' => Time.now,
               # When we get raw messages, they might have a topic, that was modified by a
               # topic mapper. We need to "reverse" this change and map back to the non-modified
               # format, so our internal flow is not corrupted with the mapping
-              instance[:topic] = Karafka::App.config.topic_mapper.incoming(message.topic)
-            end
+              'topic' => Karafka::App.config.topic_mapper.incoming(message.topic)
+            )
+          else
+            instance.send(:merge!, message)
           end
-        end

-        # Defines a method call accessor to a particular hash field.
-        # @note Won't work for complex key names that contain spaces, etc
-        # @param key [Symbol] name of a field that we want to retrieve with a method call
-        # @example
-        #   key_attr_reader :example
-        #   params.example #=> 'my example value'
-        def key_attr_reader(key)
-          define_method key do
-            self[key]
-          end
+          instance
         end
       end

@@ -84,19 +90,42 @@ module Karafka
       # to the current object. This object will be also marked as already parsed, so we won't
       # parse it again.
       def retrieve!
-        return self if self[:parsed]
+        return self if self['parsed']
+        self['parsed'] = true

-        merge!(parse(delete(:value)))
+        merge!(parse(delete('value')))
       end

-      PARAMS_METHOD_ATTRIBUTES.each(&method(:key_attr_reader))
+      # Includes and extends the base params klass with everything that is needed by Karafka to
+      # fully work in any conditions.
+      # @param params_klass [Karafka::Params::Params] initialized params class that we will
+      #   use for a given Karafka process
+      def self.included(params_klass)
+        params_klass.extend(Dsl::ClassMethods)
+
+        METHOD_ATTRIBUTES.each do |attr|
+          # Defines a method call accessor to a particular hash field.
+          # @note Won't work for complex key names that contain spaces, etc
+          # @param key [Symbol] name of a field that we want to retrieve with a method call
+          # @example
+          #   key_attr_reader :example
+          #   params.example #=> 'my example value'
+          params_klass.send :define_method, attr do
+            self[attr]
+          end
+        end
+
+        params_klass.send :private, :merge!
+        params_klass.send :private, :parse
+      end

       private

       # Overwritten merge! method - it behaves differently for keys that are the same in our hash
       # and in a other_hash - it will not replace keys that are the same in our hash
-      # and in the other one
-      # @param other_hash [Hash, HashWithIndifferentAccess] hash that we want to merge into current
+      # and in the other one. This protects some important Karafka params keys that cannot be
+      # replaced with custom values from incoming Kafka message
+      # @param other_hash [Hash] hash that we want to merge into current
       # @return [Karafka::Params::Params] our parameters hash with merged values
       # @example Merge with hash without same keys
       #   new(a: 1, b: 2).merge!(c: 3) #=> { a: 1, b: 2, c: 3 }
@@ -110,18 +139,17 @@ module Karafka
         super(other_hash) { |_key, base_value, _new_value| base_value }
       end

-      # @param value [String] Raw data that we want to parse using controller's parser
+      # @param value [String] Raw data that we want to parse using consumer parser
       # @note If something goes wrong, it will return raw data in a hash with a message key
       # @return [Hash] parsed data or a hash with message key containing raw data if something
       #   went wrong during parsing
       def parse(value)
-        self[:parser].parse(value)
-        # We catch both of them, because for default JSON - we use JSON parser directly
+        Karafka.monitor.instrument('params.params.parse', caller: self) do
+          self['parser'].parse(value)
+        end
       rescue ::Karafka::Errors::ParserError => e
-        Karafka.monitor.notice_error(self.class, e)
+        Karafka.monitor.instrument('params.params.parse.error', caller: self, error: e)
         raise e
-      ensure
-        self[:parsed] = true
       end
     end
   end
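
Note: with the params logic extracted into this DSL, a concrete params class just picks a hash base and includes the module - the included hook then mixes in ClassMethods#build, defines the metadata readers and privatizes merge!/parse. A sketch of the intended usage inside a booted Karafka process (this mirrors what the new setup/configurators/params.rb appears to do; JSON stands in for a parser, as anything responding to #parse will work):

# Params class built on top of a plain Ruby Hash (the new default base)
class Params < Hash
  include Karafka::Params::Dsl
end

params = Params.build({ 'value' => '{"id": 1}', 'topic' => 'events' }, JSON)

params['parsed'] #=> nil - the raw value is untouched so far (lazy parsing)
params.retrieve! # parser runs (instrumented via Karafka.monitor) and merges the result
params['id']     #=> 1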

data/lib/karafka/params/params_batch.rb
@@ -4,7 +4,7 @@ module Karafka
   module Params
     # Params batch represents a set of messages received from Kafka.
     # @note Params internally are lazy loaded before first use. That way we can skip parsing
-    #   process if we have after_fetched that rejects some incoming messages without using params
+    #   process if we have after_fetch that rejects some incoming messages without using params
     #   It can be also used when handling really heavy data (in terms of parsing).
     class ParamsBatch
       include Enumerable
@@ -13,7 +13,7 @@ module Karafka
       # @param messages_batch [Array<Kafka::FetchedMessage>] messages batch
       # @param topic_parser [Class] topic parser for unparsing messages values
      def initialize(messages_batch, topic_parser)
-        @params_batch = messages_batch.map do |message|
+        @params_batch = messages_batch.map! do |message|
          Karafka::Params::Params.build(message, topic_parser)
        end
      end
@@ -32,6 +32,11 @@ module Karafka
        each(&:itself)
      end

+      # @return [Karafka::Params::Params] last element after the unparsing process
+      def last
+        @params_batch.last.retrieve!
+      end
+
      # @return [Array<Karafka::Params::Params>] pure array with params (not parsed)
      def to_a
        @params_batch
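
Note: a batch stays unparsed until its params are touched - iteration retrieves (parses) each element, while #to_a hands back the raw array for metadata-only work. A hedged usage sketch (process and resume_offset are made-up placeholders):

# Each params hash is parsed lazily, on first touch during iteration
params_batch.each { |params| process(params['id']) }

# #to_a skips parsing entirely - fine when filtering on offsets/partitions,
# since those are method-accessible system keys rather than payload data
fresh = params_batch.to_a.select { |params| params.offset > resume_offset }

# The new #last parses only the final message of the batch
params_batch.last['id']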

data/lib/karafka/patches/dry_configurable.rb
@@ -19,11 +19,15 @@ module Karafka
       private

       # Method that rebuilds a given accessor, so when it contains a proc value, it will
-      # evaluate it upon return
+      # evaluate it upon return for blocks that don't require any arguments, otherwise
+      # it will return the block
       # @param method_name [Symbol] name of an accessor that we want to rebuild
       def rebuild(method_name)
         define_singleton_method method_name do
-          super().is_a?(Proc) ? super().call : super()
+          value = super()
+          return value unless value.is_a?(Proc)
+          return value unless value.parameters.empty?
+          value.call
         end
       end
     end
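
Note: the new guard relies on core Ruby's Proc#parameters, so only argument-less blocks keep the auto-evaluation behavior. A small illustration of the dispatch (plain Ruby, outside any Karafka config):

auto  = -> { "karafka_#{Process.pid}" }
taken = ->(topic) { "prefix_#{topic}" }

auto.parameters.empty?  #=> true  - the patched accessor calls it and returns the String
taken.parameters.empty? #=> false - the patched accessor now returns the Proc untouched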

data/lib/karafka/patches/ruby_kafka.rb
@@ -2,7 +2,7 @@

 module Karafka
   module Patches
-    # Batches for Ruby Kafka gem
+    # Patches for Ruby Kafka gem
     module RubyKafka
       # This patch allows us to inject business logic in between fetches and before the consumer
       # stop, so we can perform stop commit or anything else that we need since
@@ -13,19 +13,19 @@ module Karafka
       #   thread)
       def consumer_loop
         super do
-          controllers = Karafka::Persistence::Controller
-                        .all
-                        .values
-                        .flat_map(&:values)
-                        .select { |ctrl| ctrl.respond_to?(:run_callbacks) }
+          consumers = Karafka::Persistence::Consumer
+                      .all
+                      .values
+                      .flat_map(&:values)
+                      .select { |ctrl| ctrl.respond_to?(:run_callbacks) }

           if Karafka::App.stopped?
-            controllers.each { |ctrl| ctrl.run_callbacks :before_stop }
-            Karafka::Persistence::Consumer.read.stop
+            consumers.each { |ctrl| ctrl.run_callbacks :before_stop }
+            Karafka::Persistence::Client.read.stop
           else
-            controllers.each { |ctrl| ctrl.run_callbacks :before_poll }
+            consumers.each { |ctrl| ctrl.run_callbacks :before_poll }
             yield
-            controllers.each { |ctrl| ctrl.run_callbacks :after_poll }
+            consumers.each { |ctrl| ctrl.run_callbacks :after_poll }
           end
         end
       end
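
Note: this patch is what drives consumer lifecycle callbacks on the application side - every persisted consumer responding to run_callbacks gets its hooks fired around each poll and once before shutdown. A hedged sketch of a consumer opting in (hook names per this release's consumers/callbacks.rb; the body is illustrative only):

class AuditConsumer < Karafka::BaseConsumer
  include Karafka::Consumers::Callbacks

  before_poll do
    # fired by the patched consumer_loop before every fetch
    @polls = (@polls || 0) + 1
  end

  before_stop do
    # fired once, when Karafka::App enters the stopped state
    Karafka.logger.info("Shutting down after #{@polls} polls")
  end

  def consume; end
end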

data/lib/karafka/persistence/client.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Persistence
+    # Persistence layer to store current thread messages consumer client for further use
+    class Client
+      # Thread.current key under which we store current thread messages consumer client
+      PERSISTENCE_SCOPE = :client
+
+      # @param client [Karafka::Connection::Client] messages consumer client of
+      #   a current thread
+      # @return [Karafka::Connection::Client] persisted messages consumer client
+      def self.write(client)
+        Thread.current[PERSISTENCE_SCOPE] = client
+      end
+
+      # @return [Karafka::Connection::Client] persisted messages consumer client
+      # @raise [Karafka::Errors::MissingClient] raised when there is no thread messages consumer
+      #   client but we try to use it anyway
+      def self.read
+        Thread.current[PERSISTENCE_SCOPE] || raise(Errors::MissingClient)
+      end
+    end
+  end
+end
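
Note: both accessors are thin wrappers around Thread.current, so each listener thread only ever sees its own client. Usage sketch (the read.stop call is exactly what the ruby-kafka patch above performs on shutdown):

# Karafka stores the connection client once a listener thread starts consuming...
Karafka::Persistence::Client.write(client)

# ...so code running later in the same thread can get it back without it being
# passed around explicitly; reading before any write raises Errors::MissingClient
Karafka::Persistence::Client.read.stop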

data/lib/karafka/persistence/consumer.rb
@@ -1,24 +1,37 @@
 # frozen_string_literal: true

 module Karafka
+  # Module used to provide a persistent cache layer for Karafka components that need to be
+  # shared inside of a same thread
   module Persistence
-    # Persistence layer to store current thread messages consumer for further use
+    # Module used to provide a persistent cache across batch requests for a given
+    # topic and partition to store some additional details when the persistent mode
+    # for a given topic is turned on
     class Consumer
-      # Thread.current key under which we store current thread messages consumer
-      PERSISTENCE_SCOPE = :consumer
+      # Thread.current scope under which we store consumers data
+      PERSISTENCE_SCOPE = :consumers

-      # @param consumer [Karafka::Connection::Consumer] messages consumer of
-      #   a current thread
-      # @return [Karafka::Connection::Consumer] persisted messages consumer
-      def self.write(consumer)
-        Thread.current[PERSISTENCE_SCOPE] = consumer
-      end
+      class << self
+        # @return [Hash] current thread persistence scope hash with all the consumers
+        def all
+          # @note This does not need to be threadsafe (Hash) as it is always executed in a
+          #   current thread context
+          Thread.current[PERSISTENCE_SCOPE] ||= Hash.new { |hash, key| hash[key] = {} }
+        end

-      # @return [Karafka::Connection::Consumer] persisted messages consumer
-      # @raise [Karafka::Errors::MissingConsumer] raised when no thread messages consumer
-      #   but we try to use it anyway
-      def self.read
-        Thread.current[PERSISTENCE_SCOPE] || raise(Errors::MissingConsumer)
+        # Used to build (if block given) and/or fetch a current consumer instance that will be
+        # used to process messages from a given topic and partition
+        # @return [Karafka::BaseConsumer] base consumer descendant
+        # @param topic [Karafka::Routing::Topic] topic instance for which we might cache
+        # @param partition [Integer] number of partition for which we want to cache
+        def fetch(topic, partition)
+          # We always store a current instance for callback reasons
+          if topic.persistent
+            all[topic][partition] ||= topic.consumer.new
+          else
+            all[topic][partition] = topic.consumer.new
+          end
+        end
       end
     end
   end
 end
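
Note: the cache is keyed by topic and then partition. With persistent mode on, the same consumer instance is reused across batches, so its instance variables survive between polls; non-persistent topics get a fresh instance on every fetch:

# Same thread, same topic and partition, topic.persistent == true:
first  = Karafka::Persistence::Consumer.fetch(topic, 0)
second = Karafka::Persistence::Consumer.fetch(topic, 0)
first.equal?(second) #=> true - the instance (and its state) carries over

# With topic.persistent == false, every fetch builds topic.consumer.new,
# trading state retention for isolation between batches.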