karafka 1.3.0 → 1.4.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.diffend.yml +3 -0
  4. data/.github/workflows/ci.yml +76 -0
  5. data/.ruby-version +1 -1
  6. data/CHANGELOG.md +112 -15
  7. data/CODE_OF_CONDUCT.md +1 -1
  8. data/Gemfile +2 -0
  9. data/Gemfile.lock +87 -98
  10. data/README.md +28 -31
  11. data/certs/mensfeld.pem +24 -23
  12. data/config/errors.yml +2 -0
  13. data/docker-compose.yml +17 -0
  14. data/karafka.gemspec +22 -14
  15. data/lib/karafka/assignment_strategies/round_robin.rb +13 -0
  16. data/lib/karafka/attributes_map.rb +3 -8
  17. data/lib/karafka/cli/base.rb +4 -4
  18. data/lib/karafka/cli/flow.rb +9 -6
  19. data/lib/karafka/cli/info.rb +1 -1
  20. data/lib/karafka/cli/install.rb +5 -2
  21. data/lib/karafka/cli/missingno.rb +19 -0
  22. data/lib/karafka/cli/server.rb +8 -8
  23. data/lib/karafka/cli.rb +9 -1
  24. data/lib/karafka/connection/api_adapter.rb +27 -24
  25. data/lib/karafka/connection/batch_delegator.rb +5 -1
  26. data/lib/karafka/connection/builder.rb +9 -2
  27. data/lib/karafka/connection/client.rb +9 -6
  28. data/lib/karafka/connection/listener.rb +2 -2
  29. data/lib/karafka/consumers/batch_metadata.rb +10 -0
  30. data/lib/karafka/consumers/includer.rb +5 -4
  31. data/lib/karafka/contracts/consumer_group.rb +10 -5
  32. data/lib/karafka/contracts/server_cli_options.rb +2 -0
  33. data/lib/karafka/contracts.rb +1 -1
  34. data/lib/karafka/helpers/class_matcher.rb +2 -2
  35. data/lib/karafka/instrumentation/logger.rb +6 -9
  36. data/lib/karafka/instrumentation/stdout_listener.rb +6 -4
  37. data/lib/karafka/params/batch_metadata.rb +26 -0
  38. data/lib/karafka/params/builders/batch_metadata.rb +30 -0
  39. data/lib/karafka/params/builders/params.rb +17 -15
  40. data/lib/karafka/params/builders/params_batch.rb +2 -2
  41. data/lib/karafka/params/metadata.rb +14 -29
  42. data/lib/karafka/params/params.rb +27 -41
  43. data/lib/karafka/params/params_batch.rb +15 -16
  44. data/lib/karafka/routing/builder.rb +1 -0
  45. data/lib/karafka/routing/consumer_group.rb +5 -3
  46. data/lib/karafka/serialization/json/deserializer.rb +2 -2
  47. data/lib/karafka/server.rb +4 -1
  48. data/lib/karafka/setup/config.rb +60 -52
  49. data/lib/karafka/templates/karafka.rb.erb +1 -1
  50. data/lib/karafka/version.rb +1 -1
  51. data/lib/karafka.rb +3 -1
  52. data.tar.gz.sig +0 -0
  53. metadata +75 -93
  54. metadata.gz.sig +0 -0
  55. data/.github/FUNDING.yml +0 -3
  56. data/.travis.yml +0 -36
  57. data/lib/karafka/consumers/metadata.rb +0 -10
  58. data/lib/karafka/params/builders/metadata.rb +0 -33
@@ -6,6 +6,8 @@ module Karafka
6
6
  # We validate some basics + the list of consumer_groups on which we want to use, to make
7
7
  # sure that all of them are defined, plus that a pidfile does not exist
8
8
  class ServerCliOptions < Dry::Validation::Contract
9
+ config.messages.load_paths << File.join(Karafka.gem_root, 'config', 'errors.yml')
10
+
9
11
  params do
10
12
  optional(:pid).filled(:str?)
11
13
  optional(:daemon).filled(:bool?)
@@ -5,6 +5,6 @@ module Karafka
5
5
  module Contracts
6
6
  # Regexp for validating format of groups and topics
7
7
  # @note It is not nested inside of the contracts, as it is used by couple of them
8
- TOPIC_REGEXP = /\A(\w|\-|\.)+\z/.freeze
8
+ TOPIC_REGEXP = /\A(\w|-|\.)+\z/.freeze
9
9
  end
10
10
  end
@@ -8,7 +8,7 @@ module Karafka
8
8
  class ClassMatcher
9
9
  # Regexp used to remove any non classy like characters that might be in the consumer
10
10
  # class name (if defined dynamically, etc)
11
- CONSTANT_REGEXP = %r{[?!=+\-\*/\^\|&\[\]<>%~\#\:\s\(\)]}.freeze
11
+ CONSTANT_REGEXP = %r{[?!=+\-*/\^|&\[\]<>%~\#:\s()]}.freeze
12
12
 
13
13
  private_constant :CONSTANT_REGEXP
14
14
 
@@ -44,7 +44,7 @@ module Karafka
44
44
  # @example From Namespaced::Super2Consumer matching responder
45
45
  # matcher.name #=> Super2Responder
46
46
  def name
47
- inflected = @klass.to_s.split('::').last.to_s
47
+ inflected = +@klass.to_s.split('::').last.to_s
48
48
  # We inject the from into the name just in case it is missing as in a situation like
49
49
  # that it would just sanitize the name without adding the "to" postfix.
50
50
  # It could create cases when we want to build for example a responder to a consumer
@@ -20,7 +20,6 @@ module Karafka
20
20
  # @param _args Any arguments that we don't care about but that are needed in order to
21
21
  # make this logger compatible with the default Ruby one
22
22
  def initialize(*_args)
23
- ensure_dir_exists
24
23
  super(target)
25
24
  self.level = ENV_MAP[Karafka.env] || ENV_MAP['default']
26
25
  end
@@ -29,17 +28,11 @@ module Karafka
29
28
 
30
29
  # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
31
30
  # to which we will be writing logs
32
- # We use this approach to log stuff to file and to the STDOUT at the same time
31
+ # We use this approach to log stuff to file and to the $stdout at the same time
33
32
  def target
34
33
  Karafka::Helpers::MultiDelegator
35
34
  .delegate(:write, :close)
36
- .to(STDOUT, file)
37
- end
38
-
39
- # Makes sure the log directory exists
40
- def ensure_dir_exists
41
- dir = File.dirname(log_path)
42
- FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
35
+ .to(*[$stdout, file].compact)
43
36
  end
44
37
 
45
38
  # @return [Pathname] Path to a file to which we should log
@@ -50,7 +43,11 @@ module Karafka
50
43
  # @return [File] file to which we want to write our logs
51
44
  # @note File is being opened in append mode ('a')
52
45
  def file
46
+ FileUtils.mkdir_p(File.dirname(log_path))
47
+
53
48
  @file ||= File.open(log_path, 'a')
49
+ rescue Errno::EACCES, Errno::EROFS
50
+ nil
54
51
  end
55
52
  end
56
53
  end
@@ -43,7 +43,7 @@ module Karafka
43
43
  # so it returns a topic as a string, not a routing topic
44
44
  debug(
45
45
  <<~MSG.chomp.tr("\n", ' ')
46
- Params deserialization for #{event[:caller].topic} topic
46
+ Params deserialization for #{event[:caller].metadata.topic} topic
47
47
  successful in #{event[:time]} ms
48
48
  MSG
49
49
  )
@@ -52,7 +52,9 @@ module Karafka
52
52
  # Logs unsuccessful deserialization attempts of incoming data
53
53
  # @param event [Dry::Events::Event] event details including payload
54
54
  def on_params_params_deserialize_error(event)
55
- error "Params deserialization error for #{event[:caller].topic} topic: #{event[:error]}"
55
+ topic = event[:caller].metadata.topic
56
+ error = event[:error]
57
+ error "Params deserialization error for #{topic} topic: #{error}"
56
58
  end
57
59
 
58
60
  # Logs errors that occurred in a listener fetch loop
@@ -101,10 +103,10 @@ module Karafka
101
103
  info "Responded from #{calling.class} using #{responder} with following data #{data}"
102
104
  end
103
105
 
104
- # Logs info that we're initializing Karafka app
106
+ # Logs info that we're initializing Karafka framework components
105
107
  # @param _event [Dry::Events::Event] event details including payload
106
108
  def on_app_initializing(_event)
107
- info "Initializing Karafka server #{::Process.pid}"
109
+ info "Initializing Karafka framework #{::Process.pid}"
108
110
  end
109
111
 
110
112
  # Logs info that we're running Karafka app
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Params
5
+ # Simple batch metadata object that stores all non-message information received from Kafka
6
+ # cluster while fetching the data
7
+ # @note This metadata object refers to per batch metadata, not `#params.metadata`
8
+ BatchMetadata = Struct.new(
9
+ :batch_size,
10
+ :first_offset,
11
+ :highwater_mark_offset,
12
+ :unknown_last_offset,
13
+ :last_offset,
14
+ :offset_lag,
15
+ :deserializer,
16
+ :partition,
17
+ :topic,
18
+ keyword_init: true
19
+ ) do
20
+ # @return [Boolean] is the last offset known or unknown
21
+ def unknown_last_offset?
22
+ unknown_last_offset
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Params
5
+ module Builders
6
+ # Builder for creating batch metadata object based on the batch informations
7
+ module BatchMetadata
8
+ class << self
9
+ # Creates metadata based on the kafka batch data
10
+ # @param kafka_batch [Kafka::FetchedBatch] kafka batch details
11
+ # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
12
+ # @return [Karafka::Params::BatchMetadata] batch metadata object
13
+ def from_kafka_batch(kafka_batch, topic)
14
+ Karafka::Params::BatchMetadata.new(
15
+ batch_size: kafka_batch.messages.count,
16
+ first_offset: kafka_batch.first_offset,
17
+ highwater_mark_offset: kafka_batch.highwater_mark_offset,
18
+ unknown_last_offset: kafka_batch.unknown_last_offset?,
19
+ last_offset: kafka_batch.last_offset,
20
+ offset_lag: kafka_batch.offset_lag,
21
+ deserializer: topic.deserializer,
22
+ partition: kafka_batch.partition,
23
+ topic: topic.name
24
+ ).freeze
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -12,22 +12,24 @@ module Karafka
12
12
  class << self
13
13
  # @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
14
14
  # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
15
- # @return [Karafka::Params::Params] params object
15
+ # @return [Karafka::Params::Params] params object with payload and message metadata
16
16
  def from_kafka_message(kafka_message, topic)
17
- Karafka::Params::Params
18
- .new
19
- .merge!(
20
- 'create_time' => kafka_message.create_time,
21
- 'headers' => kafka_message.headers || {},
22
- 'is_control_record' => kafka_message.is_control_record,
23
- 'key' => kafka_message.key,
24
- 'offset' => kafka_message.offset,
25
- 'deserializer' => topic.deserializer,
26
- 'partition' => kafka_message.partition,
27
- 'receive_time' => Time.now,
28
- 'topic' => kafka_message.topic,
29
- 'payload' => kafka_message.value
30
- )
17
+ metadata = Karafka::Params::Metadata.new(
18
+ create_time: kafka_message.create_time,
19
+ headers: kafka_message.headers || {},
20
+ is_control_record: kafka_message.is_control_record,
21
+ key: kafka_message.key,
22
+ offset: kafka_message.offset,
23
+ deserializer: topic.deserializer,
24
+ partition: kafka_message.partition,
25
+ receive_time: Time.now,
26
+ topic: topic.name
27
+ ).freeze
28
+
29
+ Karafka::Params::Params.new(
30
+ kafka_message.value,
31
+ metadata
32
+ )
31
33
  end
32
34
  end
33
35
  end
@@ -12,11 +12,11 @@ module Karafka
12
12
  # @param topic [Karafka::Routing::Topic] topic for which we're received messages
13
13
  # @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
14
14
  def from_kafka_messages(kafka_messages, topic)
15
- params_array = kafka_messages.map! do |message|
15
+ params_array = kafka_messages.map do |message|
16
16
  Karafka::Params::Builders::Params.from_kafka_message(message, topic)
17
17
  end
18
18
 
19
- Karafka::Params::ParamsBatch.new(params_array)
19
+ Karafka::Params::ParamsBatch.new(params_array).freeze
20
20
  end
21
21
  end
22
22
  end
@@ -2,34 +2,19 @@
2
2
 
3
3
  module Karafka
4
4
  module Params
5
- # Simple metadata object that stores all non-message information received from Kafka cluster
6
- # while fetching the data
7
- class Metadata < Hash
8
- # Attributes that should be accessible as methods as well (not only hash)
9
- METHOD_ATTRIBUTES = %w[
10
- batch_size
11
- first_offset
12
- highwater_mark_offset
13
- last_offset
14
- offset_lag
15
- deserializer
16
- partition
17
- topic
18
- ].freeze
19
-
20
- private_constant :METHOD_ATTRIBUTES
21
-
22
- METHOD_ATTRIBUTES.each do |attr|
23
- # Defines a method call accessor to a particular hash field.
24
- define_method(attr) do
25
- self[attr]
26
- end
27
- end
28
-
29
- # @return [Boolean] is the last offset known or unknown
30
- def unknown_last_offset?
31
- self['unknown_last_offset']
32
- end
33
- end
5
+ # Single message / params metadata details that can be accessed without the need for the
6
+ # payload deserialization
7
+ Metadata = Struct.new(
8
+ :create_time,
9
+ :headers,
10
+ :is_control_record,
11
+ :key,
12
+ :offset,
13
+ :deserializer,
14
+ :partition,
15
+ :receive_time,
16
+ :topic,
17
+ keyword_init: true
18
+ )
34
19
  end
35
20
  end
@@ -6,58 +6,44 @@ module Karafka
6
6
  # It provides lazy loading not only until the first usage, but also allows us to skip
7
7
  # using deserializer until we execute our logic. That way we can operate with
8
8
  # heavy-deserialization data without slowing down the whole application.
9
- class Params < Hash
10
- # Params attributes that should be available via a method call invocation for Kafka
11
- # client compatibility.
12
- # Kafka passes internally Kafka::FetchedMessage object and the ruby-kafka consumer
13
- # uses those fields via method calls, so in order to be able to pass there our params
14
- # objects, have to have same api.
15
- METHOD_ATTRIBUTES = %w[
16
- create_time
17
- headers
18
- is_control_record
19
- key
20
- offset
21
- deserializer
22
- deserialized
23
- partition
24
- receive_time
25
- topic
26
- payload
27
- ].freeze
9
+ class Params
10
+ extend Forwardable
28
11
 
29
- private_constant :METHOD_ATTRIBUTES
12
+ attr_reader :raw_payload, :metadata
30
13
 
31
- METHOD_ATTRIBUTES.each do |attr|
32
- # Defines a method call accessor to a particular hash field.
33
- # @note Won't work for complex key names that contain spaces, etc
34
- # @param key [Symbol] name of a field that we want to retrieve with a method call
35
- # @example
36
- # key_attr_reader :example
37
- # params.example #=> 'my example payload'
38
- define_method(attr) do
39
- self[attr]
40
- end
14
+ def_delegators :metadata, *Metadata.members
15
+
16
+ # @param raw_payload [Object] incoming payload before deserialization
17
+ # @param metadata [Karafka::Params::Metadata] message metadata object
18
+ def initialize(raw_payload, metadata)
19
+ @raw_payload = raw_payload
20
+ @metadata = metadata
21
+ @deserialized = false
22
+ @payload = nil
41
23
  end
42
24
 
43
- # @return [Karafka::Params::Params] This method will trigger deserializer execution. If we
44
- # decide to retrieve data, deserializer will be executed to get data. Output of that will
45
- # be merged to the current object. This object will be also marked as already deserialized,
46
- # so we won't deserialize it again.
47
- def deserialize!
48
- return self if self['deserialized']
25
+ # @return [Object] lazy-deserialized data (deserialized upon first request)
26
+ def payload
27
+ return @payload if deserialized?
28
+
29
+ @payload = deserialize
30
+ # We mark deserialization as successful after deserialization, as in case of an error
31
+ # this won't be falsely set to true
32
+ @deserialized = true
33
+ @payload
34
+ end
49
35
 
50
- self['deserialized'] = true
51
- self['payload'] = deserialize
52
- self
36
+ # @return [Boolean] did given params payload were deserialized already
37
+ def deserialized?
38
+ @deserialized
53
39
  end
54
40
 
55
41
  private
56
42
 
57
- # @return [Object] deserialized data
43
+ # @return [Object] tries de-serializes data
58
44
  def deserialize
59
45
  Karafka.monitor.instrument('params.params.deserialize', caller: self) do
60
- self['deserializer'].call(self)
46
+ metadata.deserializer.call(self)
61
47
  end
62
48
  rescue ::StandardError => e
63
49
  Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
@@ -15,47 +15,46 @@ module Karafka
15
15
  @params_array = params_array
16
16
  end
17
17
 
18
- # @yieldparam [Karafka::Params::Params] each deserialized and loaded params instance
19
- # @note Invocation of this method will cause loading and deserializing each param after
20
- # another. If you want to get access without deserializing, please access params_array
21
- # directly
18
+ # @yieldparam [Karafka::Params::Params] each params instance
19
+ # @note Invocation of this method will not cause loading and deserializing each param after
20
+ # another.
22
21
  def each
23
- @params_array.each { |param| yield(param.deserialize!) }
22
+ @params_array.each { |param| yield(param) }
24
23
  end
25
24
 
26
25
  # @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so they
27
26
  # can be used for batch insert, etc. Without invoking all, up until first use, they won't
28
27
  # be deserialized
29
28
  def deserialize!
30
- each(&:itself)
29
+ each(&:payload)
31
30
  end
32
31
 
33
32
  # @return [Array<Object>] array with deserialized payloads. This method can be useful when
34
33
  # we don't care about metadata and just want to extract all the data payloads from the
35
34
  # batch
36
35
  def payloads
37
- deserialize!.map(&:payload)
36
+ map(&:payload)
38
37
  end
39
38
 
40
- # @return [Karafka::Params::Params] first element after the deserialization process
39
+ # @return [Karafka::Params::Params] first element
41
40
  def first
42
- @params_array.first.deserialize!
41
+ @params_array.first
43
42
  end
44
43
 
45
- # @return [Karafka::Params::Params] last element after the deserialization process
44
+ # @return [Karafka::Params::Params] last element
46
45
  def last
47
- @params_array.last.deserialize!
48
- end
49
-
50
- # @return [Array<Karafka::Params::Params>] pure array with params (not deserialized)
51
- def to_a
52
- @params_array
46
+ @params_array.last
53
47
  end
54
48
 
55
49
  # @return [Integer] number of messages in the batch
56
50
  def size
57
51
  @params_array.size
58
52
  end
53
+
54
+ # @return [Array<Karafka::Params::Params>] pure array with params
55
+ def to_a
56
+ @params_array
57
+ end
59
58
  end
60
59
  end
61
60
  end
@@ -16,6 +16,7 @@ module Karafka
16
16
  private_constant :CONTRACT
17
17
 
18
18
  def initialize
19
+ super
19
20
  @draws = Concurrent::Array.new
20
21
  end
21
22
 
@@ -8,9 +8,11 @@ module Karafka
8
8
  class ConsumerGroup
9
9
  extend Helpers::ConfigRetriever
10
10
 
11
- attr_reader :topics
12
- attr_reader :id
13
- attr_reader :name
11
+ attr_reader(
12
+ :topics,
13
+ :id,
14
+ :name
15
+ )
14
16
 
15
17
  # @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
16
18
  # yet have an application client_id namespace, this will be added here by default.
@@ -17,8 +17,8 @@ module Karafka
17
17
  # }
18
18
  # Deserializer.call(params) #=> { 'a' => 1 }
19
19
  def call(params)
20
- ::MultiJson.load(params['payload'])
21
- rescue ::MultiJson::ParseError => e
20
+ params.raw_payload.nil? ? nil : ::JSON.parse(params.raw_payload)
21
+ rescue ::JSON::ParserError => e
22
22
  raise ::Karafka::Errors::DeserializationError, e
23
23
  end
24
24
  end
@@ -58,9 +58,12 @@ module Karafka
58
58
  def stop_supervised
59
59
  Karafka::App.stop!
60
60
 
61
+ # See https://github.com/dry-rb/dry-configurable/issues/93
62
+ timeout = Thread.new { Karafka::App.config.shutdown_timeout }.join.value
63
+
61
64
  # We check from time to time (for the timeout period) if all the threads finished
62
65
  # their work and if so, we can just return and normal shutdown process will take place
63
- (Karafka::App.config.shutdown_timeout * SUPERVISION_CHECK_FACTOR).to_i.times do
66
+ (timeout * SUPERVISION_CHECK_FACTOR).to_i.times do
64
67
  if consumer_threads.count(&:alive?).zero?
65
68
  Thread.new { Karafka.monitor.instrument('app.stopped') }.join
66
69
  return