karafka 1.3.0 → 1.4.14

Files changed (58)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.diffend.yml +3 -0
  4. data/.github/workflows/ci.yml +76 -0
  5. data/.ruby-version +1 -1
  6. data/CHANGELOG.md +112 -15
  7. data/CODE_OF_CONDUCT.md +1 -1
  8. data/Gemfile +2 -0
  9. data/Gemfile.lock +87 -98
  10. data/README.md +28 -31
  11. data/certs/mensfeld.pem +24 -23
  12. data/config/errors.yml +2 -0
  13. data/docker-compose.yml +17 -0
  14. data/karafka.gemspec +22 -14
  15. data/lib/karafka/assignment_strategies/round_robin.rb +13 -0
  16. data/lib/karafka/attributes_map.rb +3 -8
  17. data/lib/karafka/cli/base.rb +4 -4
  18. data/lib/karafka/cli/flow.rb +9 -6
  19. data/lib/karafka/cli/info.rb +1 -1
  20. data/lib/karafka/cli/install.rb +5 -2
  21. data/lib/karafka/cli/missingno.rb +19 -0
  22. data/lib/karafka/cli/server.rb +8 -8
  23. data/lib/karafka/cli.rb +9 -1
  24. data/lib/karafka/connection/api_adapter.rb +27 -24
  25. data/lib/karafka/connection/batch_delegator.rb +5 -1
  26. data/lib/karafka/connection/builder.rb +9 -2
  27. data/lib/karafka/connection/client.rb +9 -6
  28. data/lib/karafka/connection/listener.rb +2 -2
  29. data/lib/karafka/consumers/batch_metadata.rb +10 -0
  30. data/lib/karafka/consumers/includer.rb +5 -4
  31. data/lib/karafka/contracts/consumer_group.rb +10 -5
  32. data/lib/karafka/contracts/server_cli_options.rb +2 -0
  33. data/lib/karafka/contracts.rb +1 -1
  34. data/lib/karafka/helpers/class_matcher.rb +2 -2
  35. data/lib/karafka/instrumentation/logger.rb +6 -9
  36. data/lib/karafka/instrumentation/stdout_listener.rb +6 -4
  37. data/lib/karafka/params/batch_metadata.rb +26 -0
  38. data/lib/karafka/params/builders/batch_metadata.rb +30 -0
  39. data/lib/karafka/params/builders/params.rb +17 -15
  40. data/lib/karafka/params/builders/params_batch.rb +2 -2
  41. data/lib/karafka/params/metadata.rb +14 -29
  42. data/lib/karafka/params/params.rb +27 -41
  43. data/lib/karafka/params/params_batch.rb +15 -16
  44. data/lib/karafka/routing/builder.rb +1 -0
  45. data/lib/karafka/routing/consumer_group.rb +5 -3
  46. data/lib/karafka/serialization/json/deserializer.rb +2 -2
  47. data/lib/karafka/server.rb +4 -1
  48. data/lib/karafka/setup/config.rb +60 -52
  49. data/lib/karafka/templates/karafka.rb.erb +1 -1
  50. data/lib/karafka/version.rb +1 -1
  51. data/lib/karafka.rb +3 -1
  52. data.tar.gz.sig +0 -0
  53. metadata +75 -93
  54. metadata.gz.sig +0 -0
  55. data/.github/FUNDING.yml +0 -3
  56. data/.travis.yml +0 -36
  57. data/lib/karafka/consumers/metadata.rb +0 -10
  58. data/lib/karafka/params/builders/metadata.rb +0 -33
data/lib/karafka/contracts/server_cli_options.rb
```diff
@@ -6,6 +6,8 @@ module Karafka
     # We validate some basics + the list of consumer_groups on which we want to use, to make
     # sure that all of them are defined, plus that a pidfile does not exist
     class ServerCliOptions < Dry::Validation::Contract
+      config.messages.load_paths << File.join(Karafka.gem_root, 'config', 'errors.yml')
+
       params do
         optional(:pid).filled(:str?)
         optional(:daemon).filled(:bool?)
```
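The added `load_paths` line points dry-validation at the gem's own `config/errors.yml` (also touched in this release, see the file list) so that custom error messages resolve. A minimal sketch of how a contract shaped like this behaves when called; the class name and option values below are illustrative, not Karafka's:

```ruby
require 'dry/validation'

# Illustrative stand-in mirroring the params block above, not Karafka's class
class ServerOptionsContract < Dry::Validation::Contract
  params do
    optional(:pid).filled(:str?)
    optional(:daemon).filled(:bool?)
  end
end

result = ServerOptionsContract.new.call(pid: '', daemon: true)
result.success?    #=> false
result.errors.to_h #=> { pid: ["must be filled"] }
```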
data/lib/karafka/contracts.rb
```diff
@@ -5,6 +5,6 @@ module Karafka
   module Contracts
     # Regexp for validating format of groups and topics
     # @note It is not nested inside of the contracts, as it is used by couple of them
-    TOPIC_REGEXP = /\A(\w|\-|\.)+\z/.freeze
+    TOPIC_REGEXP = /\A(\w|-|\.)+\z/.freeze
   end
 end
```
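This is a behavior-preserving cleanup: outside a character class, `-` has no special meaning in a Ruby regexp, so the `\-` escape was redundant and the pattern accepts exactly the same names. A quick check:

```ruby
TOPIC_REGEXP = /\A(\w|-|\.)+\z/.freeze

%w[orders my-topic.v1 bad*name].map { |t| t.match?(TOPIC_REGEXP) }
#=> [true, true, false]
```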
data/lib/karafka/helpers/class_matcher.rb
```diff
@@ -8,7 +8,7 @@ module Karafka
     class ClassMatcher
       # Regexp used to remove any non classy like characters that might be in the consumer
       # class name (if defined dynamically, etc)
-      CONSTANT_REGEXP = %r{[?!=+\-\*/\^\|&\[\]<>%~\#\:\s\(\)]}.freeze
+      CONSTANT_REGEXP = %r{[?!=+\-*/\^|&\[\]<>%~\#:\s()]}.freeze
 
       private_constant :CONSTANT_REGEXP
 
@@ -44,7 +44,7 @@ module Karafka
       # @example From Namespaced::Super2Consumer matching responder
       #   matcher.name #=> Super2Responder
       def name
-        inflected = @klass.to_s.split('::').last.to_s
+        inflected = +@klass.to_s.split('::').last.to_s
         # We inject the from into the name just in case it is missing as in a situation like
         # that it would just sanitize the name without adding the "to" postfix.
         # It could create cases when we want to build for example a responder to a consumer
```
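The unary `+` is `String#+@`: it returns a mutable copy when the receiver is frozen (and the receiver itself otherwise), which matters under `# frozen_string_literal: true` when the sanitized name is later mutated in place. A sketch of the idiom with illustrative names:

```ruby
# frozen_string_literal: true

name = 'Super2Consumer' # frozen literal under the magic comment
inflected = +name       # String#+@ hands back an unfrozen copy
inflected.frozen?       #=> false
inflected.sub!(/Consumer\z/, 'Responder')
inflected               #=> "Super2Responder"
```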
data/lib/karafka/instrumentation/logger.rb
```diff
@@ -20,7 +20,6 @@ module Karafka
       # @param _args Any arguments that we don't care about but that are needed in order to
       #   make this logger compatible with the default Ruby one
       def initialize(*_args)
-        ensure_dir_exists
         super(target)
         self.level = ENV_MAP[Karafka.env] || ENV_MAP['default']
       end
@@ -29,17 +28,11 @@ module Karafka
 
       # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
       #   to which we will be writing logs
-      # We use this approach to log stuff to file and to the STDOUT at the same time
+      # We use this approach to log stuff to file and to the $stdout at the same time
       def target
         Karafka::Helpers::MultiDelegator
           .delegate(:write, :close)
-          .to(STDOUT, file)
-      end
-
-      # Makes sure the log directory exists
-      def ensure_dir_exists
-        dir = File.dirname(log_path)
-        FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
+          .to(*[$stdout, file].compact)
       end
 
       # @return [Pathname] Path to a file to which we should log
@@ -50,7 +43,11 @@ module Karafka
       # @return [File] file to which we want to write our logs
       # @note File is being opened in append mode ('a')
       def file
+        FileUtils.mkdir_p(File.dirname(log_path))
+
         @file ||= File.open(log_path, 'a')
+      rescue Errno::EACCES, Errno::EROFS
+        nil
       end
     end
   end
```
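Two changes work together here: `#file` now returns `nil` instead of raising when the log file cannot be created (read-only or permission-restricted filesystems), and `.to(*[$stdout, file].compact)` drops that `nil`, so logging degrades gracefully to `$stdout` alone. A self-contained sketch of the fan-out pattern; this stand-in only approximates `Karafka::Helpers::MultiDelegator`:

```ruby
# Stand-in multi delegator: one object fanning selected calls out to many targets
class MultiDelegator
  def initialize(*targets)
    @targets = targets
  end

  def self.delegate(*methods)
    methods.each do |name|
      define_method(name) { |*args| @targets.map { |t| t.send(name, *args) } }
    end
    self
  end

  def self.to(*targets)
    new(*targets)
  end
end

file = nil # what Logger#file returns when it rescues Errno::EACCES / Errno::EROFS
target = MultiDelegator.delegate(:write, :close).to(*[$stdout, file].compact)
target.write("still logged to stdout\n")
```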
data/lib/karafka/instrumentation/stdout_listener.rb
```diff
@@ -43,7 +43,7 @@ module Karafka
         #   so it returns a topic as a string, not a routing topic
         debug(
           <<~MSG.chomp.tr("\n", ' ')
-            Params deserialization for #{event[:caller].topic} topic
+            Params deserialization for #{event[:caller].metadata.topic} topic
             successful in #{event[:time]} ms
           MSG
         )
@@ -52,7 +52,9 @@ module Karafka
       # Logs unsuccessful deserialization attempts of incoming data
       # @param event [Dry::Events::Event] event details including payload
       def on_params_params_deserialize_error(event)
-        error "Params deserialization error for #{event[:caller].topic} topic: #{event[:error]}"
+        topic = event[:caller].metadata.topic
+        error = event[:error]
+        error "Params deserialization error for #{topic} topic: #{error}"
       end
 
       # Logs errors that occurred in a listener fetch loop
@@ -101,10 +103,10 @@ module Karafka
         info "Responded from #{calling.class} using #{responder} with following data #{data}"
       end
 
-      # Logs info that we're initializing Karafka app
+      # Logs info that we're initializing Karafka framework components
       # @param _event [Dry::Events::Event] event details including payload
       def on_app_initializing(_event)
-        info "Initializing Karafka server #{::Process.pid}"
+        info "Initializing Karafka framework #{::Process.pid}"
       end
 
       # Logs info that we're running Karafka app
```
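The listener now reads the topic through `event[:caller].metadata.topic` because params no longer expose hash-style keys (see the `Params` rework below). The same event stream can be tapped directly; a hedged sketch, with `ErrorTracker` as a hypothetical handler:

```ruby
# Subscribing to the deserialization-error event emitted in Params#deserialize
Karafka.monitor.subscribe('params.params.deserialize.error') do |event|
  ErrorTracker.notify( # ErrorTracker is illustrative, not part of Karafka
    event[:error],
    topic: event[:caller].metadata.topic
  )
end
```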
data/lib/karafka/consumers/batch_metadata.rb → data/lib/karafka/params/batch_metadata.rb (new file)
```diff
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Params
+    # Simple batch metadata object that stores all non-message information received from Kafka
+    # cluster while fetching the data
+    # @note This metadata object refers to per batch metadata, not `#params.metadata`
+    BatchMetadata = Struct.new(
+      :batch_size,
+      :first_offset,
+      :highwater_mark_offset,
+      :unknown_last_offset,
+      :last_offset,
+      :offset_lag,
+      :deserializer,
+      :partition,
+      :topic,
+      keyword_init: true
+    ) do
+      # @return [Boolean] is the last offset known or unknown
+      def unknown_last_offset?
+        unknown_last_offset
+      end
+    end
+  end
+end
```
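With `keyword_init: true` the struct is built from named arguments rather than positional ones, and the block adds a `?` predicate over the raw `unknown_last_offset` field. A usage sketch with made-up values:

```ruby
meta = Karafka::Params::BatchMetadata.new(
  batch_size: 2,
  first_offset: 100,
  highwater_mark_offset: 102,
  unknown_last_offset: false,
  last_offset: 101,
  offset_lag: 0,
  deserializer: ->(params) { params.raw_payload },
  partition: 0,
  topic: 'orders'
).freeze

meta.topic                #=> "orders"
meta.unknown_last_offset? #=> false
```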
data/lib/karafka/params/builders/batch_metadata.rb (new file)
```diff
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Params
+    module Builders
+      # Builder for creating batch metadata object based on the batch informations
+      module BatchMetadata
+        class << self
+          # Creates metadata based on the kafka batch data
+          # @param kafka_batch [Kafka::FetchedBatch] kafka batch details
+          # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
+          # @return [Karafka::Params::BatchMetadata] batch metadata object
+          def from_kafka_batch(kafka_batch, topic)
+            Karafka::Params::BatchMetadata.new(
+              batch_size: kafka_batch.messages.count,
+              first_offset: kafka_batch.first_offset,
+              highwater_mark_offset: kafka_batch.highwater_mark_offset,
+              unknown_last_offset: kafka_batch.unknown_last_offset?,
+              last_offset: kafka_batch.last_offset,
+              offset_lag: kafka_batch.offset_lag,
+              deserializer: topic.deserializer,
+              partition: kafka_batch.partition,
+              topic: topic.name
+            ).freeze
+          end
+        end
+      end
+    end
+  end
+end
```
data/lib/karafka/params/builders/params.rb
```diff
@@ -12,22 +12,24 @@ module Karafka
         class << self
           # @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
           # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
-          # @return [Karafka::Params::Params] params object
+          # @return [Karafka::Params::Params] params object with payload and message metadata
           def from_kafka_message(kafka_message, topic)
-            Karafka::Params::Params
-              .new
-              .merge!(
-                'create_time' => kafka_message.create_time,
-                'headers' => kafka_message.headers || {},
-                'is_control_record' => kafka_message.is_control_record,
-                'key' => kafka_message.key,
-                'offset' => kafka_message.offset,
-                'deserializer' => topic.deserializer,
-                'partition' => kafka_message.partition,
-                'receive_time' => Time.now,
-                'topic' => kafka_message.topic,
-                'payload' => kafka_message.value
-              )
+            metadata = Karafka::Params::Metadata.new(
+              create_time: kafka_message.create_time,
+              headers: kafka_message.headers || {},
+              is_control_record: kafka_message.is_control_record,
+              key: kafka_message.key,
+              offset: kafka_message.offset,
+              deserializer: topic.deserializer,
+              partition: kafka_message.partition,
+              receive_time: Time.now,
+              topic: topic.name
+            ).freeze
+
+            Karafka::Params::Params.new(
+              kafka_message.value,
+              metadata
+            )
           end
         end
       end
```
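The builder now splits a fetched message into a frozen `Metadata` struct and a thin `Params` wrapper around the untouched raw payload; note it also reads the topic from the routing `topic.name` rather than `kafka_message.topic`. A hedged sketch of what it produces (made-up values, karafka loaded):

```ruby
require 'json'

metadata = Karafka::Params::Metadata.new(
  create_time: Time.now,
  headers: {},
  is_control_record: false,
  key: nil,
  offset: 42,
  deserializer: ->(params) { JSON.parse(params.raw_payload) },
  partition: 0,
  receive_time: Time.now,
  topic: 'orders'
).freeze

params = Karafka::Params::Params.new('{"id":1}', metadata)
params.raw_payload #=> '{"id":1}' - stored as-is, not yet deserialized
params.offset      #=> 42 - delegated to the metadata struct
```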
data/lib/karafka/params/builders/params_batch.rb
```diff
@@ -12,11 +12,11 @@ module Karafka
           # @param topic [Karafka::Routing::Topic] topic for which we're received messages
           # @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
           def from_kafka_messages(kafka_messages, topic)
-            params_array = kafka_messages.map! do |message|
+            params_array = kafka_messages.map do |message|
              Karafka::Params::Builders::Params.from_kafka_message(message, topic)
             end
 
-            Karafka::Params::ParamsBatch.new(params_array)
+            Karafka::Params::ParamsBatch.new(params_array).freeze
           end
         end
       end
```
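Switching `map!` to `map` stops the builder from mutating the `kafka_messages` array handed over by ruby-kafka; the params objects now land in a fresh array and the whole batch is frozen. The difference in miniature:

```ruby
input = %w[a b c]
input.map!(&:upcase) # replaces the caller's elements in place
input                #=> ["A", "B", "C"]

input = %w[a b c]
fresh = input.map(&:upcase) # builds a new array instead
input                       #=> ["a", "b", "c"] - left intact
```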
data/lib/karafka/params/metadata.rb
```diff
@@ -2,34 +2,19 @@
 
 module Karafka
   module Params
-    # Simple metadata object that stores all non-message information received from Kafka cluster
-    # while fetching the data
-    class Metadata < Hash
-      # Attributes that should be accessible as methods as well (not only hash)
-      METHOD_ATTRIBUTES = %w[
-        batch_size
-        first_offset
-        highwater_mark_offset
-        last_offset
-        offset_lag
-        deserializer
-        partition
-        topic
-      ].freeze
-
-      private_constant :METHOD_ATTRIBUTES
-
-      METHOD_ATTRIBUTES.each do |attr|
-        # Defines a method call accessor to a particular hash field.
-        define_method(attr) do
-          self[attr]
-        end
-      end
-
-      # @return [Boolean] is the last offset known or unknown
-      def unknown_last_offset?
-        self['unknown_last_offset']
-      end
-    end
+    # Single message / params metadata details that can be accessed without the need for the
+    # payload deserialization
+    Metadata = Struct.new(
+      :create_time,
+      :headers,
+      :is_control_record,
+      :key,
+      :offset,
+      :deserializer,
+      :partition,
+      :receive_time,
+      :topic,
+      keyword_init: true
+    )
   end
 end
```
data/lib/karafka/params/params.rb
```diff
@@ -6,58 +6,44 @@ module Karafka
     # It provides lazy loading not only until the first usage, but also allows us to skip
     # using deserializer until we execute our logic. That way we can operate with
     # heavy-deserialization data without slowing down the whole application.
-    class Params < Hash
-      # Params attributes that should be available via a method call invocation for Kafka
-      # client compatibility.
-      # Kafka passes internally Kafka::FetchedMessage object and the ruby-kafka consumer
-      # uses those fields via method calls, so in order to be able to pass there our params
-      # objects, have to have same api.
-      METHOD_ATTRIBUTES = %w[
-        create_time
-        headers
-        is_control_record
-        key
-        offset
-        deserializer
-        deserialized
-        partition
-        receive_time
-        topic
-        payload
-      ].freeze
+    class Params
+      extend Forwardable
 
-      private_constant :METHOD_ATTRIBUTES
+      attr_reader :raw_payload, :metadata
 
-      METHOD_ATTRIBUTES.each do |attr|
-        # Defines a method call accessor to a particular hash field.
-        # @note Won't work for complex key names that contain spaces, etc
-        # @param key [Symbol] name of a field that we want to retrieve with a method call
-        # @example
-        #   key_attr_reader :example
-        #   params.example #=> 'my example payload'
-        define_method(attr) do
-          self[attr]
-        end
+      def_delegators :metadata, *Metadata.members
+
+      # @param raw_payload [Object] incoming payload before deserialization
+      # @param metadata [Karafka::Params::Metadata] message metadata object
+      def initialize(raw_payload, metadata)
+        @raw_payload = raw_payload
+        @metadata = metadata
+        @deserialized = false
+        @payload = nil
       end
 
-      # @return [Karafka::Params::Params] This method will trigger deserializer execution. If we
-      #   decide to retrieve data, deserializer will be executed to get data. Output of that will
-      #   be merged to the current object. This object will be also marked as already deserialized,
-      #   so we won't deserialize it again.
-      def deserialize!
-        return self if self['deserialized']
+      # @return [Object] lazy-deserialized data (deserialized upon first request)
+      def payload
+        return @payload if deserialized?
+
+        @payload = deserialize
+        # We mark deserialization as successful after deserialization, as in case of an error
+        # this won't be falsely set to true
+        @deserialized = true
+        @payload
+      end
 
-        self['deserialized'] = true
-        self['payload'] = deserialize
-        self
+      # @return [Boolean] did given params payload were deserialized already
+      def deserialized?
+        @deserialized
       end
 
       private
 
-      # @return [Object] deserialized data
+      # @return [Object] tries de-serializes data
      def deserialize
         Karafka.monitor.instrument('params.params.deserialize', caller: self) do
-          self['deserializer'].call(self)
+          metadata.deserializer.call(self)
         end
       rescue ::StandardError => e
         Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
```
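The hash-backed `deserialize!` is replaced by a memoized `#payload`: the deserializer runs on the first call only, and the `@deserialized` flag is set after the assignment so a raised error leaves the params marked as not deserialized. A usage sketch assuming the karafka gem is loaded (the lambda deserializer is illustrative):

```ruby
require 'json'

deserializer = lambda do |params|
  puts 'deserializing...'
  JSON.parse(params.raw_payload)
end

metadata = Karafka::Params::Metadata.new(deserializer: deserializer, topic: 'orders')
params = Karafka::Params::Params.new('{"id":1}', metadata)

params.deserialized? #=> false
params.payload       # prints "deserializing...", returns {"id"=>1}
params.payload       # memoized - the deserializer does not run again
params.deserialized? #=> true
```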
data/lib/karafka/params/params_batch.rb
```diff
@@ -15,47 +15,46 @@ module Karafka
         @params_array = params_array
       end
 
-      # @yieldparam [Karafka::Params::Params] each deserialized and loaded params instance
-      # @note Invocation of this method will cause loading and deserializing each param after
-      #   another. If you want to get access without deserializing, please access params_array
-      #   directly
+      # @yieldparam [Karafka::Params::Params] each params instance
+      # @note Invocation of this method will not cause loading and deserializing each param after
+      #   another.
       def each
-        @params_array.each { |param| yield(param.deserialize!) }
+        @params_array.each { |param| yield(param) }
       end
 
       # @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so they
       #   can be used for batch insert, etc. Without invoking all, up until first use, they won't
       #   be deserialized
       def deserialize!
-        each(&:itself)
+        each(&:payload)
       end
 
       # @return [Array<Object>] array with deserialized payloads. This method can be useful when
       #   we don't care about metadata and just want to extract all the data payloads from the
       #   batch
       def payloads
-        deserialize!.map(&:payload)
+        map(&:payload)
       end
 
-      # @return [Karafka::Params::Params] first element after the deserialization process
+      # @return [Karafka::Params::Params] first element
       def first
-        @params_array.first.deserialize!
+        @params_array.first
       end
 
-      # @return [Karafka::Params::Params] last element after the deserialization process
+      # @return [Karafka::Params::Params] last element
       def last
-        @params_array.last.deserialize!
-      end
-
-      # @return [Array<Karafka::Params::Params>] pure array with params (not deserialized)
-      def to_a
-        @params_array
+        @params_array.last
       end
 
       # @return [Integer] number of messages in the batch
       def size
         @params_array.size
       end
+
+      # @return [Array<Karafka::Params::Params>] pure array with params
+      def to_a
+        @params_array
+      end
     end
   end
 end
```
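The batch no longer force-deserializes on iteration: `each`, `first`, `last` and `to_a` hand back params as-is, and payloads are decoded only when asked for. Inside a batch-consuming consumer this might look like the following hedged sketch (assuming Karafka's `params_batch` accessor and an app-defined `ApplicationConsumer`):

```ruby
class EventsConsumer < ApplicationConsumer
  def consume
    # Metadata access stays cheap - nothing is deserialized here
    offsets = params_batch.to_a.map { |params| params.metadata.offset }

    # Deserialization happens per message, only at this point
    params_batch.payloads.each { |payload| process(payload) } # process is illustrative
  end
end
```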
data/lib/karafka/routing/builder.rb
```diff
@@ -16,6 +16,7 @@ module Karafka
     private_constant :CONTRACT
 
     def initialize
+      super
       @draws = Concurrent::Array.new
     end
```
data/lib/karafka/routing/consumer_group.rb
```diff
@@ -8,9 +8,11 @@ module Karafka
     class ConsumerGroup
       extend Helpers::ConfigRetriever
 
-      attr_reader :topics
-      attr_reader :id
-      attr_reader :name
+      attr_reader(
+        :topics,
+        :id,
+        :name
+      )
 
       # @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
       #   yet have an application client_id namespace, this will be added here by default.
```
data/lib/karafka/serialization/json/deserializer.rb
```diff
@@ -17,8 +17,8 @@ module Karafka
         #   }
         # Deserializer.call(params) #=> { 'a' => 1 }
         def call(params)
-          ::MultiJson.load(params['payload'])
-        rescue ::MultiJson::ParseError => e
+          params.raw_payload.nil? ? nil : ::JSON.parse(params.raw_payload)
+        rescue ::JSON::ParserError => e
           raise ::Karafka::Errors::DeserializationError, e
         end
       end
```
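The deserializer moves from MultiJson to the stdlib JSON and passes `nil` payloads straight through (e.g. Kafka tombstone records) instead of raising on them. Its behavior in isolation:

```ruby
require 'json'

def deserialize(raw_payload)
  raw_payload.nil? ? nil : ::JSON.parse(raw_payload)
rescue ::JSON::ParserError => e
  raise ::Karafka::Errors::DeserializationError, e
end

deserialize(nil)       #=> nil
deserialize('{"a":1}') #=> { "a" => 1 }
deserialize('oops')    # raises Karafka::Errors::DeserializationError
```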
data/lib/karafka/server.rb
```diff
@@ -58,9 +58,12 @@ module Karafka
       def stop_supervised
         Karafka::App.stop!
 
+        # See https://github.com/dry-rb/dry-configurable/issues/93
+        timeout = Thread.new { Karafka::App.config.shutdown_timeout }.join.value
+
         # We check from time to time (for the timeout period) if all the threads finished
         # their work and if so, we can just return and normal shutdown process will take place
-        (Karafka::App.config.shutdown_timeout * SUPERVISION_CHECK_FACTOR).to_i.times do
+        (timeout * SUPERVISION_CHECK_FACTOR).to_i.times do
           if consumer_threads.count(&:alive?).zero?
             Thread.new { Karafka.monitor.instrument('app.stopped') }.join
             return
```
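`stop_supervised` runs from signal traps, and on Rubies before 3.0 acquiring a mutex there raises `ThreadError: can't be called from trap context`; dry-configurable guards its settings with a mutex (hence the linked issue). Reading the value on a fresh thread and joining sidesteps the trap context, the same pattern the `app.stopped` instrumentation above already uses. A minimal sketch of the problem and the workaround:

```ruby
mutex = Mutex.new

trap('INT') do
  # mutex.synchronize { ... } directly here raises ThreadError on Ruby < 3.0:
  # "can't be called from trap context". A spawned thread is not in trap
  # context, so the read succeeds:
  value = Thread.new { mutex.synchronize { 42 } }.join.value
  puts "read #{value} inside the trap handler"
end

Process.kill('INT', Process.pid)
sleep 0.1 # let the handler run
```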