karafka 1.3.3 → 1.4.0.pre.rc1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of karafka might be problematic. Click here for more details.

Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/.diffend.yml +3 -0
  5. data/.github/workflows/ci.yml +52 -0
  6. data/.ruby-version +1 -1
  7. data/CHANGELOG.md +34 -1
  8. data/CODE_OF_CONDUCT.md +1 -1
  9. data/Gemfile +2 -0
  10. data/Gemfile.lock +45 -47
  11. data/README.md +3 -5
  12. data/certs/mensfeld.pem +21 -21
  13. data/docker-compose.yml +17 -0
  14. data/karafka.gemspec +3 -4
  15. data/lib/karafka.rb +1 -1
  16. data/lib/karafka/attributes_map.rb +2 -8
  17. data/lib/karafka/cli.rb +1 -1
  18. data/lib/karafka/cli/flow.rb +9 -6
  19. data/lib/karafka/cli/info.rb +1 -1
  20. data/lib/karafka/connection/api_adapter.rb +12 -6
  21. data/lib/karafka/connection/batch_delegator.rb +5 -1
  22. data/lib/karafka/connection/builder.rb +4 -2
  23. data/lib/karafka/connection/client.rb +1 -1
  24. data/lib/karafka/consumers/batch_metadata.rb +10 -0
  25. data/lib/karafka/consumers/includer.rb +5 -4
  26. data/lib/karafka/contracts/server_cli_options.rb +2 -0
  27. data/lib/karafka/instrumentation/stdout_listener.rb +4 -2
  28. data/lib/karafka/params/batch_metadata.rb +26 -0
  29. data/lib/karafka/params/builders/batch_metadata.rb +30 -0
  30. data/lib/karafka/params/builders/params.rb +17 -15
  31. data/lib/karafka/params/builders/params_batch.rb +2 -2
  32. data/lib/karafka/params/metadata.rb +14 -29
  33. data/lib/karafka/params/params.rb +24 -42
  34. data/lib/karafka/params/params_batch.rb +15 -16
  35. data/lib/karafka/serialization/json/deserializer.rb +2 -2
  36. data/lib/karafka/server.rb +4 -1
  37. data/lib/karafka/setup/config.rb +2 -0
  38. data/lib/karafka/version.rb +1 -1
  39. metadata +37 -48
  40. metadata.gz.sig +0 -0
  41. data/.travis.yml +0 -36
  42. data/lib/karafka/consumers/metadata.rb +0 -10
  43. data/lib/karafka/params/builders/metadata.rb +0 -33
@@ -52,14 +52,8 @@ module Karafka
52
52
  ignored_settings = api_adapter[:subscribe]
53
53
  defined_settings = api_adapter.values.flatten
54
54
  karafka_settings = %i[batch_fetching]
55
- # This is a dirty and bad hack of dry-configurable to get keys before setting values
56
- dynamically_proxied = Karafka::Setup::Config
57
- ._settings
58
- .settings
59
- .find { |s| s.name == :kafka }
60
- .value
61
- .names
62
- .to_a
55
+
56
+ dynamically_proxied = Karafka::Setup::Config.config.kafka.to_h.keys
63
57
 
64
58
  (defined_settings + dynamically_proxied).uniq + karafka_settings - ignored_settings
65
59
  end
@@ -47,7 +47,7 @@ end
47
47
  if ENV['KARAFKA_CONSOLE']
48
48
  # Reloads Karafka irb console session
49
49
  def reload!
50
- puts "Reloading...\n"
50
+ Karafka.logger.info "Reloading...\n"
51
51
  Kernel.exec Karafka::Cli::Console.command
52
52
  end
53
53
  end
@@ -11,19 +11,22 @@ module Karafka
11
11
  def call
12
12
  topics.each do |topic|
13
13
  any_topics = !topic.responder&.topics.nil?
14
+ log_messages = []
14
15
 
15
16
  if any_topics
16
- puts "#{topic.name} =>"
17
+ log_messages << "#{topic.name} =>"
17
18
 
18
19
  topic.responder.topics.each_value do |responder_topic|
19
20
  features = []
20
21
  features << (responder_topic.required? ? 'always' : 'conditionally')
21
22
 
22
- print responder_topic.name, "(#{features.join(', ')})"
23
+ log_messages << format(responder_topic.name, "(#{features.join(', ')})")
23
24
  end
24
25
  else
25
- puts "#{topic.name} => (nothing)"
26
+ log_messages << "#{topic.name} => (nothing)"
26
27
  end
28
+
29
+ Karafka.logger.info(log_messages.join("\n"))
27
30
  end
28
31
  end
29
32
 
@@ -34,11 +37,11 @@ module Karafka
34
37
  Karafka::App.consumer_groups.map(&:topics).flatten.sort_by(&:name)
35
38
  end
36
39
 
37
- # Prints a given value with label in a nice way
40
+ # Formats a given value with label in a nice way
38
41
  # @param label [String] label describing value
39
42
  # @param value [String] value that should be printed
40
- def print(label, value)
41
- printf "%-25s %s\n", " - #{label}:", value
43
+ def format(label, value)
44
+ " - #{label}: #{value}"
42
45
  end
43
46
  end
44
47
  end
@@ -24,7 +24,7 @@ module Karafka
24
24
  "Kafka seed brokers: #{config.kafka.seed_brokers}"
25
25
  ]
26
26
 
27
- puts(info.join("\n"))
27
+ Karafka.logger.info(info.join("\n"))
28
28
  end
29
29
  end
30
30
  end
@@ -14,11 +14,12 @@ module Karafka
14
14
  module ApiAdapter
15
15
  class << self
16
16
  # Builds all the configuration settings for Kafka.new method
17
+ # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
17
18
  # @return [Array<Hash>] Array with all the client arguments including hash with all
18
19
  # the settings required by Kafka.new method
19
20
  # @note We return array, so we can inject any arguments we want, in case of changes in the
20
21
  # raw driver
21
- def client
22
+ def client(consumer_group)
22
23
  # This one is a default that takes all the settings except special
23
24
  # cases defined in the map
24
25
  settings = {
@@ -26,14 +27,17 @@ module Karafka
26
27
  client_id: ::Karafka::App.config.client_id
27
28
  }
28
29
 
29
- kafka_configs.each do |setting_name, setting_value|
30
+ kafka_configs.each_key do |setting_name|
30
31
  # All options for config adapter should be ignored as we're just interested
31
32
  # in what is left, as we want to pass all the options that are "typical"
32
33
  # and not listed in the api_adapter special cases mapping. All the values
33
34
  # from the api_adapter mapping go somewhere else, not to the client directly
34
35
  next if AttributesMap.api_adapter.values.flatten.include?(setting_name)
35
36
 
36
- settings[setting_name] = setting_value
37
+ # Settings for each consumer group are either defined per consumer group or are
38
+ # inherited from the global/general settings level, thus we don't have to fetch them
39
+ # from the kafka settings as they are already on a consumer group level
40
+ settings[setting_name] = consumer_group.public_send(setting_name)
37
41
  end
38
42
 
39
43
  settings_hash = sanitize(settings)
@@ -105,11 +109,13 @@ module Karafka
105
109
  # Majority of users don't use custom topic mappers. No need to change anything when it
106
110
  # is a default mapper that does not change anything. Only some cloud providers require
107
111
  # topics to be remapped
108
- return [params] if Karafka::App.config.topic_mapper.is_a?(Karafka::Routing::TopicMapper)
112
+ return [params.metadata] if Karafka::App.config.topic_mapper.is_a?(
113
+ Karafka::Routing::TopicMapper
114
+ )
109
115
 
110
116
  # @note We don't use tap as it is around 13% slower than non-dup version
111
- dupped = params.dup
112
- dupped['topic'] = Karafka::App.config.topic_mapper.outgoing(params.topic)
117
+ dupped = params.metadata.dup
118
+ dupped['topic'] = Karafka::App.config.topic_mapper.outgoing(params.metadata.topic)
113
119
  [dupped]
114
120
  end
115
121
 
@@ -23,7 +23,11 @@ module Karafka
23
23
  ) do
24
24
  # Due to how ruby-kafka is built, we have the metadata that is stored on the batch
25
25
  # level only available for batch consuming
26
- consumer.metadata = Params::Builders::Metadata.from_kafka_batch(kafka_batch, topic)
26
+ consumer.batch_metadata = Params::Builders::BatchMetadata.from_kafka_batch(
27
+ kafka_batch,
28
+ topic
29
+ )
30
+
27
31
  kafka_messages = kafka_batch.messages
28
32
 
29
33
  # Depending on a case (persisted or not) we might use new consumer instance per
@@ -6,9 +6,11 @@ module Karafka
6
6
  module Builder
7
7
  class << self
8
8
  # Builds a Kafka::Client instance that we use to work with Kafka cluster
9
+ # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which we want
10
+ # to have a new Kafka client
9
11
  # @return [::Kafka::Client] returns a Kafka client
10
- def call
11
- Kafka.new(*ApiAdapter.client)
12
+ def call(consumer_group)
13
+ Kafka.new(*ApiAdapter.client(consumer_group))
12
14
  end
13
15
  end
14
16
  end
@@ -97,7 +97,7 @@ module Karafka
97
97
  def kafka_consumer
98
98
  # @note We don't cache the connection internally because we cache kafka_consumer that uses
99
99
  # kafka client object instance
100
- @kafka_consumer ||= Builder.call.consumer(
100
+ @kafka_consumer ||= Builder.call(consumer_group).consumer(
101
101
  *ApiAdapter.consumer(consumer_group)
102
102
  ).tap do |consumer|
103
103
  consumer_group.topics.each do |topic|
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Consumers
5
+ # Brings the batch metadata into consumers that support batch_fetching
6
+ module BatchMetadata
7
+ attr_accessor :batch_metadata
8
+ end
9
+ end
10
+ end
@@ -16,7 +16,7 @@ module Karafka
16
16
 
17
17
  bind_backend(consumer, topic)
18
18
  bind_params(consumer, topic)
19
- bind_metadata(consumer, topic)
19
+ bind_batch_metadata(consumer, topic)
20
20
  bind_responders(consumer, topic)
21
21
  end
22
22
 
@@ -40,13 +40,14 @@ module Karafka
40
40
  consumer.extend(SingleParams)
41
41
  end
42
42
 
43
- # Adds an option to work with metadata for consumer instances that have batch fetching
43
+ # Adds an option to work with batch metadata for consumer instances that have
44
+ # batch fetching enabled
44
45
  # @param consumer [Karafka::BaseConsumer] consumer instance
45
46
  # @param topic [Karafka::Routing::Topic] topic of a consumer class
46
- def bind_metadata(consumer, topic)
47
+ def bind_batch_metadata(consumer, topic)
47
48
  return unless topic.batch_fetching
48
49
 
49
- consumer.extend(Metadata)
50
+ consumer.extend(BatchMetadata)
50
51
  end
51
52
 
52
53
  # Adds responders support for topics and consumers with responders defined for them
@@ -6,6 +6,8 @@ module Karafka
6
6
  # We validate some basics + the list of consumer_groups on which we want to use, to make
7
7
  # sure that all of them are defined, plus that a pidfile does not exist
8
8
  class ServerCliOptions < Dry::Validation::Contract
9
+ config.messages.load_paths << File.join(Karafka.gem_root, 'config', 'errors.yml')
10
+
9
11
  params do
10
12
  optional(:pid).filled(:str?)
11
13
  optional(:daemon).filled(:bool?)
@@ -43,7 +43,7 @@ module Karafka
43
43
  # so it returns a topic as a string, not a routing topic
44
44
  debug(
45
45
  <<~MSG.chomp.tr("\n", ' ')
46
- Params deserialization for #{event[:caller].topic} topic
46
+ Params deserialization for #{event[:caller].metadata.topic} topic
47
47
  successful in #{event[:time]} ms
48
48
  MSG
49
49
  )
@@ -52,7 +52,9 @@ module Karafka
52
52
  # Logs unsuccessful deserialization attempts of incoming data
53
53
  # @param event [Dry::Events::Event] event details including payload
54
54
  def on_params_params_deserialize_error(event)
55
- error "Params deserialization error for #{event[:caller].topic} topic: #{event[:error]}"
55
+ topic = event[:caller].metadata.topic
56
+ error = event[:error]
57
+ error "Params deserialization error for #{topic} topic: #{error}"
56
58
  end
57
59
 
58
60
  # Logs errors that occurred in a listener fetch loop
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Params
5
+ # Simple batch metadata object that stores all non-message information received from Kafka
6
+ # cluster while fetching the data
7
+ # @note This metadata object refers to per batch metadata, not `#params.metadata`
8
+ BatchMetadata = Struct.new(
9
+ :batch_size,
10
+ :first_offset,
11
+ :highwater_mark_offset,
12
+ :unknown_last_offset,
13
+ :last_offset,
14
+ :offset_lag,
15
+ :deserializer,
16
+ :partition,
17
+ :topic,
18
+ keyword_init: true
19
+ ) do
20
+ # @return [Boolean] is the last offset known or unknown
21
+ def unknown_last_offset?
22
+ unknown_last_offset
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Params
5
+ module Builders
6
+ # Builder for creating batch metadata object based on the batch informations
7
+ module BatchMetadata
8
+ class << self
9
+ # Creates metadata based on the kafka batch data
10
+ # @param kafka_batch [Kafka::FetchedBatch] kafka batch details
11
+ # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
12
+ # @return [Karafka::Params::BatchMetadata] batch metadata object
13
+ def from_kafka_batch(kafka_batch, topic)
14
+ Karafka::Params::BatchMetadata.new(
15
+ batch_size: kafka_batch.messages.count,
16
+ first_offset: kafka_batch.first_offset,
17
+ highwater_mark_offset: kafka_batch.highwater_mark_offset,
18
+ unknown_last_offset: kafka_batch.unknown_last_offset?,
19
+ last_offset: kafka_batch.last_offset,
20
+ offset_lag: kafka_batch.offset_lag,
21
+ deserializer: topic.deserializer,
22
+ partition: kafka_batch.partition,
23
+ topic: topic.name
24
+ ).freeze
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -12,22 +12,24 @@ module Karafka
12
12
  class << self
13
13
  # @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
14
14
  # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
15
- # @return [Karafka::Params::Params] params object
15
+ # @return [Karafka::Params::Params] params object with payload and message metadata
16
16
  def from_kafka_message(kafka_message, topic)
17
- Karafka::Params::Params
18
- .new
19
- .merge!(
20
- 'create_time' => kafka_message.create_time,
21
- 'headers' => kafka_message.headers || {},
22
- 'is_control_record' => kafka_message.is_control_record,
23
- 'key' => kafka_message.key,
24
- 'offset' => kafka_message.offset,
25
- 'deserializer' => topic.deserializer,
26
- 'partition' => kafka_message.partition,
27
- 'receive_time' => Time.now,
28
- 'topic' => kafka_message.topic,
29
- 'payload' => kafka_message.value
30
- )
17
+ metadata = Karafka::Params::Metadata.new(
18
+ create_time: kafka_message.create_time,
19
+ headers: kafka_message.headers || {},
20
+ is_control_record: kafka_message.is_control_record,
21
+ key: kafka_message.key,
22
+ offset: kafka_message.offset,
23
+ deserializer: topic.deserializer,
24
+ partition: kafka_message.partition,
25
+ receive_time: Time.now,
26
+ topic: topic.name
27
+ ).freeze
28
+
29
+ Karafka::Params::Params.new(
30
+ kafka_message.value,
31
+ metadata
32
+ )
31
33
  end
32
34
  end
33
35
  end
@@ -12,11 +12,11 @@ module Karafka
12
12
  # @param topic [Karafka::Routing::Topic] topic for which we're received messages
13
13
  # @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
14
14
  def from_kafka_messages(kafka_messages, topic)
15
- params_array = kafka_messages.map! do |message|
15
+ params_array = kafka_messages.map do |message|
16
16
  Karafka::Params::Builders::Params.from_kafka_message(message, topic)
17
17
  end
18
18
 
19
- Karafka::Params::ParamsBatch.new(params_array)
19
+ Karafka::Params::ParamsBatch.new(params_array).freeze
20
20
  end
21
21
  end
22
22
  end
@@ -2,34 +2,19 @@
2
2
 
3
3
  module Karafka
4
4
  module Params
5
- # Simple metadata object that stores all non-message information received from Kafka cluster
6
- # while fetching the data
7
- class Metadata < Hash
8
- # Attributes that should be accessible as methods as well (not only hash)
9
- METHOD_ATTRIBUTES = %w[
10
- batch_size
11
- first_offset
12
- highwater_mark_offset
13
- last_offset
14
- offset_lag
15
- deserializer
16
- partition
17
- topic
18
- ].freeze
19
-
20
- private_constant :METHOD_ATTRIBUTES
21
-
22
- METHOD_ATTRIBUTES.each do |attr|
23
- # Defines a method call accessor to a particular hash field.
24
- define_method(attr) do
25
- self[attr]
26
- end
27
- end
28
-
29
- # @return [Boolean] is the last offset known or unknown
30
- def unknown_last_offset?
31
- self['unknown_last_offset']
32
- end
33
- end
5
+ # Single message / params metadata details that can be accessed without the need for the
6
+ # payload deserialization
7
+ Metadata = Struct.new(
8
+ :create_time,
9
+ :headers,
10
+ :is_control_record,
11
+ :key,
12
+ :offset,
13
+ :deserializer,
14
+ :partition,
15
+ :receive_time,
16
+ :topic,
17
+ keyword_init: true
18
+ )
34
19
  end
35
20
  end
@@ -6,58 +6,40 @@ module Karafka
6
6
  # It provides lazy loading not only until the first usage, but also allows us to skip
7
7
  # using deserializer until we execute our logic. That way we can operate with
8
8
  # heavy-deserialization data without slowing down the whole application.
9
- class Params < Hash
10
- # Params attributes that should be available via a method call invocation for Kafka
11
- # client compatibility.
12
- # Kafka passes internally Kafka::FetchedMessage object and the ruby-kafka consumer
13
- # uses those fields via method calls, so in order to be able to pass there our params
14
- # objects, have to have same api.
15
- METHOD_ATTRIBUTES = %w[
16
- create_time
17
- headers
18
- is_control_record
19
- key
20
- offset
21
- deserializer
22
- deserialized
23
- partition
24
- receive_time
25
- topic
26
- payload
27
- ].freeze
9
+ class Params
10
+ attr_reader :raw_payload, :metadata
28
11
 
29
- private_constant :METHOD_ATTRIBUTES
30
-
31
- METHOD_ATTRIBUTES.each do |attr|
32
- # Defines a method call accessor to a particular hash field.
33
- # @note Won't work for complex key names that contain spaces, etc
34
- # @param key [Symbol] name of a field that we want to retrieve with a method call
35
- # @example
36
- # key_attr_reader :example
37
- # params.example #=> 'my example payload'
38
- define_method(attr) do
39
- self[attr]
40
- end
12
+ # @param raw_payload [Object] incoming payload before deserialization
13
+ # @param metadata [Karafka::Params::Metadata] message metadata object
14
+ def initialize(raw_payload, metadata)
15
+ @raw_payload = raw_payload
16
+ @metadata = metadata
17
+ @deserialized = false
18
+ @payload = nil
41
19
  end
42
20
 
43
- # @return [Karafka::Params::Params] This method will trigger deserializer execution. If we
44
- # decide to retrieve data, deserializer will be executed to get data. Output of that will
45
- # be merged to the current object. This object will be also marked as already deserialized,
46
- # so we won't deserialize it again.
47
- def deserialize!
48
- return self if self['deserialized']
21
+ # @return [Object] lazy-deserialized data (deserialized upon first request)
22
+ def payload
23
+ return @payload if deserialized?
24
+
25
+ @payload = deserialize
26
+ # We mark deserialization as successful after deserialization, as in case of an error
27
+ # this won't be falsely set to true
28
+ @deserialized = true
29
+ @payload
30
+ end
49
31
 
50
- self['deserialized'] = true
51
- self['payload'] = deserialize
52
- self
32
+ # @return [Boolean] did given params payload were deserialized already
33
+ def deserialized?
34
+ @deserialized
53
35
  end
54
36
 
55
37
  private
56
38
 
57
- # @return [Object] deserialized data
39
+ # @return [Object] tries de-serializes data
58
40
  def deserialize
59
41
  Karafka.monitor.instrument('params.params.deserialize', caller: self) do
60
- self['deserializer'].call(self)
42
+ metadata.deserializer.call(self)
61
43
  end
62
44
  rescue ::StandardError => e
63
45
  Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)