karafka 1.3.5 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,7 +24,7 @@ module Karafka
           "Kafka seed brokers: #{config.kafka.seed_brokers}"
         ]
 
-        puts(info.join("\n"))
+        Karafka.logger.info(info.join("\n"))
       end
     end
   end
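With this change, the output of the `karafka info` CLI command goes through `Karafka.logger` instead of being printed straight to STDOUT, so it ends up wherever the application logger writes. A minimal sketch of the relevant setup; the `ExampleApp` class name and log path are illustrative, not part of this diff:

    # Illustrative only: any logger assigned in the app setup now also
    # receives the `karafka info` output
    class ExampleApp < Karafka::App
      setup do |config|
        config.logger = Logger.new('log/karafka.log')
      end
    end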
@@ -14,11 +14,12 @@ module Karafka
     module ApiAdapter
       class << self
         # Builds all the configuration settings for Kafka.new method
+        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
         # @return [Array<Hash>] Array with all the client arguments including hash with all
         #   the settings required by Kafka.new method
         # @note We return array, so we can inject any arguments we want, in case of changes in the
         #   raw driver
-        def client
+        def client(consumer_group)
          # This one is a default that takes all the settings except special
          # cases defined in the map
          settings = {
@@ -26,14 +27,17 @@ module Karafka
            client_id: ::Karafka::App.config.client_id
          }
 
-          kafka_configs.each do |setting_name, setting_value|
+          kafka_configs.each_key do |setting_name|
            # All options for config adapter should be ignored as we're just interested
            # in what is left, as we want to pass all the options that are "typical"
            # and not listed in the api_adapter special cases mapping. All the values
            # from the api_adapter mapping go somewhere else, not to the client directly
            next if AttributesMap.api_adapter.values.flatten.include?(setting_name)
 
-            settings[setting_name] = setting_value
+            # Settings for each consumer group are either defined per consumer group or are
+            # inherited from the global/general settings level, thus we don't have to fetch them
+            # from the kafka settings as they are already on a consumer group level
+            settings[setting_name] = consumer_group.public_send(setting_name)
          end
 
          settings_hash = sanitize(settings)
@@ -105,11 +109,13 @@ module Karafka
          # Majority of users don't use custom topic mappers. No need to change anything when it
          #   is a default mapper that does not change anything. Only some cloud providers require
          #   topics to be remapped
-          return [params] if Karafka::App.config.topic_mapper.is_a?(Karafka::Routing::TopicMapper)
+          return [params.metadata] if Karafka::App.config.topic_mapper.is_a?(
+            Karafka::Routing::TopicMapper
+          )
 
          # @note We don't use tap as it is around 13% slower than non-dup version
-          dupped = params.dup
-          dupped['topic'] = Karafka::App.config.topic_mapper.outgoing(params.topic)
+          dupped = params.metadata.dup
+          dupped['topic'] = Karafka::App.config.topic_mapper.outgoing(params.metadata.topic)
          [dupped]
        end
 
@@ -23,7 +23,11 @@ module Karafka
          ) do
            # Due to how ruby-kafka is built, we have the metadata that is stored on the batch
            # level only available for batch consuming
-            consumer.metadata = Params::Builders::Metadata.from_kafka_batch(kafka_batch, topic)
+            consumer.batch_metadata = Params::Builders::BatchMetadata.from_kafka_batch(
+              kafka_batch,
+              topic
+            )
+
            kafka_messages = kafka_batch.messages
 
            # Depending on a case (persisted or not) we might use new consumer instance per
@@ -6,9 +6,11 @@ module Karafka
     module Builder
       class << self
         # Builds a Kafka::Client instance that we use to work with Kafka cluster
+        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which we want
+        #   to have a new Kafka client
         # @return [::Kafka::Client] returns a Kafka client
-        def call
-          Kafka.new(*ApiAdapter.client)
+        def call(consumer_group)
+          Kafka.new(*ApiAdapter.client(consumer_group))
         end
       end
     end
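Note that `Connection::Builder.call` and `ApiAdapter.client` now both take the consumer group, so each consumer group gets a Kafka client built from its own settings (falling back to the globally configured kafka values through inheritance, per the comment in the hunk above). A rough sketch of the new call chain, assuming at least one consumer group has been drawn in the routing:

    # Illustrative: settings are now resolved from the consumer group object itself
    consumer_group = Karafka::App.consumer_groups.first
    client_args = Karafka::Connection::ApiAdapter.client(consumer_group)
    kafka_client = Kafka.new(*client_args) # what Builder.call(consumer_group) does internally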
@@ -97,7 +97,7 @@ module Karafka
      def kafka_consumer
        # @note We don't cache the connection internally because we cache kafka_consumer that uses
        #   kafka client object instance
-        @kafka_consumer ||= Builder.call.consumer(
+        @kafka_consumer ||= Builder.call(consumer_group).consumer(
          *ApiAdapter.consumer(consumer_group)
        ).tap do |consumer|
          consumer_group.topics.each do |topic|
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Consumers
+    # Brings the batch metadata into consumers that support batch_fetching
+    module BatchMetadata
+      attr_accessor :batch_metadata
+    end
+  end
+end
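For consumers of topics with `batch_fetching` enabled, the accessor this module provides is now `batch_metadata` rather than `metadata`, so consumer code referencing the old name needs updating. A hypothetical consumer illustrating the rename (the class name is an example, not from this diff):

    class EventsConsumer < Karafka::BaseConsumer
      def consume
        # 1.3.5: metadata.batch_size / 1.4.0: batch_metadata.batch_size
        Karafka.logger.info(
          "Got #{batch_metadata.batch_size} messages from " \
          "#{batch_metadata.topic}/#{batch_metadata.partition}"
        )
      end
    end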
@@ -16,7 +16,7 @@ module Karafka
 
        bind_backend(consumer, topic)
        bind_params(consumer, topic)
-        bind_metadata(consumer, topic)
+        bind_batch_metadata(consumer, topic)
        bind_responders(consumer, topic)
      end
 
@@ -40,13 +40,14 @@ module Karafka
        consumer.extend(SingleParams)
      end
 
-      # Adds an option to work with metadata for consumer instances that have batch fetching
+      # Adds an option to work with batch metadata for consumer instances that have
+      #   batch fetching enabled
      # @param consumer [Karafka::BaseConsumer] consumer instance
      # @param topic [Karafka::Routing::Topic] topic of a consumer class
-      def bind_metadata(consumer, topic)
+      def bind_batch_metadata(consumer, topic)
        return unless topic.batch_fetching
 
-        consumer.extend(Metadata)
+        consumer.extend(BatchMetadata)
      end
 
      # Adds responders support for topics and consumers with responders defined for them
@@ -43,7 +43,7 @@ module Karafka
        # so it returns a topic as a string, not a routing topic
        debug(
          <<~MSG.chomp.tr("\n", ' ')
-            Params deserialization for #{event[:caller].topic} topic
+            Params deserialization for #{event[:caller].metadata.topic} topic
            successful in #{event[:time]} ms
          MSG
        )
@@ -52,7 +52,9 @@ module Karafka
      # Logs unsuccessful deserialization attempts of incoming data
      # @param event [Dry::Events::Event] event details including payload
      def on_params_params_deserialize_error(event)
-        error "Params deserialization error for #{event[:caller].topic} topic: #{event[:error]}"
+        topic = event[:caller].metadata.topic
+        error = event[:error]
+        error "Params deserialization error for #{topic} topic: #{error}"
      end
 
      # Logs errors that occurred in a listener fetch loop
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Params
+    # Simple batch metadata object that stores all non-message information received from Kafka
+    # cluster while fetching the data
+    # @note This metadata object refers to per batch metadata, not `#params.metadata`
+    BatchMetadata = Struct.new(
+      :batch_size,
+      :first_offset,
+      :highwater_mark_offset,
+      :unknown_last_offset,
+      :last_offset,
+      :offset_lag,
+      :deserializer,
+      :partition,
+      :topic,
+      keyword_init: true
+    ) do
+      # @return [Boolean] is the last offset known or unknown
+      def unknown_last_offset?
+        unknown_last_offset
+      end
+    end
+  end
+end
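Since batch metadata is now a plain `Struct` built with keyword arguments, its fields are ordinary accessors. A short sketch with made-up values (the lambda deserializer is illustrative):

    meta = Karafka::Params::BatchMetadata.new(
      batch_size: 2, first_offset: 10, last_offset: 11,
      highwater_mark_offset: 13, offset_lag: 2, unknown_last_offset: false,
      deserializer: ->(params) { params.raw_payload }, partition: 0, topic: 'events'
    ).freeze

    meta.offset_lag           # => 2
    meta.unknown_last_offset? # => false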
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Params
+    module Builders
+      # Builder for creating batch metadata object based on the batch informations
+      module BatchMetadata
+        class << self
+          # Creates metadata based on the kafka batch data
+          # @param kafka_batch [Kafka::FetchedBatch] kafka batch details
+          # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
+          # @return [Karafka::Params::BatchMetadata] batch metadata object
+          def from_kafka_batch(kafka_batch, topic)
+            Karafka::Params::BatchMetadata.new(
+              batch_size: kafka_batch.messages.count,
+              first_offset: kafka_batch.first_offset,
+              highwater_mark_offset: kafka_batch.highwater_mark_offset,
+              unknown_last_offset: kafka_batch.unknown_last_offset?,
+              last_offset: kafka_batch.last_offset,
+              offset_lag: kafka_batch.offset_lag,
+              deserializer: topic.deserializer,
+              partition: kafka_batch.partition,
+              topic: topic.name
+            ).freeze
+          end
+        end
+      end
+    end
+  end
+end
@@ -12,22 +12,24 @@ module Karafka
        class << self
          # @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
          # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
-          # @return [Karafka::Params::Params] params object
+          # @return [Karafka::Params::Params] params object with payload and message metadata
          def from_kafka_message(kafka_message, topic)
-            Karafka::Params::Params
-              .new
-              .merge!(
-                'create_time' => kafka_message.create_time,
-                'headers' => kafka_message.headers || {},
-                'is_control_record' => kafka_message.is_control_record,
-                'key' => kafka_message.key,
-                'offset' => kafka_message.offset,
-                'deserializer' => topic.deserializer,
-                'partition' => kafka_message.partition,
-                'receive_time' => Time.now,
-                'topic' => kafka_message.topic,
-                'payload' => kafka_message.value
-              )
+            metadata = Karafka::Params::Metadata.new(
+              create_time: kafka_message.create_time,
+              headers: kafka_message.headers || {},
+              is_control_record: kafka_message.is_control_record,
+              key: kafka_message.key,
+              offset: kafka_message.offset,
+              deserializer: topic.deserializer,
+              partition: kafka_message.partition,
+              receive_time: Time.now,
+              topic: topic.name
+            ).freeze
+
+            Karafka::Params::Params.new(
+              kafka_message.value,
+              metadata
+            )
          end
        end
      end
@@ -12,11 +12,11 @@ module Karafka
        # @param topic [Karafka::Routing::Topic] topic for which we're received messages
        # @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
        def from_kafka_messages(kafka_messages, topic)
-          params_array = kafka_messages.map! do |message|
+          params_array = kafka_messages.map do |message|
            Karafka::Params::Builders::Params.from_kafka_message(message, topic)
          end
 
-          Karafka::Params::ParamsBatch.new(params_array)
+          Karafka::Params::ParamsBatch.new(params_array).freeze
        end
      end
    end
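Two subtle fixes land here: `map` replaces `map!`, so the message array owned by ruby-kafka is no longer mutated in place, and the resulting batch is frozen. Note that `freeze` in Ruby is shallow, so (assuming `kafka_messages` and `topic` are in scope) it protects the batch wrapper, not the params objects inside it:

    batch = Karafka::Params::Builders::ParamsBatch.from_kafka_messages(kafka_messages, topic)
    batch.frozen?       # => true
    batch.first.frozen? # => false - individual params stay mutable wrappers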
@@ -2,34 +2,19 @@
 
 module Karafka
   module Params
-    # Simple metadata object that stores all non-message information received from Kafka cluster
-    # while fetching the data
-    class Metadata < Hash
-      # Attributes that should be accessible as methods as well (not only hash)
-      METHOD_ATTRIBUTES = %w[
-        batch_size
-        first_offset
-        highwater_mark_offset
-        last_offset
-        offset_lag
-        deserializer
-        partition
-        topic
-      ].freeze
-
-      private_constant :METHOD_ATTRIBUTES
-
-      METHOD_ATTRIBUTES.each do |attr|
-        # Defines a method call accessor to a particular hash field.
-        define_method(attr) do
-          self[attr]
-        end
-      end
-
-      # @return [Boolean] is the last offset known or unknown
-      def unknown_last_offset?
-        self['unknown_last_offset']
-      end
-    end
+    # Single message / params metadata details that can be accessed without the need for the
+    # payload deserialization
+    Metadata = Struct.new(
+      :create_time,
+      :headers,
+      :is_control_record,
+      :key,
+      :offset,
+      :deserializer,
+      :partition,
+      :receive_time,
+      :topic,
+      keyword_init: true
+    )
   end
 end
@@ -6,58 +6,44 @@ module Karafka
    # It provides lazy loading not only until the first usage, but also allows us to skip
    # using deserializer until we execute our logic. That way we can operate with
    # heavy-deserialization data without slowing down the whole application.
-    class Params < Hash
-      # Params attributes that should be available via a method call invocation for Kafka
-      # client compatibility.
-      # Kafka passes internally Kafka::FetchedMessage object and the ruby-kafka consumer
-      # uses those fields via method calls, so in order to be able to pass there our params
-      # objects, have to have same api.
-      METHOD_ATTRIBUTES = %w[
-        create_time
-        headers
-        is_control_record
-        key
-        offset
-        deserializer
-        deserialized
-        partition
-        receive_time
-        topic
-        payload
-      ].freeze
+    class Params
+      extend Forwardable
 
-      private_constant :METHOD_ATTRIBUTES
+      attr_reader :raw_payload, :metadata
 
-      METHOD_ATTRIBUTES.each do |attr|
-        # Defines a method call accessor to a particular hash field.
-        # @note Won't work for complex key names that contain spaces, etc
-        # @param key [Symbol] name of a field that we want to retrieve with a method call
-        # @example
-        #   key_attr_reader :example
-        #   params.example #=> 'my example payload'
-        define_method(attr) do
-          self[attr]
-        end
+      def_delegators :metadata, *Metadata.members
+
+      # @param raw_payload [Object] incoming payload before deserialization
+      # @param metadata [Karafka::Params::Metadata] message metadata object
+      def initialize(raw_payload, metadata)
+        @raw_payload = raw_payload
+        @metadata = metadata
+        @deserialized = false
+        @payload = nil
      end
 
-      # @return [Karafka::Params::Params] This method will trigger deserializer execution. If we
-      #   decide to retrieve data, deserializer will be executed to get data. Output of that will
-      #   be merged to the current object. This object will be also marked as already deserialized,
-      #   so we won't deserialize it again.
-      def deserialize!
-        return self if self['deserialized']
+      # @return [Object] lazy-deserialized data (deserialized upon first request)
+      def payload
+        return @payload if deserialized?
+
+        @payload = deserialize
+        # We mark deserialization as successful after deserialization, as in case of an error
+        # this won't be falsely set to true
+        @deserialized = true
+        @payload
+      end
 
-        self['deserialized'] = true
-        self['payload'] = deserialize
-        self
+      # @return [Boolean] did given params payload were deserialized already
+      def deserialized?
+        @deserialized
      end
 
      private
 
-      # @return [Object] deserialized data
+      # @return [Object] tries de-serializes data
      def deserialize
        Karafka.monitor.instrument('params.params.deserialize', caller: self) do
-          self['deserializer'].call(self)
+          metadata.deserializer.call(self)
        end
      rescue ::StandardError => e
        Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
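`Params` is no longer a `Hash` that merges deserialization output into itself; it now wraps a frozen metadata struct plus the untouched `raw_payload`, and `#payload` deserializes lazily, memoizing on first access. A sketch of the new access patterns; the JSON deserializer and all values are illustrative:

    require 'json'

    metadata = Karafka::Params::Metadata.new(
      topic: 'events',
      partition: 0,
      offset: 42,
      receive_time: Time.now,
      deserializer: ->(params) { JSON.parse(params.raw_payload) }
    ).freeze

    params = Karafka::Params::Params.new('{"id":1}', metadata)

    params.raw_payload   # => '{"id":1}' - always available, never touched
    params.deserialized? # => false
    params.payload       # => {"id"=>1} - deserializer runs once here, result memoized
    params.topic         # => 'events' - delegated to metadata via Forwardable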
@@ -15,47 +15,46 @@ module Karafka
        @params_array = params_array
      end
 
-      # @yieldparam [Karafka::Params::Params] each deserialized and loaded params instance
-      # @note Invocation of this method will cause loading and deserializing each param after
-      #   another. If you want to get access without deserializing, please access params_array
-      #   directly
+      # @yieldparam [Karafka::Params::Params] each params instance
+      # @note Invocation of this method will not cause loading and deserializing each param after
+      #   another.
      def each
-        @params_array.each { |param| yield(param.deserialize!) }
+        @params_array.each { |param| yield(param) }
      end
 
      # @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so they
      #   can be used for batch insert, etc. Without invoking all, up until first use, they won't
      #   be deserialized
      def deserialize!
-        each(&:itself)
+        each(&:payload)
      end
 
      # @return [Array<Object>] array with deserialized payloads. This method can be useful when
      #   we don't care about metadata and just want to extract all the data payloads from the
      #   batch
      def payloads
-        deserialize!.map(&:payload)
+        map(&:payload)
      end
 
-      # @return [Karafka::Params::Params] first element after the deserialization process
+      # @return [Karafka::Params::Params] first element
      def first
-        @params_array.first.deserialize!
+        @params_array.first
      end
 
-      # @return [Karafka::Params::Params] last element after the deserialization process
+      # @return [Karafka::Params::Params] last element
      def last
-        @params_array.last.deserialize!
-      end
-
-      # @return [Array<Karafka::Params::Params>] pure array with params (not deserialized)
-      def to_a
-        @params_array
+        @params_array.last
      end
 
      # @return [Integer] number of messages in the batch
      def size
        @params_array.size
      end
+
+      # @return [Array<Karafka::Params::Params>] pure array with params
+      def to_a
+        @params_array
+      end
    end
  end
 end
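Because `#each` no longer force-deserializes, iterating a batch in 1.4.0 stays cheap until a payload is actually requested; `#payloads` (or an explicit `#payload` call) is the opt-in. A hypothetical batch consumer showing both paths (the class name and payload shape are examples):

    class LogsConsumer < Karafka::BaseConsumer
      def consume
        # Metadata-only pass - nothing gets deserialized here anymore
        params_batch.each { |params| Karafka.logger.debug("offset: #{params.offset}") }

        # Explicit opt-in: deserializes every message payload in the batch
        ids = params_batch.payloads.map { |payload| payload['id'] }
        Karafka.logger.info("Processed ids: #{ids.join(', ')}")
      end
    end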