karafka 1.3.4 → 1.4.0.rc2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/.diffend.yml +3 -0
- data/.github/workflows/ci.yml +52 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +30 -1
- data/CODE_OF_CONDUCT.md +1 -1
- data/Gemfile +2 -0
- data/Gemfile.lock +39 -40
- data/README.md +3 -5
- data/certs/mensfeld.pem +21 -21
- data/docker-compose.yml +17 -0
- data/karafka.gemspec +3 -4
- data/lib/karafka.rb +1 -1
- data/lib/karafka/cli.rb +1 -1
- data/lib/karafka/cli/flow.rb +9 -10
- data/lib/karafka/cli/info.rb +1 -1
- data/lib/karafka/connection/api_adapter.rb +12 -6
- data/lib/karafka/connection/batch_delegator.rb +5 -1
- data/lib/karafka/connection/builder.rb +4 -2
- data/lib/karafka/connection/client.rb +1 -1
- data/lib/karafka/consumers/batch_metadata.rb +10 -0
- data/lib/karafka/consumers/includer.rb +5 -4
- data/lib/karafka/instrumentation/stdout_listener.rb +4 -2
- data/lib/karafka/params/batch_metadata.rb +26 -0
- data/lib/karafka/params/builders/batch_metadata.rb +30 -0
- data/lib/karafka/params/builders/params.rb +17 -15
- data/lib/karafka/params/builders/params_batch.rb +2 -2
- data/lib/karafka/params/metadata.rb +14 -29
- data/lib/karafka/params/params.rb +27 -41
- data/lib/karafka/params/params_batch.rb +15 -16
- data/lib/karafka/serialization/json/deserializer.rb +2 -2
- data/lib/karafka/server.rb +4 -1
- data/lib/karafka/setup/config.rb +2 -0
- data/lib/karafka/version.rb +1 -1
- metadata +37 -48
- metadata.gz.sig +0 -0
- data/.travis.yml +0 -36
- data/lib/karafka/consumers/metadata.rb +0 -10
- data/lib/karafka/params/builders/metadata.rb +0 -33
data/lib/karafka/cli/info.rb
CHANGED
@@ -14,11 +14,12 @@ module Karafka
|
|
14
14
|
module ApiAdapter
|
15
15
|
class << self
|
16
16
|
# Builds all the configuration settings for Kafka.new method
|
17
|
+
# @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
|
17
18
|
# @return [Array<Hash>] Array with all the client arguments including hash with all
|
18
19
|
# the settings required by Kafka.new method
|
19
20
|
# @note We return array, so we can inject any arguments we want, in case of changes in the
|
20
21
|
# raw driver
|
21
|
-
def client
|
22
|
+
def client(consumer_group)
|
22
23
|
# This one is a default that takes all the settings except special
|
23
24
|
# cases defined in the map
|
24
25
|
settings = {
|
@@ -26,14 +27,17 @@ module Karafka
|
|
26
27
|
client_id: ::Karafka::App.config.client_id
|
27
28
|
}
|
28
29
|
|
29
|
-
kafka_configs.
|
30
|
+
kafka_configs.each_key do |setting_name|
|
30
31
|
# All options for config adapter should be ignored as we're just interested
|
31
32
|
# in what is left, as we want to pass all the options that are "typical"
|
32
33
|
# and not listed in the api_adapter special cases mapping. All the values
|
33
34
|
# from the api_adapter mapping go somewhere else, not to the client directly
|
34
35
|
next if AttributesMap.api_adapter.values.flatten.include?(setting_name)
|
35
36
|
|
36
|
-
|
37
|
+
# Settings for each consumer group are either defined per consumer group or are
|
38
|
+
# inherited from the global/general settings level, thus we don't have to fetch them
|
39
|
+
# from the kafka settings as they are already on a consumer group level
|
40
|
+
settings[setting_name] = consumer_group.public_send(setting_name)
|
37
41
|
end
|
38
42
|
|
39
43
|
settings_hash = sanitize(settings)
|
@@ -105,11 +109,13 @@ module Karafka
|
|
105
109
|
# Majority of users don't use custom topic mappers. No need to change anything when it
|
106
110
|
# is a default mapper that does not change anything. Only some cloud providers require
|
107
111
|
# topics to be remapped
|
108
|
-
return [params] if Karafka::App.config.topic_mapper.is_a?(
|
112
|
+
return [params.metadata] if Karafka::App.config.topic_mapper.is_a?(
|
113
|
+
Karafka::Routing::TopicMapper
|
114
|
+
)
|
109
115
|
|
110
116
|
# @note We don't use tap as it is around 13% slower than non-dup version
|
111
|
-
dupped = params.dup
|
112
|
-
dupped['topic'] = Karafka::App.config.topic_mapper.outgoing(params.topic)
|
117
|
+
dupped = params.metadata.dup
|
118
|
+
dupped['topic'] = Karafka::App.config.topic_mapper.outgoing(params.metadata.topic)
|
113
119
|
[dupped]
|
114
120
|
end
|
115
121
|
|
@@ -23,7 +23,11 @@ module Karafka
|
|
23
23
|
) do
|
24
24
|
# Due to how ruby-kafka is built, we have the metadata that is stored on the batch
|
25
25
|
# level only available for batch consuming
|
26
|
-
consumer.
|
26
|
+
consumer.batch_metadata = Params::Builders::BatchMetadata.from_kafka_batch(
|
27
|
+
kafka_batch,
|
28
|
+
topic
|
29
|
+
)
|
30
|
+
|
27
31
|
kafka_messages = kafka_batch.messages
|
28
32
|
|
29
33
|
# Depending on a case (persisted or not) we might use new consumer instance per
|
@@ -6,9 +6,11 @@ module Karafka
|
|
6
6
|
module Builder
|
7
7
|
class << self
|
8
8
|
# Builds a Kafka::Client instance that we use to work with Kafka cluster
|
9
|
+
# @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which we want
|
10
|
+
# to have a new Kafka client
|
9
11
|
# @return [::Kafka::Client] returns a Kafka client
|
10
|
-
def call
|
11
|
-
Kafka.new(*ApiAdapter.client)
|
12
|
+
def call(consumer_group)
|
13
|
+
Kafka.new(*ApiAdapter.client(consumer_group))
|
12
14
|
end
|
13
15
|
end
|
14
16
|
end
|
@@ -97,7 +97,7 @@ module Karafka
|
|
97
97
|
def kafka_consumer
|
98
98
|
# @note We don't cache the connection internally because we cache kafka_consumer that uses
|
99
99
|
# kafka client object instance
|
100
|
-
@kafka_consumer ||= Builder.call.consumer(
|
100
|
+
@kafka_consumer ||= Builder.call(consumer_group).consumer(
|
101
101
|
*ApiAdapter.consumer(consumer_group)
|
102
102
|
).tap do |consumer|
|
103
103
|
consumer_group.topics.each do |topic|
|
@@ -16,7 +16,7 @@ module Karafka
|
|
16
16
|
|
17
17
|
bind_backend(consumer, topic)
|
18
18
|
bind_params(consumer, topic)
|
19
|
-
|
19
|
+
bind_batch_metadata(consumer, topic)
|
20
20
|
bind_responders(consumer, topic)
|
21
21
|
end
|
22
22
|
|
@@ -40,13 +40,14 @@ module Karafka
|
|
40
40
|
consumer.extend(SingleParams)
|
41
41
|
end
|
42
42
|
|
43
|
-
# Adds an option to work with metadata for consumer instances that have
|
43
|
+
# Adds an option to work with batch metadata for consumer instances that have
|
44
|
+
# batch fetching enabled
|
44
45
|
# @param consumer [Karafka::BaseConsumer] consumer instance
|
45
46
|
# @param topic [Karafka::Routing::Topic] topic of a consumer class
|
46
|
-
def
|
47
|
+
def bind_batch_metadata(consumer, topic)
|
47
48
|
return unless topic.batch_fetching
|
48
49
|
|
49
|
-
consumer.extend(
|
50
|
+
consumer.extend(BatchMetadata)
|
50
51
|
end
|
51
52
|
|
52
53
|
# Adds responders support for topics and consumers with responders defined for them
|
@@ -43,7 +43,7 @@ module Karafka
|
|
43
43
|
# so it returns a topic as a string, not a routing topic
|
44
44
|
debug(
|
45
45
|
<<~MSG.chomp.tr("\n", ' ')
|
46
|
-
Params deserialization for #{event[:caller].topic} topic
|
46
|
+
Params deserialization for #{event[:caller].metadata.topic} topic
|
47
47
|
successful in #{event[:time]} ms
|
48
48
|
MSG
|
49
49
|
)
|
@@ -52,7 +52,9 @@ module Karafka
|
|
52
52
|
# Logs unsuccessful deserialization attempts of incoming data
|
53
53
|
# @param event [Dry::Events::Event] event details including payload
|
54
54
|
def on_params_params_deserialize_error(event)
|
55
|
-
|
55
|
+
topic = event[:caller].metadata.topic
|
56
|
+
error = event[:error]
|
57
|
+
error "Params deserialization error for #{topic} topic: #{error}"
|
56
58
|
end
|
57
59
|
|
58
60
|
# Logs errors that occurred in a listener fetch loop
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Params
|
5
|
+
# Simple batch metadata object that stores all non-message information received from Kafka
|
6
|
+
# cluster while fetching the data
|
7
|
+
# @note This metadata object refers to per batch metadata, not `#params.metadata`
|
8
|
+
BatchMetadata = Struct.new(
|
9
|
+
:batch_size,
|
10
|
+
:first_offset,
|
11
|
+
:highwater_mark_offset,
|
12
|
+
:unknown_last_offset,
|
13
|
+
:last_offset,
|
14
|
+
:offset_lag,
|
15
|
+
:deserializer,
|
16
|
+
:partition,
|
17
|
+
:topic,
|
18
|
+
keyword_init: true
|
19
|
+
) do
|
20
|
+
# @return [Boolean] is the last offset known or unknown
|
21
|
+
def unknown_last_offset?
|
22
|
+
unknown_last_offset
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Params
|
5
|
+
module Builders
|
6
|
+
# Builder for creating batch metadata object based on the batch informations
|
7
|
+
module BatchMetadata
|
8
|
+
class << self
|
9
|
+
# Creates metadata based on the kafka batch data
|
10
|
+
# @param kafka_batch [Kafka::FetchedBatch] kafka batch details
|
11
|
+
# @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
|
12
|
+
# @return [Karafka::Params::BatchMetadata] batch metadata object
|
13
|
+
def from_kafka_batch(kafka_batch, topic)
|
14
|
+
Karafka::Params::BatchMetadata.new(
|
15
|
+
batch_size: kafka_batch.messages.count,
|
16
|
+
first_offset: kafka_batch.first_offset,
|
17
|
+
highwater_mark_offset: kafka_batch.highwater_mark_offset,
|
18
|
+
unknown_last_offset: kafka_batch.unknown_last_offset?,
|
19
|
+
last_offset: kafka_batch.last_offset,
|
20
|
+
offset_lag: kafka_batch.offset_lag,
|
21
|
+
deserializer: topic.deserializer,
|
22
|
+
partition: kafka_batch.partition,
|
23
|
+
topic: topic.name
|
24
|
+
).freeze
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -12,22 +12,24 @@ module Karafka
|
|
12
12
|
class << self
|
13
13
|
# @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
|
14
14
|
# @param topic [Karafka::Routing::Topic] topic for which this message was fetched
|
15
|
-
# @return [Karafka::Params::Params] params object
|
15
|
+
# @return [Karafka::Params::Params] params object with payload and message metadata
|
16
16
|
def from_kafka_message(kafka_message, topic)
|
17
|
-
Karafka::Params::
|
18
|
-
.
|
19
|
-
.
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
17
|
+
metadata = Karafka::Params::Metadata.new(
|
18
|
+
create_time: kafka_message.create_time,
|
19
|
+
headers: kafka_message.headers || {},
|
20
|
+
is_control_record: kafka_message.is_control_record,
|
21
|
+
key: kafka_message.key,
|
22
|
+
offset: kafka_message.offset,
|
23
|
+
deserializer: topic.deserializer,
|
24
|
+
partition: kafka_message.partition,
|
25
|
+
receive_time: Time.now,
|
26
|
+
topic: topic.name
|
27
|
+
).freeze
|
28
|
+
|
29
|
+
Karafka::Params::Params.new(
|
30
|
+
kafka_message.value,
|
31
|
+
metadata
|
32
|
+
)
|
31
33
|
end
|
32
34
|
end
|
33
35
|
end
|
@@ -12,11 +12,11 @@ module Karafka
|
|
12
12
|
# @param topic [Karafka::Routing::Topic] topic for which we're received messages
|
13
13
|
# @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
|
14
14
|
def from_kafka_messages(kafka_messages, topic)
|
15
|
-
params_array = kafka_messages.map
|
15
|
+
params_array = kafka_messages.map do |message|
|
16
16
|
Karafka::Params::Builders::Params.from_kafka_message(message, topic)
|
17
17
|
end
|
18
18
|
|
19
|
-
Karafka::Params::ParamsBatch.new(params_array)
|
19
|
+
Karafka::Params::ParamsBatch.new(params_array).freeze
|
20
20
|
end
|
21
21
|
end
|
22
22
|
end
|
@@ -2,34 +2,19 @@
|
|
2
2
|
|
3
3
|
module Karafka
|
4
4
|
module Params
|
5
|
-
#
|
6
|
-
#
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
private_constant :METHOD_ATTRIBUTES
|
21
|
-
|
22
|
-
METHOD_ATTRIBUTES.each do |attr|
|
23
|
-
# Defines a method call accessor to a particular hash field.
|
24
|
-
define_method(attr) do
|
25
|
-
self[attr]
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
# @return [Boolean] is the last offset known or unknown
|
30
|
-
def unknown_last_offset?
|
31
|
-
self['unknown_last_offset']
|
32
|
-
end
|
33
|
-
end
|
5
|
+
# Single message / params metadata details that can be accessed without the need for the
|
6
|
+
# payload deserialization
|
7
|
+
Metadata = Struct.new(
|
8
|
+
:create_time,
|
9
|
+
:headers,
|
10
|
+
:is_control_record,
|
11
|
+
:key,
|
12
|
+
:offset,
|
13
|
+
:deserializer,
|
14
|
+
:partition,
|
15
|
+
:receive_time,
|
16
|
+
:topic,
|
17
|
+
keyword_init: true
|
18
|
+
)
|
34
19
|
end
|
35
20
|
end
|
@@ -6,58 +6,44 @@ module Karafka
|
|
6
6
|
# It provides lazy loading not only until the first usage, but also allows us to skip
|
7
7
|
# using deserializer until we execute our logic. That way we can operate with
|
8
8
|
# heavy-deserialization data without slowing down the whole application.
|
9
|
-
class Params
|
10
|
-
|
11
|
-
# client compatibility.
|
12
|
-
# Kafka passes internally Kafka::FetchedMessage object and the ruby-kafka consumer
|
13
|
-
# uses those fields via method calls, so in order to be able to pass there our params
|
14
|
-
# objects, have to have same api.
|
15
|
-
METHOD_ATTRIBUTES = %w[
|
16
|
-
create_time
|
17
|
-
headers
|
18
|
-
is_control_record
|
19
|
-
key
|
20
|
-
offset
|
21
|
-
deserializer
|
22
|
-
deserialized
|
23
|
-
partition
|
24
|
-
receive_time
|
25
|
-
topic
|
26
|
-
payload
|
27
|
-
].freeze
|
9
|
+
class Params
|
10
|
+
extend Forwardable
|
28
11
|
|
29
|
-
|
12
|
+
attr_reader :raw_payload, :metadata
|
30
13
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
end
|
14
|
+
def_delegators :metadata, *Metadata.members
|
15
|
+
|
16
|
+
# @param raw_payload [Object] incoming payload before deserialization
|
17
|
+
# @param metadata [Karafka::Params::Metadata] message metadata object
|
18
|
+
def initialize(raw_payload, metadata)
|
19
|
+
@raw_payload = raw_payload
|
20
|
+
@metadata = metadata
|
21
|
+
@deserialized = false
|
22
|
+
@payload = nil
|
41
23
|
end
|
42
24
|
|
43
|
-
# @return [
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
25
|
+
# @return [Object] lazy-deserialized data (deserialized upon first request)
|
26
|
+
def payload
|
27
|
+
return @payload if deserialized?
|
28
|
+
|
29
|
+
@payload = deserialize
|
30
|
+
# We mark deserialization as successful after deserialization, as in case of an error
|
31
|
+
# this won't be falsely set to true
|
32
|
+
@deserialized = true
|
33
|
+
@payload
|
34
|
+
end
|
49
35
|
|
50
|
-
|
51
|
-
|
52
|
-
|
36
|
+
# @return [Boolean] did given params payload were deserialized already
|
37
|
+
def deserialized?
|
38
|
+
@deserialized
|
53
39
|
end
|
54
40
|
|
55
41
|
private
|
56
42
|
|
57
|
-
# @return [Object]
|
43
|
+
# @return [Object] tries de-serializes data
|
58
44
|
def deserialize
|
59
45
|
Karafka.monitor.instrument('params.params.deserialize', caller: self) do
|
60
|
-
|
46
|
+
metadata.deserializer.call(self)
|
61
47
|
end
|
62
48
|
rescue ::StandardError => e
|
63
49
|
Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
|
@@ -15,47 +15,46 @@ module Karafka
|
|
15
15
|
@params_array = params_array
|
16
16
|
end
|
17
17
|
|
18
|
-
# @yieldparam [Karafka::Params::Params] each
|
19
|
-
# @note Invocation of this method will cause loading and deserializing each param after
|
20
|
-
# another.
|
21
|
-
# directly
|
18
|
+
# @yieldparam [Karafka::Params::Params] each params instance
|
19
|
+
# @note Invocation of this method will not cause loading and deserializing each param after
|
20
|
+
# another.
|
22
21
|
def each
|
23
|
-
@params_array.each { |param| yield(param
|
22
|
+
@params_array.each { |param| yield(param) }
|
24
23
|
end
|
25
24
|
|
26
25
|
# @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so they
|
27
26
|
# can be used for batch insert, etc. Without invoking all, up until first use, they won't
|
28
27
|
# be deserialized
|
29
28
|
def deserialize!
|
30
|
-
each(&:
|
29
|
+
each(&:payload)
|
31
30
|
end
|
32
31
|
|
33
32
|
# @return [Array<Object>] array with deserialized payloads. This method can be useful when
|
34
33
|
# we don't care about metadata and just want to extract all the data payloads from the
|
35
34
|
# batch
|
36
35
|
def payloads
|
37
|
-
|
36
|
+
map(&:payload)
|
38
37
|
end
|
39
38
|
|
40
|
-
# @return [Karafka::Params::Params] first element
|
39
|
+
# @return [Karafka::Params::Params] first element
|
41
40
|
def first
|
42
|
-
@params_array.first
|
41
|
+
@params_array.first
|
43
42
|
end
|
44
43
|
|
45
|
-
# @return [Karafka::Params::Params] last element
|
44
|
+
# @return [Karafka::Params::Params] last element
|
46
45
|
def last
|
47
|
-
@params_array.last
|
48
|
-
end
|
49
|
-
|
50
|
-
# @return [Array<Karafka::Params::Params>] pure array with params (not deserialized)
|
51
|
-
def to_a
|
52
|
-
@params_array
|
46
|
+
@params_array.last
|
53
47
|
end
|
54
48
|
|
55
49
|
# @return [Integer] number of messages in the batch
|
56
50
|
def size
|
57
51
|
@params_array.size
|
58
52
|
end
|
53
|
+
|
54
|
+
# @return [Array<Karafka::Params::Params>] pure array with params
|
55
|
+
def to_a
|
56
|
+
@params_array
|
57
|
+
end
|
59
58
|
end
|
60
59
|
end
|
61
60
|
end
|