karafka 1.3.6 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/.diffend.yml +3 -0
- data/.github/workflows/ci.yml +52 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +27 -1
- data/CODE_OF_CONDUCT.md +1 -1
- data/Gemfile +2 -0
- data/Gemfile.lock +42 -41
- data/README.md +3 -5
- data/certs/mensfeld.pem +21 -21
- data/config/errors.yml +2 -0
- data/docker-compose.yml +17 -0
- data/karafka.gemspec +3 -4
- data/lib/karafka.rb +1 -1
- data/lib/karafka/assignment_strategies/round_robin.rb +13 -0
- data/lib/karafka/attributes_map.rb +1 -0
- data/lib/karafka/cli.rb +8 -0
- data/lib/karafka/cli/base.rb +4 -4
- data/lib/karafka/cli/missingno.rb +19 -0
- data/lib/karafka/connection/api_adapter.rb +5 -3
- data/lib/karafka/connection/batch_delegator.rb +5 -1
- data/lib/karafka/consumers/batch_metadata.rb +10 -0
- data/lib/karafka/consumers/includer.rb +5 -4
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/contracts/consumer_group.rb +8 -3
- data/lib/karafka/helpers/class_matcher.rb +1 -1
- data/lib/karafka/instrumentation/logger.rb +2 -2
- data/lib/karafka/instrumentation/stdout_listener.rb +4 -2
- data/lib/karafka/params/batch_metadata.rb +26 -0
- data/lib/karafka/params/builders/batch_metadata.rb +30 -0
- data/lib/karafka/params/builders/params.rb +17 -15
- data/lib/karafka/params/builders/params_batch.rb +2 -2
- data/lib/karafka/params/metadata.rb +14 -29
- data/lib/karafka/params/params.rb +27 -41
- data/lib/karafka/params/params_batch.rb +15 -16
- data/lib/karafka/routing/builder.rb +1 -0
- data/lib/karafka/routing/consumer_group.rb +5 -3
- data/lib/karafka/serialization/json/deserializer.rb +2 -2
- data/lib/karafka/setup/config.rb +5 -0
- data/lib/karafka/version.rb +1 -1
- metadata +37 -46
- metadata.gz.sig +0 -0
- data/.travis.yml +0 -36
- data/lib/karafka/consumers/metadata.rb +0 -10
- data/lib/karafka/params/builders/metadata.rb +0 -33
data/lib/karafka/attributes_map.rb
CHANGED
@@ -19,6 +19,7 @@ module Karafka
           consumer: %i[
             session_timeout offset_commit_interval offset_commit_threshold
             offset_retention_time heartbeat_interval fetcher_max_queue_size
+            assignment_strategy
           ],
           subscribe: %i[start_from_beginning max_bytes_per_partition],
           consumption: %i[min_bytes max_bytes max_wait_time],
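The `assignment_strategy` attribute added above is what lets a consumer group forward a custom partition assignment strategy to ruby-kafka. A minimal sketch of opting in, assuming the usual app setup block and the `RoundRobin` wrapper this release ships in `lib/karafka/assignment_strategies/round_robin.rb` (the exact config path is inferred from the attributes map, not shown in this diff):

```ruby
# Sketch, not from the diff: enabling the new pluggable assignment strategy.
class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka.seed_brokers = %w[kafka://127.0.0.1:9092]
    # Any object responding to #call is accepted (see the contract rule below)
    config.kafka.assignment_strategy = Karafka::AssignmentStrategies::RoundRobin.new
  end
end
```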
data/lib/karafka/cli.rb
CHANGED
@@ -10,6 +10,8 @@ module Karafka
   class Cli < Thor
     package_name 'Karafka'
 
+    default_task :missingno
+
     class << self
       # Loads all Cli commands into Thor framework
       # This method should be executed before we run Karafka::Cli.start, otherwise we won't
@@ -20,6 +22,12 @@ module Karafka
         end
       end
 
+      # When there is a CLI crash, exit
+      # @return [true]
+      def exit_on_failure?
+        true
+      end
+
      private
 
       # @return [Array<Class>] Array with Cli action classes that can be used as commands
data/lib/karafka/cli/base.rb
CHANGED
@@ -43,16 +43,16 @@ module Karafka
        end
 
        # Allows to set description of a given cli command
-       # @param desc [String] Description of a given cli command
-       def desc(desc)
-         @desc ||= desc
+       # @param args [Array] All the arguments that Thor desc method accepts
+       def desc(*args)
+         @desc ||= args
        end
 
        # This method will bind a given Cli command into Karafka Cli
        # This method is a wrapper to way Thor defines its commands
        # @param cli_class [Karafka::Cli] Karafka cli_class
        def bind_to(cli_class)
-         cli_class.desc name, @desc
+         cli_class.desc name, *@desc
 
          (@options || []).each { |option| cli_class.option(*option) }
 
data/lib/karafka/cli/missingno.rb
ADDED
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module Karafka
+  class Cli < Thor
+    # Command that gets invoked when no method is provided when running the CLI
+    # It allows us to exit with exit code 1 instead of default 0 to indicate that something
+    # was missing
+    # @see https://github.com/karafka/karafka/issues/619
+    class Missingno < Base
+      desc 'Hidden command that gets invoked when no command is provided', hide: true
+
+      # Prints an error about the lack of command (nothing selected)
+      def call
+        Karafka.logger.error('No command provided')
+        exit 1
+      end
+    end
+  end
+end
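Together with the `default_task :missingno` registration in `cli.rb`, this makes a bare `karafka` invocation fail loudly instead of exiting 0. A behavior sketch, assuming commands have been loaded the way the `prepare` comment above describes:

```ruby
# Sketch: a bare CLI invocation now routes to Missingno#call
Karafka::Cli.prepare
Karafka::Cli.start([]) # logs 'No command provided', exits with status 1
```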
data/lib/karafka/connection/api_adapter.rb
CHANGED
@@ -109,11 +109,13 @@ module Karafka
       # Majority of users don't use custom topic mappers. No need to change anything when it
       # is a default mapper that does not change anything. Only some cloud providers require
       # topics to be remapped
-      return [params] if Karafka::App.config.topic_mapper.is_a?(Karafka::Routing::TopicMapper)
+      return [params.metadata] if Karafka::App.config.topic_mapper.is_a?(
+        Karafka::Routing::TopicMapper
+      )
 
       # @note We don't use tap as it is around 13% slower than non-dup version
-      dupped = params.dup
-      dupped['topic'] = Karafka::App.config.topic_mapper.outgoing(params.topic)
+      dupped = params.metadata.dup
+      dupped['topic'] = Karafka::App.config.topic_mapper.outgoing(params.metadata.topic)
       [dupped]
     end
 
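The early return now hands back the frozen metadata instead of the params object, and only custom mappers reach the dup-and-remap path. A hypothetical mapper (names are illustrative, not part of the gem) shows why the outgoing translation exists:

```ruby
# Hypothetical mapper for a provider that prefixes every topic name
class NamespacedMapper
  PREFIX = 'myaccount-'

  # Strip the namespace from topics we consume
  def incoming(topic)
    topic.to_s.sub(PREFIX, '')
  end

  # Re-apply the namespace for outgoing requests, e.g. marking as consumed
  def outgoing(topic)
    "#{PREFIX}#{topic}"
  end
end

Karafka::App.setup do |config|
  config.topic_mapper = NamespacedMapper.new
end
```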
data/lib/karafka/connection/batch_delegator.rb
CHANGED
@@ -23,7 +23,11 @@ module Karafka
         ) do
           # Due to how ruby-kafka is built, we have the metadata that is stored on the batch
           # level only available for batch consuming
-          consumer.metadata = Params::Builders::Metadata.from_kafka_batch(kafka_batch, topic)
+          consumer.batch_metadata = Params::Builders::BatchMetadata.from_kafka_batch(
+            kafka_batch,
+            topic
+          )
+
           kafka_messages = kafka_batch.messages
 
           # Depending on a case (persisted or not) we might use new consumer instance per
data/lib/karafka/consumers/includer.rb
CHANGED
@@ -16,7 +16,7 @@ module Karafka
 
        bind_backend(consumer, topic)
        bind_params(consumer, topic)
-       bind_metadata(consumer, topic)
+       bind_batch_metadata(consumer, topic)
        bind_responders(consumer, topic)
      end
 
@@ -40,13 +40,14 @@ module Karafka
        consumer.extend(SingleParams)
      end
 
-     # Adds an option to work with metadata for consumer instances that have batch fetching
+     # Adds an option to work with batch metadata for consumer instances that have
+     # batch fetching enabled
      # @param consumer [Karafka::BaseConsumer] consumer instance
      # @param topic [Karafka::Routing::Topic] topic of a consumer class
-     def bind_metadata(consumer, topic)
+     def bind_batch_metadata(consumer, topic)
        return unless topic.batch_fetching
 
-       consumer.extend(Metadata)
+       consumer.extend(BatchMetadata)
      end
 
      # Adds responders support for topics and consumers with responders defined for them
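From the consumer's perspective the rename means batch-level details are now read through `#batch_metadata`. A sketch assuming a topic routed with `batch_fetching true`:

```ruby
# Sketch: batch-fetching consumers get BatchMetadata extended in
class EventsConsumer < Karafka::BaseConsumer
  def consume
    # Called #metadata before 1.4
    Karafka.logger.info("Batch of #{batch_metadata.batch_size} messages")
    Karafka.logger.info("Offset lag: #{batch_metadata.offset_lag}")

    params_batch.each do |params|
      # per-message work goes here
    end
  end
end
```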
data/lib/karafka/contracts/consumer_group.rb
CHANGED
@@ -32,6 +32,7 @@ module Karafka
        required(:offset_retention_time).maybe(:integer)
        required(:heartbeat_interval).filled { (int? | float?) & gteq?(0) }
        required(:fetcher_max_queue_size).filled(:int?, gt?: 0)
+       required(:assignment_strategy).value(:any)
        required(:connect_timeout).filled { (int? | float?) & gt?(0) }
        required(:reconnect_timeout).filled { (int? | float?) & gteq?(0) }
        required(:socket_timeout).filled { (int? | float?) & gt?(0) }
@@ -70,13 +71,13 @@ module Karafka
 
      # Uri rule to check if uri is in a Karafka acceptable format
      rule(:seed_brokers) do
-       if value&.is_a?(Array) && !value.all?(&method(:kafka_uri?))
+       if value.is_a?(Array) && !value.all?(&method(:kafka_uri?))
          key.failure(:invalid_broker_schema)
        end
      end
 
      rule(:topics) do
-       if value&.is_a?(Array)
+       if value.is_a?(Array)
          names = value.map { |topic| topic[:name] }
 
          key.failure(:topics_names_not_unique) if names.size != names.uniq.size
@@ -84,7 +85,7 @@ module Karafka
        end
 
      rule(:topics) do
-       if value&.is_a?(Array)
+       if value.is_a?(Array)
          value.each_with_index do |topic, index|
            TOPIC_CONTRACT.call(topic).errors.each do |error|
              key([:topics, index, error.path[0]]).failure(error.text)
@@ -93,6 +94,10 @@ module Karafka
        end
      end
 
+     rule(:assignment_strategy) do
+       key.failure(:does_not_respond_to_call) unless value.respond_to?(:call)
+     end
+
      rule(:ssl_client_cert, :ssl_client_cert_key) do
        if values[:ssl_client_cert] && !values[:ssl_client_cert_key]
          key(:ssl_client_cert_key).failure(:ssl_client_cert_with_ssl_client_cert_key)
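The new rule is purely an interface check, so anything callable passes. Assuming the bundled `RoundRobin` delegates `#call` to ruby-kafka's strategy, the rule behaves like:

```ruby
# Only the #call interface matters, not the object's type
Karafka::AssignmentStrategies::RoundRobin.new.respond_to?(:call) # => true
'definitely not a strategy'.respond_to?(:call)                   # => false
# The second value fails validation with :does_not_respond_to_call; its
# message is presumably among the two lines added to config/errors.yml.
```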
data/lib/karafka/helpers/class_matcher.rb
CHANGED
@@ -8,7 +8,7 @@ module Karafka
    class ClassMatcher
      # Regexp used to remove any non classy like characters that might be in the consumer
      # class name (if defined dynamically, etc)
-     CONSTANT_REGEXP = %r{[?!=+\-\*/\^\|&\[\]<>%~\#\:\s\(\)]}.freeze
+     CONSTANT_REGEXP = %r{[?!=+\-*/\^|&\[\]<>%~\#:\s()]}.freeze
 
      private_constant :CONSTANT_REGEXP
 
data/lib/karafka/instrumentation/logger.rb
CHANGED
@@ -29,11 +29,11 @@ module Karafka
 
    # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
    # to which we will be writing logs
-   # We use this approach to log stuff to file and to the STDOUT at the same time
+   # We use this approach to log stuff to file and to the $stdout at the same time
    def target
      Karafka::Helpers::MultiDelegator
        .delegate(:write, :close)
-       .to(STDOUT, file)
+       .to($stdout, file)
    end
 
    # Makes sure the log directory exists as long as we can write to it
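For context, the object built by `target` is handed to a standard `::Logger`, and `MultiDelegator` fans each `write`/`close` out to every sink; only the stream constant changed here. A sketch of the mechanism, with an illustrative file handle:

```ruby
require 'logger'

# Every write lands on $stdout and in the file at the same time
target = Karafka::Helpers::MultiDelegator
         .delegate(:write, :close)
         .to($stdout, File.open('log/karafka.log', 'a'))

logger = ::Logger.new(target)
logger.info('hello') # visible in the terminal and appended to the log
```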
data/lib/karafka/instrumentation/stdout_listener.rb
CHANGED
@@ -43,7 +43,7 @@ module Karafka
      # so it returns a topic as a string, not a routing topic
      debug(
        <<~MSG.chomp.tr("\n", ' ')
-         Params deserialization for #{event[:caller].topic} topic
+         Params deserialization for #{event[:caller].metadata.topic} topic
          successful in #{event[:time]} ms
        MSG
      )
@@ -52,7 +52,9 @@ module Karafka
    # Logs unsuccessful deserialization attempts of incoming data
    # @param event [Dry::Events::Event] event details including payload
    def on_params_params_deserialize_error(event)
-     error "Params deserialization error for #{event[:caller].topic} topic: #{event[:error]}"
+     topic = event[:caller].metadata.topic
+     error = event[:error]
+     error "Params deserialization error for #{topic} topic: #{error}"
    end
 
    # Logs errors that occurred in a listener fetch loop
data/lib/karafka/params/batch_metadata.rb
ADDED
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Params
+    # Simple batch metadata object that stores all non-message information received from Kafka
+    # cluster while fetching the data
+    # @note This metadata object refers to per batch metadata, not `#params.metadata`
+    BatchMetadata = Struct.new(
+      :batch_size,
+      :first_offset,
+      :highwater_mark_offset,
+      :unknown_last_offset,
+      :last_offset,
+      :offset_lag,
+      :deserializer,
+      :partition,
+      :topic,
+      keyword_init: true
+    ) do
+      # @return [Boolean] is the last offset known or unknown
+      def unknown_last_offset?
+        unknown_last_offset
+      end
+    end
+  end
+end
data/lib/karafka/params/builders/batch_metadata.rb
ADDED
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Params
+    module Builders
+      # Builder for creating batch metadata object based on the batch informations
+      module BatchMetadata
+        class << self
+          # Creates metadata based on the kafka batch data
+          # @param kafka_batch [Kafka::FetchedBatch] kafka batch details
+          # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
+          # @return [Karafka::Params::BatchMetadata] batch metadata object
+          def from_kafka_batch(kafka_batch, topic)
+            Karafka::Params::BatchMetadata.new(
+              batch_size: kafka_batch.messages.count,
+              first_offset: kafka_batch.first_offset,
+              highwater_mark_offset: kafka_batch.highwater_mark_offset,
+              unknown_last_offset: kafka_batch.unknown_last_offset?,
+              last_offset: kafka_batch.last_offset,
+              offset_lag: kafka_batch.offset_lag,
+              deserializer: topic.deserializer,
+              partition: kafka_batch.partition,
+              topic: topic.name
+            ).freeze
+          end
+        end
+      end
+    end
+  end
+end
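Together, the struct and builder yield a frozen, keyword-initialized value object. Illustrative usage with made-up field values (omitted members default to nil):

```ruby
meta = Karafka::Params::BatchMetadata.new(
  batch_size: 2,
  first_offset: 100,
  last_offset: 101,
  unknown_last_offset: false,
  topic: 'users_events'
)

meta.unknown_last_offset? # => false
meta.batch_size           # => 2
# from_kafka_batch additionally freezes the struct it returns
```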
data/lib/karafka/params/builders/params.rb
CHANGED
@@ -12,22 +12,24 @@ module Karafka
        class << self
          # @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
          # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
-         # @return [Karafka::Params::Params] params object
+         # @return [Karafka::Params::Params] params object with payload and message metadata
          def from_kafka_message(kafka_message, topic)
-           Karafka::Params::Params
-             .new
-             .merge!(
-               'create_time' => kafka_message.create_time,
-               'headers' => kafka_message.headers || {},
-               'is_control_record' => kafka_message.is_control_record,
-               'key' => kafka_message.key,
-               'offset' => kafka_message.offset,
-               'deserializer' => topic.deserializer,
-               'partition' => kafka_message.partition,
-               'receive_time' => Time.now,
-               'topic' => topic.name,
-               'payload' => kafka_message.value
-             )
+           metadata = Karafka::Params::Metadata.new(
+             create_time: kafka_message.create_time,
+             headers: kafka_message.headers || {},
+             is_control_record: kafka_message.is_control_record,
+             key: kafka_message.key,
+             offset: kafka_message.offset,
+             deserializer: topic.deserializer,
+             partition: kafka_message.partition,
+             receive_time: Time.now,
+             topic: topic.name
+           ).freeze
+
+           Karafka::Params::Params.new(
+             kafka_message.value,
+             metadata
+           )
          end
        end
      end
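The builder now separates the raw payload from a frozen metadata struct instead of merging everything into one hash. Assuming `kafka_message` and `topic` are in scope as in the method above, the result behaves like:

```ruby
params = Karafka::Params::Builders::Params.from_kafka_message(kafka_message, topic)

params.raw_payload      # the untouched Kafka message value
params.topic            # delegated to the metadata struct
params.metadata.frozen? # => true
```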
data/lib/karafka/params/builders/params_batch.rb
CHANGED
@@ -12,11 +12,11 @@ module Karafka
          # @param topic [Karafka::Routing::Topic] topic for which we're received messages
          # @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
          def from_kafka_messages(kafka_messages, topic)
-           params_array = kafka_messages.map! do |message|
+           params_array = kafka_messages.map do |message|
              Karafka::Params::Builders::Params.from_kafka_message(message, topic)
            end
 
-           Karafka::Params::ParamsBatch.new(params_array)
+           Karafka::Params::ParamsBatch.new(params_array).freeze
          end
        end
      end
data/lib/karafka/params/metadata.rb
CHANGED
@@ -2,34 +2,19 @@
 
 module Karafka
   module Params
-    # Simple metadata object that stores all non-message information received from Kafka
-    # cluster while fetching the data
-    class Metadata < Hash
-      # Attributes that should be accessible via a method call
-      METHOD_ATTRIBUTES = %w[
-        batch_size
-        first_offset
-        highwater_mark_offset
-        last_offset
-        offset_lag
-        deserializer
-        partition
-        topic
-      ].freeze
-
-      private_constant :METHOD_ATTRIBUTES
-
-      METHOD_ATTRIBUTES.each do |attr|
-        # Defines a method call accessor to a particular hash field.
-        define_method(attr) do
-          self[attr]
-        end
-      end
-
-      # @return [Boolean] is the last offset known or unknown
-      def unknown_last_offset?
-        self['unknown_last_offset']
-      end
-    end
+    # Single message / params metadata details that can be accessed without the need for the
+    # payload deserialization
+    Metadata = Struct.new(
+      :create_time,
+      :headers,
+      :is_control_record,
+      :key,
+      :offset,
+      :deserializer,
+      :partition,
+      :receive_time,
+      :topic,
+      keyword_init: true
+    )
   end
 end
data/lib/karafka/params/params.rb
CHANGED
@@ -6,58 +6,44 @@ module Karafka
     # It provides lazy loading not only until the first usage, but also allows us to skip
     # using deserializer until we execute our logic. That way we can operate with
     # heavy-deserialization data without slowing down the whole application.
-    class Params < Hash
-      # Params attributes that should be available via a method call invocation for Kafka
-      # client compatibility.
-      # Kafka passes internally Kafka::FetchedMessage object and the ruby-kafka consumer
-      # uses those fields via method calls, so in order to be able to pass there our params
-      # objects, have to have same api.
-      METHOD_ATTRIBUTES = %w[
-        create_time
-        headers
-        is_control_record
-        key
-        offset
-        deserializer
-        deserialized
-        partition
-        receive_time
-        topic
-        payload
-      ].freeze
+    class Params
+      extend Forwardable
 
-      private_constant :METHOD_ATTRIBUTES
+      attr_reader :raw_payload, :metadata
 
-      METHOD_ATTRIBUTES.each do |attr|
-        # Defines a method call accessor to a particular hash field.
-        # @note Won't work for complex key names that contain spaces, etc
-        # @param key [Symbol] name of a key that we want to retrieve with a method call
-        # @example
-        #   key_attr_reader :example
-        #   params.example #=> 'my example payload'
-        define_method(attr) do
-          self[attr]
-        end
+      def_delegators :metadata, *Metadata.members
+
+      # @param raw_payload [Object] incoming payload before deserialization
+      # @param metadata [Karafka::Params::Metadata] message metadata object
+      def initialize(raw_payload, metadata)
+        @raw_payload = raw_payload
+        @metadata = metadata
+        @deserialized = false
+        @payload = nil
      end
 
-      # @return [Karafka::Params::Params] this will trigger deserializer execution. If we
-      #   decide to retrieve data, deserializer will be executed to get data. Output of
-      #   deserialization will be merged to the current object. This object will be also
-      #   marked as already deserialized, so we won't deserialize it again.
-      def deserialize!
-        return self if self['deserialized']
+      # @return [Object] lazy-deserialized data (deserialized upon first request)
+      def payload
+        return @payload if deserialized?
+
+        @payload = deserialize
+        # We mark deserialization as successful after deserialization, as in case of an error
+        # this won't be falsely set to true
+        @deserialized = true
+        @payload
+      end
 
-        self['deserialized'] = true
-        self['payload'] = deserialize
-        self
+      # @return [Boolean] did given params payload were deserialized already
+      def deserialized?
+        @deserialized
      end
 
      private
 
-      # @return [Object] deserialized data
+      # @return [Object] tries de-serializes data
      def deserialize
        Karafka.monitor.instrument('params.params.deserialize', caller: self) do
-          self['deserializer'].call(self)
+          metadata.deserializer.call(self)
        end
      rescue ::StandardError => e
        Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
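A compact sketch of the new lazy contract, using an inline lambda in place of a real deserializer (this assumes a booted Karafka process, since `#deserialize` goes through `Karafka.monitor`):

```ruby
require 'json'

metadata = Karafka::Params::Metadata.new(
  deserializer: ->(params) { JSON.parse(params.raw_payload) },
  topic: 'users_events'
).freeze

params = Karafka::Params::Params.new('{"id":1}', metadata)

params.deserialized? # => false
params.payload       # => {"id"=>1}, the deserializer runs here exactly once
params.deserialized? # => true
params.topic         # => "users_events", via def_delegators to metadata
```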