karafka 1.3.0 → 1.4.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.diffend.yml +3 -0
- data/.github/workflows/ci.yml +76 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +112 -15
- data/CODE_OF_CONDUCT.md +1 -1
- data/Gemfile +2 -0
- data/Gemfile.lock +87 -98
- data/README.md +28 -31
- data/certs/mensfeld.pem +24 -23
- data/config/errors.yml +2 -0
- data/docker-compose.yml +17 -0
- data/karafka.gemspec +22 -14
- data/lib/karafka/assignment_strategies/round_robin.rb +13 -0
- data/lib/karafka/attributes_map.rb +3 -8
- data/lib/karafka/cli/base.rb +4 -4
- data/lib/karafka/cli/flow.rb +9 -6
- data/lib/karafka/cli/info.rb +1 -1
- data/lib/karafka/cli/install.rb +5 -2
- data/lib/karafka/cli/missingno.rb +19 -0
- data/lib/karafka/cli/server.rb +8 -8
- data/lib/karafka/cli.rb +9 -1
- data/lib/karafka/connection/api_adapter.rb +27 -24
- data/lib/karafka/connection/batch_delegator.rb +5 -1
- data/lib/karafka/connection/builder.rb +9 -2
- data/lib/karafka/connection/client.rb +9 -6
- data/lib/karafka/connection/listener.rb +2 -2
- data/lib/karafka/consumers/batch_metadata.rb +10 -0
- data/lib/karafka/consumers/includer.rb +5 -4
- data/lib/karafka/contracts/consumer_group.rb +10 -5
- data/lib/karafka/contracts/server_cli_options.rb +2 -0
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/helpers/class_matcher.rb +2 -2
- data/lib/karafka/instrumentation/logger.rb +6 -9
- data/lib/karafka/instrumentation/stdout_listener.rb +6 -4
- data/lib/karafka/params/batch_metadata.rb +26 -0
- data/lib/karafka/params/builders/batch_metadata.rb +30 -0
- data/lib/karafka/params/builders/params.rb +17 -15
- data/lib/karafka/params/builders/params_batch.rb +2 -2
- data/lib/karafka/params/metadata.rb +14 -29
- data/lib/karafka/params/params.rb +27 -41
- data/lib/karafka/params/params_batch.rb +15 -16
- data/lib/karafka/routing/builder.rb +1 -0
- data/lib/karafka/routing/consumer_group.rb +5 -3
- data/lib/karafka/serialization/json/deserializer.rb +2 -2
- data/lib/karafka/server.rb +4 -1
- data/lib/karafka/setup/config.rb +60 -52
- data/lib/karafka/templates/karafka.rb.erb +1 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +3 -1
- data.tar.gz.sig +0 -0
- metadata +75 -93
- metadata.gz.sig +0 -0
- data/.github/FUNDING.yml +0 -3
- data/.travis.yml +0 -36
- data/lib/karafka/consumers/metadata.rb +0 -10
- data/lib/karafka/params/builders/metadata.rb +0 -33
data/lib/karafka/contracts/server_cli_options.rb
CHANGED
@@ -6,6 +6,8 @@ module Karafka
     # We validate some basics + the list of consumer_groups on which we want to use, to make
     #   sure that all of them are defined, plus that a pidfile does not exist
     class ServerCliOptions < Dry::Validation::Contract
+      config.messages.load_paths << File.join(Karafka.gem_root, 'config', 'errors.yml')
+
       params do
         optional(:pid).filled(:str?)
         optional(:daemon).filled(:bool?)
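The two added lines register karafka's bundled config/errors.yml (also new in this release, per the file list above) with dry-validation, so contract failures can render gem-specific messages. A minimal standalone sketch of the same mechanism, with an illustrative errors.yml path that is not part of the gem:

  require 'dry/validation'

  class ExampleContract < Dry::Validation::Contract
    # Register an extra YAML file with custom error message translations
    config.messages.load_paths << File.join(__dir__, 'errors.yml')

    params do
      optional(:pid).filled(:str?)
    end
  end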
data/lib/karafka/contracts.rb
CHANGED
data/lib/karafka/helpers/class_matcher.rb
CHANGED
@@ -8,7 +8,7 @@ module Karafka
     class ClassMatcher
       # Regexp used to remove any non classy like characters that might be in the consumer
       #   class name (if defined dynamically, etc)
-      CONSTANT_REGEXP = %r{[
+      CONSTANT_REGEXP = %r{[?!=+\-*/\^|&\[\]<>%~\#:\s()]}.freeze

       private_constant :CONSTANT_REGEXP

@@ -44,7 +44,7 @@ module Karafka
       # @example From Namespaced::Super2Consumer matching responder
       #   matcher.name #=> Super2Responder
       def name
-        inflected =
+        inflected = +@klass.to_s.split('::').last.to_s
         # We inject the from into the name just in case it is missing as in a situation like
         #   that it would just sanitize the name without adding the "to" postfix.
         # It could create cases when we want to build for example a responder to a consumer
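The notable change in both hunks is frozen-string safety: the regexp constant gains an explicit .freeze, and the class-name string gets a unary plus. Under `# frozen_string_literal: true`, String#+@ returns an unfrozen copy of a frozen string (and the receiver itself when already mutable), so later in-place operations such as gsub! cannot raise FrozenError. A quick plain-Ruby illustration, independent of karafka:

  # frozen_string_literal: true

  name = 'Consumer'   # literals are frozen under the magic comment
  copy = +name        # String#+@ returns an unfrozen duplicate
  copy << 'Group'     # mutating the copy is fine
  name.frozen?        # => true
  copy.frozen?        # => false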
data/lib/karafka/instrumentation/logger.rb
CHANGED
@@ -20,7 +20,6 @@ module Karafka
     # @param _args Any arguments that we don't care about but that are needed in order to
     #   make this logger compatible with the default Ruby one
     def initialize(*_args)
-      ensure_dir_exists
       super(target)
       self.level = ENV_MAP[Karafka.env] || ENV_MAP['default']
     end
@@ -29,17 +28,11 @@ module Karafka

     # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
     #   to which we will be writing logs
-    # We use this approach to log stuff to file and to the
+    # We use this approach to log stuff to file and to the $stdout at the same time
     def target
       Karafka::Helpers::MultiDelegator
         .delegate(:write, :close)
-        .to(
-    end
-
-    # Makes sure the log directory exists
-    def ensure_dir_exists
-      dir = File.dirname(log_path)
-      FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
+        .to(*[$stdout, file].compact)
     end

     # @return [Pathname] Path to a file to which we should log
@@ -50,7 +43,11 @@ module Karafka
     # @return [File] file to which we want to write our logs
     # @note File is being opened in append mode ('a')
     def file
+      FileUtils.mkdir_p(File.dirname(log_path))
+
       @file ||= File.open(log_path, 'a')
+    rescue Errno::EACCES, Errno::EROFS
+      nil
     end
   end
 end
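Net effect of the three logger hunks: directory creation moves from the constructor into #file, and file logging becomes best-effort — when the log location is not writable, #file returns nil and the splatted, compacted target list leaves $stdout as the only delegate. A small sketch of that fallback (the method name and path below are illustrative, not the gem's):

  require 'fileutils'

  # Stand-in for the gem's #file with the same rescue behavior
  def log_file(log_path)
    FileUtils.mkdir_p(File.dirname(log_path))
    File.open(log_path, 'a')
  rescue Errno::EACCES, Errno::EROFS
    nil
  end

  targets = [$stdout, log_file('/var/log/karafka/karafka.log')].compact
  # => [$stdout] on a read-only filesystem; [$stdout, <File>] otherwise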
data/lib/karafka/instrumentation/stdout_listener.rb
CHANGED
@@ -43,7 +43,7 @@ module Karafka
       #   so it returns a topic as a string, not a routing topic
       debug(
         <<~MSG.chomp.tr("\n", ' ')
-          Params deserialization for #{event[:caller].topic} topic
+          Params deserialization for #{event[:caller].metadata.topic} topic
           successful in #{event[:time]} ms
         MSG
       )
@@ -52,7 +52,9 @@ module Karafka
     # Logs unsuccessful deserialization attempts of incoming data
     # @param event [Dry::Events::Event] event details including payload
    def on_params_params_deserialize_error(event)
-
+      topic = event[:caller].metadata.topic
+      error = event[:error]
+      error "Params deserialization error for #{topic} topic: #{error}"
    end

    # Logs errors that occurred in a listener fetch loop
@@ -101,10 +103,10 @@ module Karafka
      info "Responded from #{calling.class} using #{responder} with following data #{data}"
    end

-    # Logs info that we're initializing Karafka
+    # Logs info that we're initializing Karafka framework components
    # @param _event [Dry::Events::Event] event details including payload
    def on_app_initializing(_event)
-      info "Initializing Karafka
+      info "Initializing Karafka framework #{::Process.pid}"
    end

    # Logs info that we're running Karafka app
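All of these handlers are callbacks on karafka's instrumentation bus, and the switch to event[:caller].metadata.topic reflects that the instrumented caller is now a Params object whose attributes live on its metadata struct (see the params files below). For reference, an application typically activates this listener by subscribing it to the monitor — a hedged sketch, since the exact wiring comes from the generated karafka.rb template:

  Karafka.monitor.subscribe(Karafka::Instrumentation::StdoutListener.new)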
data/lib/karafka/params/batch_metadata.rb
ADDED
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Params
+    # Simple batch metadata object that stores all non-message information received from Kafka
+    # cluster while fetching the data
+    # @note This metadata object refers to per batch metadata, not `#params.metadata`
+    BatchMetadata = Struct.new(
+      :batch_size,
+      :first_offset,
+      :highwater_mark_offset,
+      :unknown_last_offset,
+      :last_offset,
+      :offset_lag,
+      :deserializer,
+      :partition,
+      :topic,
+      keyword_init: true
+    ) do
+      # @return [Boolean] is the last offset known or unknown
+      def unknown_last_offset?
+        unknown_last_offset
+      end
+    end
+  end
+end
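Because this is a plain Struct with keyword_init, batch-level details come with cheap accessors and no Hash plumbing. A usage sketch with made-up values:

  meta = Karafka::Params::BatchMetadata.new(
    batch_size: 2, first_offset: 10, highwater_mark_offset: 12,
    unknown_last_offset: false, last_offset: 11, offset_lag: 0,
    deserializer: nil, partition: 0, topic: 'events'
  )

  meta.topic                 # => "events"
  meta.unknown_last_offset?  # => false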
data/lib/karafka/params/builders/batch_metadata.rb
ADDED
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Params
+    module Builders
+      # Builder for creating batch metadata object based on the batch informations
+      module BatchMetadata
+        class << self
+          # Creates metadata based on the kafka batch data
+          # @param kafka_batch [Kafka::FetchedBatch] kafka batch details
+          # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
+          # @return [Karafka::Params::BatchMetadata] batch metadata object
+          def from_kafka_batch(kafka_batch, topic)
+            Karafka::Params::BatchMetadata.new(
+              batch_size: kafka_batch.messages.count,
+              first_offset: kafka_batch.first_offset,
+              highwater_mark_offset: kafka_batch.highwater_mark_offset,
+              unknown_last_offset: kafka_batch.unknown_last_offset?,
+              last_offset: kafka_batch.last_offset,
+              offset_lag: kafka_batch.offset_lag,
+              deserializer: topic.deserializer,
+              partition: kafka_batch.partition,
+              topic: topic.name
+            ).freeze
+          end
+        end
+      end
+    end
+  end
+end
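This builder is what turns a ruby-kafka fetched batch into the per-batch metadata consumers receive (the new consumers/batch_metadata.rb in the file list wires it in). A hedged sketch of the call, where kafka_batch and topic stand for objects the framework normally supplies:

  batch_metadata = Karafka::Params::Builders::BatchMetadata
                   .from_kafka_batch(kafka_batch, topic)

  batch_metadata.frozen?    # => true — the builder freezes the struct
  batch_metadata.batch_size # message count of this fetch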
data/lib/karafka/params/builders/params.rb
CHANGED
@@ -12,22 +12,24 @@ module Karafka
       class << self
         # @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
         # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
-        # @return [Karafka::Params::Params] params object
+        # @return [Karafka::Params::Params] params object with payload and message metadata
         def from_kafka_message(kafka_message, topic)
-          Karafka::Params::
-            .
-            .
-
-
-
-
-
-
-
-
-
-
-
+          metadata = Karafka::Params::Metadata.new(
+            create_time: kafka_message.create_time,
+            headers: kafka_message.headers || {},
+            is_control_record: kafka_message.is_control_record,
+            key: kafka_message.key,
+            offset: kafka_message.offset,
+            deserializer: topic.deserializer,
+            partition: kafka_message.partition,
+            receive_time: Time.now,
+            topic: topic.name
+          ).freeze
+
+          Karafka::Params::Params.new(
+            kafka_message.value,
+            metadata
+          )
         end
       end
     end
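This hunk is the core of the 1.4 params redesign: instead of assembling one Hash-like object, the builder now produces a frozen Metadata struct plus a lightweight Params wrapper around the untouched Kafka payload, so nothing is deserialized at fetch time. A sketch of the result, using only names defined in this diff:

  params = Karafka::Params::Builders::Params.from_kafka_message(kafka_message, topic)

  params.metadata.offset # plain metadata access, no deserialization
  params.raw_payload     # raw Kafka value, exactly as received
  params.payload         # deserializes on first access (see params.rb below)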
data/lib/karafka/params/builders/params_batch.rb
CHANGED
@@ -12,11 +12,11 @@ module Karafka
         # @param topic [Karafka::Routing::Topic] topic for which we're received messages
         # @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
         def from_kafka_messages(kafka_messages, topic)
-          params_array = kafka_messages.map
+          params_array = kafka_messages.map do |message|
             Karafka::Params::Builders::Params.from_kafka_message(message, topic)
           end

-          Karafka::Params::ParamsBatch.new(params_array)
+          Karafka::Params::ParamsBatch.new(params_array).freeze
         end
       end
     end
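The batch now also freezes itself, making it explicit that a ParamsBatch is a read-only collection; laziness is preserved, since freezing the batch does not deserialize the individual params. A hedged usage sketch, with kafka_messages and topic supplied by the framework:

  batch = Karafka::Params::Builders::ParamsBatch.from_kafka_messages(kafka_messages, topic)
  batch.frozen? # => true; each params object still deserializes lazily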
data/lib/karafka/params/metadata.rb
CHANGED
@@ -2,34 +2,19 @@

 module Karafka
   module Params
-    #
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-      private_constant :METHOD_ATTRIBUTES
-
-      METHOD_ATTRIBUTES.each do |attr|
-        # Defines a method call accessor to a particular hash field.
-        define_method(attr) do
-          self[attr]
-        end
-      end
-
-      # @return [Boolean] is the last offset known or unknown
-      def unknown_last_offset?
-        self['unknown_last_offset']
-      end
-    end
+    # Single message / params metadata details that can be accessed without the need for the
+    #   payload deserialization
+    Metadata = Struct.new(
+      :create_time,
+      :headers,
+      :is_control_record,
+      :key,
+      :offset,
+      :deserializer,
+      :partition,
+      :receive_time,
+      :topic,
+      keyword_init: true
+    )
  end
end
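Replacing the Hash subclass and its define_method accessors with a Struct keeps the same reader API while making the attribute set explicit and introspectable — which Params (next file) exploits for delegation:

  Karafka::Params::Metadata.members
  # => [:create_time, :headers, :is_control_record, :key, :offset,
  #     :deserializer, :partition, :receive_time, :topic]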
data/lib/karafka/params/params.rb
CHANGED
@@ -6,58 +6,44 @@ module Karafka
     #   It provides lazy loading not only until the first usage, but also allows us to skip
     #   using deserializer until we execute our logic. That way we can operate with
     #   heavy-deserialization data without slowing down the whole application.
-    class Params
-
-    #   client compatibility.
-    # Kafka passes internally Kafka::FetchedMessage object and the ruby-kafka consumer
-    #   uses those fields via method calls, so in order to be able to pass there our params
-    #   objects, have to have same api.
-      METHOD_ATTRIBUTES = %w[
-        create_time
-        headers
-        is_control_record
-        key
-        offset
-        deserializer
-        deserialized
-        partition
-        receive_time
-        topic
-        payload
-      ].freeze
+    class Params
+      extend Forwardable

-
+      attr_reader :raw_payload, :metadata

-
-
-
-
-
-
-
-
-
-      end
+      def_delegators :metadata, *Metadata.members
+
+      # @param raw_payload [Object] incoming payload before deserialization
+      # @param metadata [Karafka::Params::Metadata] message metadata object
+      def initialize(raw_payload, metadata)
+        @raw_payload = raw_payload
+        @metadata = metadata
+        @deserialized = false
+        @payload = nil
      end

-      # @return [
-
-
-
-
+      # @return [Object] lazy-deserialized data (deserialized upon first request)
+      def payload
+        return @payload if deserialized?
+
+        @payload = deserialize
+        # We mark deserialization as successful after deserialization, as in case of an error
+        #   this won't be falsely set to true
+        @deserialized = true
+        @payload
+      end

-
-
-
+      # @return [Boolean] did given params payload were deserialized already
+      def deserialized?
+        @deserialized
      end

      private

-      # @return [Object]
+      # @return [Object] tries de-serializes data
      def deserialize
        Karafka.monitor.instrument('params.params.deserialize', caller: self) do
-
+          metadata.deserializer.call(self)
        end
      rescue ::StandardError => e
        Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
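Params is now a thin value object: metadata readers are forwarded via Forwardable, and the payload is deserialized exactly once, on first access. A hedged end-to-end sketch combining it with the JSON deserializer from this release:

  metadata = Karafka::Params::Metadata.new(
    deserializer: Karafka::Serialization::Json::Deserializer.new,
    topic: 'events', partition: 0, offset: 0, key: nil, headers: {},
    is_control_record: false, create_time: Time.now, receive_time: Time.now
  ).freeze

  params = Karafka::Params::Params.new('{"a":1}', metadata)
  params.deserialized? # => false
  params.payload       # => { "a" => 1 } — deserializes now
  params.deserialized? # => true
  params.topic         # => "events" — delegated to the metadata struct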
data/lib/karafka/params/params_batch.rb
CHANGED
@@ -15,47 +15,46 @@ module Karafka
       @params_array = params_array
     end

-    # @yieldparam [Karafka::Params::Params] each
-    # @note Invocation of this method will cause loading and deserializing each param after
-    #   another.
-    #   directly
+    # @yieldparam [Karafka::Params::Params] each params instance
+    # @note Invocation of this method will not cause loading and deserializing each param after
+    #   another.
     def each
-      @params_array.each { |param| yield(param
+      @params_array.each { |param| yield(param) }
     end

     # @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so they
     #   can be used for batch insert, etc. Without invoking all, up until first use, they won't
     #   be deserialized
     def deserialize!
-      each(&:
+      each(&:payload)
     end

     # @return [Array<Object>] array with deserialized payloads. This method can be useful when
     #   we don't care about metadata and just want to extract all the data payloads from the
     #   batch
     def payloads
-
+      map(&:payload)
     end

-    # @return [Karafka::Params::Params] first element
+    # @return [Karafka::Params::Params] first element
     def first
-      @params_array.first
+      @params_array.first
     end

-    # @return [Karafka::Params::Params] last element
+    # @return [Karafka::Params::Params] last element
     def last
-      @params_array.last
-    end
-
-    # @return [Array<Karafka::Params::Params>] pure array with params (not deserialized)
-    def to_a
-      @params_array
+      @params_array.last
     end

     # @return [Integer] number of messages in the batch
     def size
       @params_array.size
     end
+
+    # @return [Array<Karafka::Params::Params>] pure array with params
+    def to_a
+      @params_array
+    end
   end
 end
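The practical distinction for consumer code: #each and #to_a hand out params without touching payloads, while #payloads (or #deserialize!) forces deserialization of every message. A hedged sketch inside a batch-consuming consumer:

  def consume
    params_batch.each { |param| param.offset } # metadata only, no deserialization
    params_batch.payloads                      # deserializes the whole batch
  end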
data/lib/karafka/routing/consumer_group.rb
CHANGED
@@ -8,9 +8,11 @@ module Karafka
     class ConsumerGroup
       extend Helpers::ConfigRetriever

-      attr_reader
-
-
+      attr_reader(
+        :topics,
+        :id,
+        :name
+      )

       # @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
       #   yet have an application client_id namespace, this will be added here by default.
data/lib/karafka/serialization/json/deserializer.rb
CHANGED
@@ -17,8 +17,8 @@ module Karafka
       #   }
       # Deserializer.call(params) #=> { 'a' => 1 }
       def call(params)
-        ::
-      rescue ::
+        params.raw_payload.nil? ? nil : ::JSON.parse(params.raw_payload)
+      rescue ::JSON::ParserError => e
         raise ::Karafka::Errors::DeserializationError, e
       end
     end
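Two behaviors are visible in the new lines: the deserializer reads #raw_payload (matching the new Params API) and passes a nil payload — e.g. a Kafka tombstone record — through as nil instead of parsing it, while a JSON::ParserError is converted into karafka's own DeserializationError. Reusing the objects from the params.rb sketch above:

  deserializer = Karafka::Serialization::Json::Deserializer.new

  deserializer.call(Karafka::Params::Params.new(nil, metadata))       # => nil
  deserializer.call(Karafka::Params::Params.new('{"a":1}', metadata)) # => {"a"=>1}
  deserializer.call(Karafka::Params::Params.new('oops', metadata))    # raises Karafka::Errors::DeserializationError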
data/lib/karafka/server.rb
CHANGED
@@ -58,9 +58,12 @@ module Karafka
     def stop_supervised
       Karafka::App.stop!

+      # See https://github.com/dry-rb/dry-configurable/issues/93
+      timeout = Thread.new { Karafka::App.config.shutdown_timeout }.join.value
+
       # We check from time to time (for the timeout period) if all the threads finished
       #   their work and if so, we can just return and normal shutdown process will take place
-      (
+      (timeout * SUPERVISION_CHECK_FACTOR).to_i.times do
         if consumer_threads.count(&:alive?).zero?
           Thread.new { Karafka.monitor.instrument('app.stopped') }.join
           return