karafka 1.3.0 → 1.4.14
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.diffend.yml +3 -0
- data/.github/workflows/ci.yml +76 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +112 -15
- data/CODE_OF_CONDUCT.md +1 -1
- data/Gemfile +2 -0
- data/Gemfile.lock +87 -98
- data/README.md +28 -31
- data/certs/mensfeld.pem +24 -23
- data/config/errors.yml +2 -0
- data/docker-compose.yml +17 -0
- data/karafka.gemspec +22 -14
- data/lib/karafka/assignment_strategies/round_robin.rb +13 -0
- data/lib/karafka/attributes_map.rb +3 -8
- data/lib/karafka/cli/base.rb +4 -4
- data/lib/karafka/cli/flow.rb +9 -6
- data/lib/karafka/cli/info.rb +1 -1
- data/lib/karafka/cli/install.rb +5 -2
- data/lib/karafka/cli/missingno.rb +19 -0
- data/lib/karafka/cli/server.rb +8 -8
- data/lib/karafka/cli.rb +9 -1
- data/lib/karafka/connection/api_adapter.rb +27 -24
- data/lib/karafka/connection/batch_delegator.rb +5 -1
- data/lib/karafka/connection/builder.rb +9 -2
- data/lib/karafka/connection/client.rb +9 -6
- data/lib/karafka/connection/listener.rb +2 -2
- data/lib/karafka/consumers/batch_metadata.rb +10 -0
- data/lib/karafka/consumers/includer.rb +5 -4
- data/lib/karafka/contracts/consumer_group.rb +10 -5
- data/lib/karafka/contracts/server_cli_options.rb +2 -0
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/helpers/class_matcher.rb +2 -2
- data/lib/karafka/instrumentation/logger.rb +6 -9
- data/lib/karafka/instrumentation/stdout_listener.rb +6 -4
- data/lib/karafka/params/batch_metadata.rb +26 -0
- data/lib/karafka/params/builders/batch_metadata.rb +30 -0
- data/lib/karafka/params/builders/params.rb +17 -15
- data/lib/karafka/params/builders/params_batch.rb +2 -2
- data/lib/karafka/params/metadata.rb +14 -29
- data/lib/karafka/params/params.rb +27 -41
- data/lib/karafka/params/params_batch.rb +15 -16
- data/lib/karafka/routing/builder.rb +1 -0
- data/lib/karafka/routing/consumer_group.rb +5 -3
- data/lib/karafka/serialization/json/deserializer.rb +2 -2
- data/lib/karafka/server.rb +4 -1
- data/lib/karafka/setup/config.rb +60 -52
- data/lib/karafka/templates/karafka.rb.erb +1 -1
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +3 -1
- data.tar.gz.sig +0 -0
- metadata +75 -93
- metadata.gz.sig +0 -0
- data/.github/FUNDING.yml +0 -3
- data/.travis.yml +0 -36
- data/lib/karafka/consumers/metadata.rb +0 -10
- data/lib/karafka/params/builders/metadata.rb +0 -33
data/lib/karafka/contracts/server_cli_options.rb
CHANGED
@@ -6,6 +6,8 @@ module Karafka
     # We validate some basics + the list of consumer_groups on which we want to use, to make
     # sure that all of them are defined, plus that a pidfile does not exist
     class ServerCliOptions < Dry::Validation::Contract
+      config.messages.load_paths << File.join(Karafka.gem_root, 'config', 'errors.yml')
+
       params do
         optional(:pid).filled(:str?)
         optional(:daemon).filled(:bool?)
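The added line wires Karafka's own errors.yml into the contract's message backend. A minimal sketch of the same dry-validation mechanism, assuming the dry-validation gem is available and a hypothetical local errors.yml with custom message keys exists next to the script:

    require 'dry/validation'

    class PidfileContract < Dry::Validation::Contract
      # hypothetical extra messages file, same mechanism as the line added above
      config.messages.load_paths << File.join(__dir__, 'errors.yml')

      params do
        optional(:pid).filled(:str?)
        optional(:daemon).filled(:bool?)
      end
    end

    PidfileContract.new.call(pid: '').errors.to_h #=> { pid: ["must be filled"] }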
data/lib/karafka/contracts.rb
CHANGED
data/lib/karafka/helpers/class_matcher.rb
CHANGED
@@ -8,7 +8,7 @@ module Karafka
     class ClassMatcher
       # Regexp used to remove any non classy like characters that might be in the consumer
       # class name (if defined dynamically, etc)
-      CONSTANT_REGEXP = %r{[
+      CONSTANT_REGEXP = %r{[?!=+\-*/\^|&\[\]<>%~\#:\s()]}.freeze
 
       private_constant :CONSTANT_REGEXP
 
@@ -44,7 +44,7 @@ module Karafka
       # @example From Namespaced::Super2Consumer matching responder
       #   matcher.name #=> Super2Responder
       def name
-        inflected =
+        inflected = +@klass.to_s.split('::').last.to_s
         # We inject the from into the name just in case it is missing as in a situation like
         # that it would just sanitize the name without adding the "to" postfix.
         # It could create cases when we want to build for example a responder to a consumer
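An illustrative sketch of what the two new right-hand sides do (only the relevant expressions, not the full ClassMatcher): the regexp constant is frozen, and the unary plus returns a mutable copy of the last namespace segment even under frozen string literals:

    CONSTANT_REGEXP = %r{[?!=+\-*/\^|&\[\]<>%~\#:\s()]}.freeze

    klass = 'Namespaced::Super2Consumer'
    inflected = +klass.to_s.split('::').last.to_s #=> "Super2Consumer" (unfrozen copy)
    inflected.gsub!(CONSTANT_REGEXP, '')          # strips non-class-name characters in place
    inflected                                     #=> "Super2Consumer"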
data/lib/karafka/instrumentation/logger.rb
CHANGED
@@ -20,7 +20,6 @@ module Karafka
     # @param _args Any arguments that we don't care about but that are needed in order to
     #   make this logger compatible with the default Ruby one
     def initialize(*_args)
-      ensure_dir_exists
       super(target)
       self.level = ENV_MAP[Karafka.env] || ENV_MAP['default']
     end
@@ -29,17 +28,11 @@ module Karafka
 
     # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
     #   to which we will be writing logs
-    # We use this approach to log stuff to file and to the
+    # We use this approach to log stuff to file and to the $stdout at the same time
     def target
       Karafka::Helpers::MultiDelegator
         .delegate(:write, :close)
-        .to(
-    end
-
-    # Makes sure the log directory exists
-    def ensure_dir_exists
-      dir = File.dirname(log_path)
-      FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
+        .to(*[$stdout, file].compact)
     end
 
     # @return [Pathname] Path to a file to which we should log
@@ -50,7 +43,11 @@ module Karafka
     # @return [File] file to which we want to write our logs
     # @note File is being opened in append mode ('a')
     def file
+      FileUtils.mkdir_p(File.dirname(log_path))
+
       @file ||= File.open(log_path, 'a')
+    rescue Errno::EACCES, Errno::EROFS
+      nil
     end
   end
 end
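The net effect of these hunks is that the logger always writes to $stdout and only writes to a file when one can be opened, creating the log directory on demand and silently skipping the file target on read-only or permission-restricted filesystems. An illustrative standalone sketch of that pattern (not Karafka's own MultiDelegator):

    require 'fileutils'

    # Try to open an append-mode log file; return nil when the filesystem refuses
    def log_file(path)
      FileUtils.mkdir_p(File.dirname(path))
      File.open(path, 'a')
    rescue Errno::EACCES, Errno::EROFS
      nil
    end

    # nil targets get compacted away, so logging degrades to $stdout-only
    targets = [$stdout, log_file('log/example.log')].compact
    targets.each { |t| t.write("something happened\n") }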
data/lib/karafka/instrumentation/stdout_listener.rb
CHANGED
@@ -43,7 +43,7 @@ module Karafka
       # so it returns a topic as a string, not a routing topic
       debug(
         <<~MSG.chomp.tr("\n", ' ')
-          Params deserialization for #{event[:caller].topic} topic
+          Params deserialization for #{event[:caller].metadata.topic} topic
           successful in #{event[:time]} ms
         MSG
       )
@@ -52,7 +52,9 @@ module Karafka
     # Logs unsuccessful deserialization attempts of incoming data
     # @param event [Dry::Events::Event] event details including payload
     def on_params_params_deserialize_error(event)
-
+      topic = event[:caller].metadata.topic
+      error = event[:error]
+      error "Params deserialization error for #{topic} topic: #{error}"
     end
 
     # Logs errors that occurred in a listener fetch loop
@@ -101,10 +103,10 @@ module Karafka
       info "Responded from #{calling.class} using #{responder} with following data #{data}"
     end
 
-    # Logs info that we're initializing Karafka
+    # Logs info that we're initializing Karafka framework components
     # @param _event [Dry::Events::Event] event details including payload
     def on_app_initializing(_event)
-      info "Initializing Karafka
+      info "Initializing Karafka framework #{::Process.pid}"
     end
 
     # Logs info that we're running Karafka app
data/lib/karafka/params/batch_metadata.rb
ADDED
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Params
+    # Simple batch metadata object that stores all non-message information received from Kafka
+    # cluster while fetching the data
+    # @note This metadata object refers to per batch metadata, not `#params.metadata`
+    BatchMetadata = Struct.new(
+      :batch_size,
+      :first_offset,
+      :highwater_mark_offset,
+      :unknown_last_offset,
+      :last_offset,
+      :offset_lag,
+      :deserializer,
+      :partition,
+      :topic,
+      keyword_init: true
+    ) do
+      # @return [Boolean] is the last offset known or unknown
+      def unknown_last_offset?
+        unknown_last_offset
+      end
+    end
+  end
+end
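A short usage sketch, assuming karafka >= 1.4 is loaded via `require 'karafka'`; the values are made up for illustration:

    metadata = Karafka::Params::BatchMetadata.new(
      batch_size: 2,
      first_offset: 10,
      highwater_mark_offset: 12,
      unknown_last_offset: false,
      last_offset: 11,
      offset_lag: 0,
      deserializer: Karafka::Serialization::Json::Deserializer.new,
      partition: 0,
      topic: 'example_topic'
    )

    metadata.unknown_last_offset? #=> false
    metadata.batch_size           #=> 2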
data/lib/karafka/params/builders/batch_metadata.rb
ADDED
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Params
+    module Builders
+      # Builder for creating batch metadata object based on the batch informations
+      module BatchMetadata
+        class << self
+          # Creates metadata based on the kafka batch data
+          # @param kafka_batch [Kafka::FetchedBatch] kafka batch details
+          # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
+          # @return [Karafka::Params::BatchMetadata] batch metadata object
+          def from_kafka_batch(kafka_batch, topic)
+            Karafka::Params::BatchMetadata.new(
+              batch_size: kafka_batch.messages.count,
+              first_offset: kafka_batch.first_offset,
+              highwater_mark_offset: kafka_batch.highwater_mark_offset,
+              unknown_last_offset: kafka_batch.unknown_last_offset?,
+              last_offset: kafka_batch.last_offset,
+              offset_lag: kafka_batch.offset_lag,
+              deserializer: topic.deserializer,
+              partition: kafka_batch.partition,
+              topic: topic.name
+            ).freeze
+          end
+        end
+      end
+    end
+  end
+end
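In batch-consuming mode this object is what consumers end up reading: the consumers/batch_metadata.rb file added in this release exposes it as #batch_metadata, replacing 1.3's consumer #metadata (consumers/metadata.rb is removed). A hedged sketch of how a consumer might use it, assuming a topic configured for batch consuming:

    class EventsConsumer < Karafka::BaseConsumer
      def consume
        Karafka.logger.info(
          "Got #{batch_metadata.batch_size} messages from " \
          "#{batch_metadata.topic}##{batch_metadata.partition} " \
          "(lag: #{batch_metadata.offset_lag})"
        )

        params_batch.payloads.each { |payload| payload } # process each deserialized payload
      end
    end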
data/lib/karafka/params/builders/params.rb
CHANGED
@@ -12,22 +12,24 @@ module Karafka
       class << self
         # @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
         # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
-        # @return [Karafka::Params::Params] params object
+        # @return [Karafka::Params::Params] params object with payload and message metadata
         def from_kafka_message(kafka_message, topic)
-          Karafka::Params::
-            .
-            .
+          metadata = Karafka::Params::Metadata.new(
+            create_time: kafka_message.create_time,
+            headers: kafka_message.headers || {},
+            is_control_record: kafka_message.is_control_record,
+            key: kafka_message.key,
+            offset: kafka_message.offset,
+            deserializer: topic.deserializer,
+            partition: kafka_message.partition,
+            receive_time: Time.now,
+            topic: topic.name
+          ).freeze
+
+          Karafka::Params::Params.new(
+            kafka_message.value,
+            metadata
+          )
         end
       end
     end
data/lib/karafka/params/builders/params_batch.rb
CHANGED
@@ -12,11 +12,11 @@ module Karafka
       # @param topic [Karafka::Routing::Topic] topic for which we're received messages
       # @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
       def from_kafka_messages(kafka_messages, topic)
-        params_array = kafka_messages.map
+        params_array = kafka_messages.map do |message|
           Karafka::Params::Builders::Params.from_kafka_message(message, topic)
         end
 
-        Karafka::Params::ParamsBatch.new(params_array)
+        Karafka::Params::ParamsBatch.new(params_array).freeze
       end
     end
   end
data/lib/karafka/params/metadata.rb
CHANGED
@@ -2,34 +2,19 @@
 
 module Karafka
   module Params
-    #
-    #
-      private_constant :METHOD_ATTRIBUTES
-
-      METHOD_ATTRIBUTES.each do |attr|
-        # Defines a method call accessor to a particular hash field.
-        define_method(attr) do
-          self[attr]
-        end
-      end
-
-      # @return [Boolean] is the last offset known or unknown
-      def unknown_last_offset?
-        self['unknown_last_offset']
-      end
-    end
+    # Single message / params metadata details that can be accessed without the need for the
+    #   payload deserialization
+    Metadata = Struct.new(
+      :create_time,
+      :headers,
+      :is_control_record,
+      :key,
+      :offset,
+      :deserializer,
+      :partition,
+      :receive_time,
+      :topic,
+      keyword_init: true
+    )
   end
 end
data/lib/karafka/params/params.rb
CHANGED
@@ -6,58 +6,44 @@ module Karafka
     # It provides lazy loading not only until the first usage, but also allows us to skip
     #   using deserializer until we execute our logic. That way we can operate with
     #   heavy-deserialization data without slowing down the whole application.
-    class Params
-      # client compatibility.
-      # Kafka passes internally Kafka::FetchedMessage object and the ruby-kafka consumer
-      # uses those fields via method calls, so in order to be able to pass there our params
-      # objects, have to have same api.
-      METHOD_ATTRIBUTES = %w[
-        create_time
-        headers
-        is_control_record
-        key
-        offset
-        deserializer
-        deserialized
-        partition
-        receive_time
-        topic
-        payload
-      ].freeze
+    class Params
+      extend Forwardable
 
+      attr_reader :raw_payload, :metadata
 
-      end
+      def_delegators :metadata, *Metadata.members
+
+      # @param raw_payload [Object] incoming payload before deserialization
+      # @param metadata [Karafka::Params::Metadata] message metadata object
+      def initialize(raw_payload, metadata)
+        @raw_payload = raw_payload
+        @metadata = metadata
+        @deserialized = false
+        @payload = nil
       end
 
-      # @return [
+      # @return [Object] lazy-deserialized data (deserialized upon first request)
+      def payload
+        return @payload if deserialized?
+
+        @payload = deserialize
+        # We mark deserialization as successful after deserialization, as in case of an error
+        # this won't be falsely set to true
+        @deserialized = true
+        @payload
+      end
 
+      # @return [Boolean] did given params payload were deserialized already
+      def deserialized?
+        @deserialized
       end
 
       private
 
-      # @return [Object]
+      # @return [Object] tries de-serializes data
      def deserialize
        Karafka.monitor.instrument('params.params.deserialize', caller: self) do
-
+          metadata.deserializer.call(self)
        end
      rescue ::StandardError => e
        Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
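A usage sketch of the reworked Params API, assuming karafka >= 1.4 is loaded; the deserializer is the stock JSON one and the payload string is made up:

    require 'karafka'

    metadata = Karafka::Params::Metadata.new(
      create_time: Time.now,
      headers: {},
      is_control_record: false,
      key: nil,
      offset: 0,
      deserializer: Karafka::Serialization::Json::Deserializer.new,
      partition: 0,
      receive_time: Time.now,
      topic: 'example_topic'
    ).freeze

    params = Karafka::Params::Params.new('{"user_id":1}', metadata)

    params.deserialized? #=> false – nothing parsed yet
    params.payload       #=> { "user_id" => 1 } – deserialized on first access
    params.deserialized? #=> true
    params.topic         #=> "example_topic" – delegated to metadata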
data/lib/karafka/params/params_batch.rb
CHANGED
@@ -15,47 +15,46 @@ module Karafka
       @params_array = params_array
     end
 
-    # @yieldparam [Karafka::Params::Params] each
-    # @note Invocation of this method will cause loading and deserializing each param after
-    #   another.
-    #   directly
+    # @yieldparam [Karafka::Params::Params] each params instance
+    # @note Invocation of this method will not cause loading and deserializing each param after
+    #   another.
     def each
-      @params_array.each { |param| yield(param
+      @params_array.each { |param| yield(param) }
     end
 
     # @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so they
     #   can be used for batch insert, etc. Without invoking all, up until first use, they won't
     #   be deserialized
     def deserialize!
-      each(&:
+      each(&:payload)
     end
 
     # @return [Array<Object>] array with deserialized payloads. This method can be useful when
     #   we don't care about metadata and just want to extract all the data payloads from the
     #   batch
     def payloads
-
+      map(&:payload)
     end
 
-    # @return [Karafka::Params::Params] first element
+    # @return [Karafka::Params::Params] first element
     def first
-      @params_array.first
+      @params_array.first
     end
 
-    # @return [Karafka::Params::Params] last element
+    # @return [Karafka::Params::Params] last element
     def last
-      @params_array.last
-    end
-
-    # @return [Array<Karafka::Params::Params>] pure array with params (not deserialized)
-    def to_a
-      @params_array
+      @params_array.last
     end
 
     # @return [Integer] number of messages in the batch
     def size
       @params_array.size
     end
+
+    # @return [Array<Karafka::Params::Params>] pure array with params
+    def to_a
+      @params_array
+    end
   end
 end
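A short sketch of the batch surface shown above, assuming a `params` object built as in the previous sketch:

    batch = Karafka::Params::ParamsBatch.new([params]).freeze

    batch.size                          #=> 1
    batch.each { |p| p.metadata.topic } # iterates without deserializing anything
    batch.payloads                      #=> [{ "user_id" => 1 }] – deserializes every message
    batch.first.deserialized?           #=> true (after #payloads has run)
    batch.to_a                          #=> the underlying params array, untouched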
data/lib/karafka/routing/consumer_group.rb
CHANGED
@@ -8,9 +8,11 @@ module Karafka
     class ConsumerGroup
       extend Helpers::ConfigRetriever
 
-      attr_reader
-
-
+      attr_reader(
+        :topics,
+        :id,
+        :name
+      )
 
       # @param name [String, Symbol] raw name of this consumer group. Raw means, that it does not
       #   yet have an application client_id namespace, this will be added here by default.
data/lib/karafka/serialization/json/deserializer.rb
CHANGED
@@ -17,8 +17,8 @@ module Karafka
       #   }
       #   Deserializer.call(params) #=> { 'a' => 1 }
       def call(params)
-        ::
-      rescue ::
+        params.raw_payload.nil? ? nil : ::JSON.parse(params.raw_payload)
+      rescue ::JSON::ParserError => e
         raise ::Karafka::Errors::DeserializationError, e
       end
     end
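A behavior sketch for the change above: nil payloads (for example tombstone records) now come back as nil instead of raising. The stand-in params struct below is hypothetical and only mimics the #raw_payload reader the deserializer expects:

    require 'karafka'

    StubParams = Struct.new(:raw_payload) # hypothetical stand-in exposing #raw_payload

    deserializer = Karafka::Serialization::Json::Deserializer.new

    deserializer.call(StubParams.new('{"a":1}')) #=> { "a" => 1 }
    deserializer.call(StubParams.new(nil))       #=> nil
    deserializer.call(StubParams.new('oops'))    # raises Karafka::Errors::DeserializationError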
data/lib/karafka/server.rb
CHANGED
@@ -58,9 +58,12 @@ module Karafka
     def stop_supervised
       Karafka::App.stop!
 
+      # See https://github.com/dry-rb/dry-configurable/issues/93
+      timeout = Thread.new { Karafka::App.config.shutdown_timeout }.join.value
+
       # We check from time to time (for the timeout period) if all the threads finished
       # their work and if so, we can just return and normal shutdown process will take place
-      (
+      (timeout * SUPERVISION_CHECK_FACTOR).to_i.times do
         if consumer_threads.count(&:alive?).zero?
           Thread.new { Karafka.monitor.instrument('app.stopped') }.join
           return
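The `Thread.new { ... }.join.value` idiom evaluates the block on a fresh thread and returns its result; here it is used so the config read does not happen directly in the supervised-shutdown context, working around the linked dry-configurable issue (the same wrapping is applied to the monitor call below). A minimal illustration of the idiom itself, with a made-up constant standing in for the config value:

    SHUTDOWN_TIMEOUT = 60 # hypothetical stand-in for Karafka::App.config.shutdown_timeout

    timeout = Thread.new { SHUTDOWN_TIMEOUT }.join.value
    timeout #=> 60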