karafka 1.3.6 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/.diffend.yml +3 -0
  5. data/.github/workflows/ci.yml +52 -0
  6. data/.ruby-version +1 -1
  7. data/CHANGELOG.md +27 -1
  8. data/CODE_OF_CONDUCT.md +1 -1
  9. data/Gemfile +2 -0
  10. data/Gemfile.lock +42 -41
  11. data/README.md +3 -5
  12. data/certs/mensfeld.pem +21 -21
  13. data/config/errors.yml +2 -0
  14. data/docker-compose.yml +17 -0
  15. data/karafka.gemspec +3 -4
  16. data/lib/karafka.rb +1 -1
  17. data/lib/karafka/assignment_strategies/round_robin.rb +13 -0
  18. data/lib/karafka/attributes_map.rb +1 -0
  19. data/lib/karafka/cli.rb +8 -0
  20. data/lib/karafka/cli/base.rb +4 -4
  21. data/lib/karafka/cli/missingno.rb +19 -0
  22. data/lib/karafka/connection/api_adapter.rb +5 -3
  23. data/lib/karafka/connection/batch_delegator.rb +5 -1
  24. data/lib/karafka/consumers/batch_metadata.rb +10 -0
  25. data/lib/karafka/consumers/includer.rb +5 -4
  26. data/lib/karafka/contracts.rb +1 -1
  27. data/lib/karafka/contracts/consumer_group.rb +8 -3
  28. data/lib/karafka/helpers/class_matcher.rb +1 -1
  29. data/lib/karafka/instrumentation/logger.rb +2 -2
  30. data/lib/karafka/instrumentation/stdout_listener.rb +4 -2
  31. data/lib/karafka/params/batch_metadata.rb +26 -0
  32. data/lib/karafka/params/builders/batch_metadata.rb +30 -0
  33. data/lib/karafka/params/builders/params.rb +17 -15
  34. data/lib/karafka/params/builders/params_batch.rb +2 -2
  35. data/lib/karafka/params/metadata.rb +14 -29
  36. data/lib/karafka/params/params.rb +27 -41
  37. data/lib/karafka/params/params_batch.rb +15 -16
  38. data/lib/karafka/routing/builder.rb +1 -0
  39. data/lib/karafka/routing/consumer_group.rb +5 -3
  40. data/lib/karafka/serialization/json/deserializer.rb +2 -2
  41. data/lib/karafka/setup/config.rb +5 -0
  42. data/lib/karafka/version.rb +1 -1
  43. metadata +37 -46
  44. metadata.gz.sig +0 -0
  45. data/.travis.yml +0 -36
  46. data/lib/karafka/consumers/metadata.rb +0 -10
  47. data/lib/karafka/params/builders/metadata.rb +0 -33
data/lib/karafka/attributes_map.rb
@@ -19,6 +19,7 @@ module Karafka
         consumer: %i[
           session_timeout offset_commit_interval offset_commit_threshold
           offset_retention_time heartbeat_interval fetcher_max_queue_size
+          assignment_strategy
         ],
         subscribe: %i[start_from_beginning max_bytes_per_partition],
         consumption: %i[min_bytes max_bytes max_wait_time],
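For context, a minimal setup sketch showing where the new attribute plugs in, assuming the app class layout generated by `karafka install` (the `KarafkaApp` name and `client_id` value are illustrative) and that the setting lives under `config.kafka` next to the other consumer attributes mapped above; `Karafka::AssignmentStrategies::RoundRobin` is the strategy object added in this release (file 17 in the list):

# frozen_string_literal: true

# Illustrative app class; only the assignment_strategy line relates to this change
class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
    # Any object responding to #call passes the contract added in this release;
    # RoundRobin is the built-in strategy it ships with
    config.kafka.assignment_strategy = Karafka::AssignmentStrategies::RoundRobin.new
  end
end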
data/lib/karafka/cli.rb
@@ -10,6 +10,8 @@ module Karafka
   class Cli < Thor
     package_name 'Karafka'
 
+    default_task :missingno
+
     class << self
       # Loads all Cli commands into Thor framework
       # This method should be executed before we run Karafka::Cli.start, otherwise we won't
@@ -20,6 +22,12 @@ module Karafka
         end
       end
 
+      # When there is a CLI crash, exit
+      # @return [true]
+      def exit_on_failure?
+        true
+      end
+
       private
 
       # @return [Array<Class>] Array with Cli action classes that can be used as commands
data/lib/karafka/cli/base.rb
@@ -43,16 +43,16 @@ module Karafka
       end
 
       # Allows to set description of a given cli command
-      # @param desc [String] Description of a given cli command
-      def desc(desc)
-        @desc ||= desc
+      # @param args [Array] All the arguments that Thor desc method accepts
+      def desc(*args)
+        @desc ||= args
       end
 
       # This method will bind a given Cli command into Karafka Cli
       # This method is a wrapper to way Thor defines its commands
       # @param cli_class [Karafka::Cli] Karafka cli_class
       def bind_to(cli_class)
-        cli_class.desc name, @desc
+        cli_class.desc name, *@desc
 
         (@options || []).each { |option| cli_class.option(*option) }
 
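Since `desc` now forwards every argument to Thor, command classes can pass Thor-only options along with the description text. A hypothetical command sketch (the class and message are made up, not part of this release) using the same `hide: true` option the new Missingno command below relies on:

# Hypothetical command purely for illustration
class Karafka::Cli::Example < Karafka::Cli::Base
  # Extra args such as `hide: true` are now stored and re-applied via
  # `cli_class.desc name, *@desc` when the command is bound
  desc 'Prints an example message', hide: true

  def call
    Karafka.logger.info('Example command executed')
  end
end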
data/lib/karafka/cli/missingno.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module Karafka
+  class Cli < Thor
+    # Command that gets invoked when no method is provided when running the CLI
+    # It allows us to exit with exit code 1 instead of default 0 to indicate that something
+    # was missing
+    # @see https://github.com/karafka/karafka/issues/619
+    class Missingno < Base
+      desc 'Hidden command that gets invoked when no command is provided', hide: true
+
+      # Prints an error about the lack of command (nothing selected)
+      def call
+        Karafka.logger.error('No command provided')
+        exit 1
+      end
+    end
+  end
+end
data/lib/karafka/connection/api_adapter.rb
@@ -109,11 +109,13 @@ module Karafka
       # Majority of users don't use custom topic mappers. No need to change anything when it
       # is a default mapper that does not change anything. Only some cloud providers require
       # topics to be remapped
-      return [params] if Karafka::App.config.topic_mapper.is_a?(Karafka::Routing::TopicMapper)
+      return [params.metadata] if Karafka::App.config.topic_mapper.is_a?(
+        Karafka::Routing::TopicMapper
+      )
 
       # @note We don't use tap as it is around 13% slower than non-dup version
-      dupped = params.dup
-      dupped['topic'] = Karafka::App.config.topic_mapper.outgoing(params.topic)
+      dupped = params.metadata.dup
+      dupped['topic'] = Karafka::App.config.topic_mapper.outgoing(params.metadata.topic)
       [dupped]
     end
 
data/lib/karafka/connection/batch_delegator.rb
@@ -23,7 +23,11 @@ module Karafka
         ) do
           # Due to how ruby-kafka is built, we have the metadata that is stored on the batch
           # level only available for batch consuming
-          consumer.metadata = Params::Builders::Metadata.from_kafka_batch(kafka_batch, topic)
+          consumer.batch_metadata = Params::Builders::BatchMetadata.from_kafka_batch(
+            kafka_batch,
+            topic
+          )
+
           kafka_messages = kafka_batch.messages
 
           # Depending on a case (persisted or not) we might use new consumer instance per
data/lib/karafka/consumers/batch_metadata.rb
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Consumers
+    # Brings the batch metadata into consumers that support batch_fetching
+    module BatchMetadata
+      attr_accessor :batch_metadata
+    end
+  end
+end
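A consumer-side sketch of what this module enables, assuming a topic with `batch_fetching true` and an application consumer base class as generated by `karafka install` (all names here are illustrative):

# Illustrative consumer; batch_metadata is the accessor provided by the module above
class EventsConsumer < ApplicationConsumer
  def consume
    Karafka.logger.info(
      "Received #{batch_metadata.batch_size} messages " \
      "from #{batch_metadata.topic}/#{batch_metadata.partition} " \
      "(lag: #{batch_metadata.offset_lag})"
    )

    # The messages themselves are still exposed through params_batch
    params_batch.each { |params| params.payload }
  end
end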
data/lib/karafka/consumers/includer.rb
@@ -16,7 +16,7 @@ module Karafka
 
         bind_backend(consumer, topic)
         bind_params(consumer, topic)
-        bind_metadata(consumer, topic)
+        bind_batch_metadata(consumer, topic)
         bind_responders(consumer, topic)
       end
 
@@ -40,13 +40,14 @@ module Karafka
         consumer.extend(SingleParams)
       end
 
-      # Adds an option to work with metadata for consumer instances that have batch fetching
+      # Adds an option to work with batch metadata for consumer instances that have
+      # batch fetching enabled
      # @param consumer [Karafka::BaseConsumer] consumer instance
      # @param topic [Karafka::Routing::Topic] topic of a consumer class
-      def bind_metadata(consumer, topic)
+      def bind_batch_metadata(consumer, topic)
        return unless topic.batch_fetching
 
-        consumer.extend(Metadata)
+        consumer.extend(BatchMetadata)
      end
 
      # Adds responders support for topics and consumers with responders defined for them
data/lib/karafka/contracts.rb
@@ -5,6 +5,6 @@ module Karafka
   module Contracts
     # Regexp for validating format of groups and topics
     # @note It is not nested inside of the contracts, as it is used by couple of them
-    TOPIC_REGEXP = /\A(\w|\-|\.)+\z/.freeze
+    TOPIC_REGEXP = /\A(\w|-|\.)+\z/.freeze
   end
 end
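The escape of `-` inside the alternation was redundant, so matching behavior is unchanged; for example:

Karafka::Contracts::TOPIC_REGEXP.match?('my-topic.v1') #=> true
Karafka::Contracts::TOPIC_REGEXP.match?('my topic')    #=> false (space is not allowed)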
data/lib/karafka/contracts/consumer_group.rb
@@ -32,6 +32,7 @@ module Karafka
       required(:offset_retention_time).maybe(:integer)
       required(:heartbeat_interval).filled { (int? | float?) & gteq?(0) }
       required(:fetcher_max_queue_size).filled(:int?, gt?: 0)
+      required(:assignment_strategy).value(:any)
       required(:connect_timeout).filled { (int? | float?) & gt?(0) }
       required(:reconnect_timeout).filled { (int? | float?) & gteq?(0) }
       required(:socket_timeout).filled { (int? | float?) & gt?(0) }
@@ -70,13 +71,13 @@ module Karafka
 
       # Uri rule to check if uri is in a Karafka acceptable format
       rule(:seed_brokers) do
-        if value&.is_a?(Array) && !value.all?(&method(:kafka_uri?))
+        if value.is_a?(Array) && !value.all?(&method(:kafka_uri?))
           key.failure(:invalid_broker_schema)
         end
       end
 
       rule(:topics) do
-        if value&.is_a?(Array)
+        if value.is_a?(Array)
           names = value.map { |topic| topic[:name] }
 
           key.failure(:topics_names_not_unique) if names.size != names.uniq.size
@@ -84,7 +85,7 @@ module Karafka
       end
 
       rule(:topics) do
-        if value&.is_a?(Array)
+        if value.is_a?(Array)
           value.each_with_index do |topic, index|
             TOPIC_CONTRACT.call(topic).errors.each do |error|
               key([:topics, index, error.path[0]]).failure(error.text)
@@ -93,6 +94,10 @@ module Karafka
         end
       end
 
+      rule(:assignment_strategy) do
+        key.failure(:does_not_respond_to_call) unless value.respond_to?(:call)
+      end
+
       rule(:ssl_client_cert, :ssl_client_cert_key) do
         if values[:ssl_client_cert] && !values[:ssl_client_cert_key]
           key(:ssl_client_cert_key).failure(:ssl_client_cert_with_ssl_client_cert_key)
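Because the rule only checks `respond_to?(:call)`, any callable can serve as a strategy. A sketch of a hypothetical custom strategy; the keyword interface below mirrors what ruby-kafka's assignment strategies receive and is an assumption here, not part of this diff:

# Hypothetical strategy: only the #call requirement comes from the contract above
class FirstMemberTakesAll
  # @return [Hash] member id => partitions to assign (shape assumed from ruby-kafka)
  def call(cluster:, members:, partitions:)
    # Deliberately naive assignment, purely for illustration
    { members.keys.first => partitions }
  end
end

# config.kafka.assignment_strategy = FirstMemberTakesAll.new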
data/lib/karafka/helpers/class_matcher.rb
@@ -8,7 +8,7 @@ module Karafka
     class ClassMatcher
       # Regexp used to remove any non classy like characters that might be in the consumer
       # class name (if defined dynamically, etc)
-      CONSTANT_REGEXP = %r{[?!=+\-\*/\^\|&\[\]<>%~\#\:\s\(\)]}.freeze
+      CONSTANT_REGEXP = %r{[?!=+\-*/\^|&\[\]<>%~\#:\s()]}.freeze
 
       private_constant :CONSTANT_REGEXP
 
data/lib/karafka/instrumentation/logger.rb
@@ -29,11 +29,11 @@ module Karafka
 
     # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
     #   to which we will be writing logs
-    # We use this approach to log stuff to file and to the STDOUT at the same time
+    # We use this approach to log stuff to file and to the $stdout at the same time
     def target
       Karafka::Helpers::MultiDelegator
         .delegate(:write, :close)
-        .to(STDOUT, file)
+        .to($stdout, file)
     end
 
     # Makes sure the log directory exists as long as we can write to it
data/lib/karafka/instrumentation/stdout_listener.rb
@@ -43,7 +43,7 @@ module Karafka
       # so it returns a topic as a string, not a routing topic
       debug(
         <<~MSG.chomp.tr("\n", ' ')
-          Params deserialization for #{event[:caller].topic} topic
+          Params deserialization for #{event[:caller].metadata.topic} topic
           successful in #{event[:time]} ms
         MSG
       )
@@ -52,7 +52,9 @@ module Karafka
     # Logs unsuccessful deserialization attempts of incoming data
     # @param event [Dry::Events::Event] event details including payload
     def on_params_params_deserialize_error(event)
-      error "Params deserialization error for #{event[:caller].topic} topic: #{event[:error]}"
+      topic = event[:caller].metadata.topic
+      error = event[:error]
+      error "Params deserialization error for #{topic} topic: #{error}"
    end
 
    # Logs errors that occurred in a listener fetch loop
data/lib/karafka/params/batch_metadata.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Params
+    # Simple batch metadata object that stores all non-message information received from Kafka
+    # cluster while fetching the data
+    # @note This metadata object refers to per batch metadata, not `#params.metadata`
+    BatchMetadata = Struct.new(
+      :batch_size,
+      :first_offset,
+      :highwater_mark_offset,
+      :unknown_last_offset,
+      :last_offset,
+      :offset_lag,
+      :deserializer,
+      :partition,
+      :topic,
+      keyword_init: true
+    ) do
+      # @return [Boolean] is the last offset known or unknown
+      def unknown_last_offset?
+        unknown_last_offset
+      end
+    end
+  end
+end
data/lib/karafka/params/builders/batch_metadata.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Params
+    module Builders
+      # Builder for creating batch metadata object based on the batch informations
+      module BatchMetadata
+        class << self
+          # Creates metadata based on the kafka batch data
+          # @param kafka_batch [Kafka::FetchedBatch] kafka batch details
+          # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
+          # @return [Karafka::Params::BatchMetadata] batch metadata object
+          def from_kafka_batch(kafka_batch, topic)
+            Karafka::Params::BatchMetadata.new(
+              batch_size: kafka_batch.messages.count,
+              first_offset: kafka_batch.first_offset,
+              highwater_mark_offset: kafka_batch.highwater_mark_offset,
+              unknown_last_offset: kafka_batch.unknown_last_offset?,
+              last_offset: kafka_batch.last_offset,
+              offset_lag: kafka_batch.offset_lag,
+              deserializer: topic.deserializer,
+              partition: kafka_batch.partition,
+              topic: topic.name
+            ).freeze
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/params/builders/params.rb
@@ -12,22 +12,24 @@ module Karafka
       class << self
         # @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
         # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
-        # @return [Karafka::Params::Params] params object
+        # @return [Karafka::Params::Params] params object with payload and message metadata
         def from_kafka_message(kafka_message, topic)
-          Karafka::Params::Params
-            .new
-            .merge!(
-              'create_time' => kafka_message.create_time,
-              'headers' => kafka_message.headers || {},
-              'is_control_record' => kafka_message.is_control_record,
-              'key' => kafka_message.key,
-              'offset' => kafka_message.offset,
-              'deserializer' => topic.deserializer,
-              'partition' => kafka_message.partition,
-              'receive_time' => Time.now,
-              'topic' => kafka_message.topic,
-              'payload' => kafka_message.value
-            )
+          metadata = Karafka::Params::Metadata.new(
+            create_time: kafka_message.create_time,
+            headers: kafka_message.headers || {},
+            is_control_record: kafka_message.is_control_record,
+            key: kafka_message.key,
+            offset: kafka_message.offset,
+            deserializer: topic.deserializer,
+            partition: kafka_message.partition,
+            receive_time: Time.now,
+            topic: topic.name
+          ).freeze
+
+          Karafka::Params::Params.new(
+            kafka_message.value,
+            metadata
+          )
         end
       end
     end
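After this change a params object is a thin pair: the untouched raw payload plus a frozen metadata struct. A rough sketch of the resulting shape (values are illustrative; `params.topic` works thanks to the delegation added in params.rb further down):

metadata = Karafka::Params::Metadata.new(
  topic: 'events',
  partition: 0,
  offset: 42,
  receive_time: Time.now,
  headers: {},
  deserializer: Karafka::Serialization::Json::Deserializer.new
).freeze

params = Karafka::Params::Params.new('{"id":1}', metadata)

params.raw_payload    #=> '{"id":1}'
params.metadata.topic #=> 'events'
params.topic          #=> 'events'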
data/lib/karafka/params/builders/params_batch.rb
@@ -12,11 +12,11 @@ module Karafka
         # @param topic [Karafka::Routing::Topic] topic for which we're received messages
         # @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
         def from_kafka_messages(kafka_messages, topic)
-          params_array = kafka_messages.map! do |message|
+          params_array = kafka_messages.map do |message|
            Karafka::Params::Builders::Params.from_kafka_message(message, topic)
          end
 
-          Karafka::Params::ParamsBatch.new(params_array)
+          Karafka::Params::ParamsBatch.new(params_array).freeze
        end
      end
    end
data/lib/karafka/params/metadata.rb
@@ -2,34 +2,19 @@
 
 module Karafka
   module Params
-    # Simple metadata object that stores all non-message information received from Kafka cluster
-    # while fetching the data
-    class Metadata < Hash
-      # Attributes that should be accessible as methods as well (not only hash)
-      METHOD_ATTRIBUTES = %w[
-        batch_size
-        first_offset
-        highwater_mark_offset
-        last_offset
-        offset_lag
-        deserializer
-        partition
-        topic
-      ].freeze
-
-      private_constant :METHOD_ATTRIBUTES
-
-      METHOD_ATTRIBUTES.each do |attr|
-        # Defines a method call accessor to a particular hash field.
-        define_method(attr) do
-          self[attr]
-        end
-      end
-
-      # @return [Boolean] is the last offset known or unknown
-      def unknown_last_offset?
-        self['unknown_last_offset']
-      end
-    end
+    # Single message / params metadata details that can be accessed without the need for the
+    # payload deserialization
+    Metadata = Struct.new(
+      :create_time,
+      :headers,
+      :is_control_record,
+      :key,
+      :offset,
+      :deserializer,
+      :partition,
+      :receive_time,
+      :topic,
+      keyword_init: true
+    )
  end
 end
data/lib/karafka/params/params.rb
@@ -6,58 +6,44 @@ module Karafka
     # It provides lazy loading not only until the first usage, but also allows us to skip
     # using deserializer until we execute our logic. That way we can operate with
     # heavy-deserialization data without slowing down the whole application.
-    class Params < Hash
-      # Params attributes that should be available via a method call invocation for Kafka
-      # client compatibility.
-      # Kafka passes internally Kafka::FetchedMessage object and the ruby-kafka consumer
-      # uses those fields via method calls, so in order to be able to pass there our params
-      # objects, have to have same api.
-      METHOD_ATTRIBUTES = %w[
-        create_time
-        headers
-        is_control_record
-        key
-        offset
-        deserializer
-        deserialized
-        partition
-        receive_time
-        topic
-        payload
-      ].freeze
+    class Params
+      extend Forwardable
 
-      private_constant :METHOD_ATTRIBUTES
+      attr_reader :raw_payload, :metadata
 
-      METHOD_ATTRIBUTES.each do |attr|
-        # Defines a method call accessor to a particular hash field.
-        # @note Won't work for complex key names that contain spaces, etc
-        # @param key [Symbol] name of a field that we want to retrieve with a method call
-        # @example
-        #   key_attr_reader :example
-        #   params.example #=> 'my example payload'
-        define_method(attr) do
-          self[attr]
-        end
+      def_delegators :metadata, *Metadata.members
+
+      # @param raw_payload [Object] incoming payload before deserialization
+      # @param metadata [Karafka::Params::Metadata] message metadata object
+      def initialize(raw_payload, metadata)
+        @raw_payload = raw_payload
+        @metadata = metadata
+        @deserialized = false
+        @payload = nil
       end
 
-      # @return [Karafka::Params::Params] This method will trigger deserializer execution. If we
-      #   decide to retrieve data, deserializer will be executed to get data. Output of that will
-      #   be merged to the current object. This object will be also marked as already deserialized,
-      #   so we won't deserialize it again.
-      def deserialize!
-        return self if self['deserialized']
+      # @return [Object] lazy-deserialized data (deserialized upon first request)
+      def payload
+        return @payload if deserialized?
+
+        @payload = deserialize
+        # We mark deserialization as successful after deserialization, as in case of an error
+        # this won't be falsely set to true
+        @deserialized = true
+        @payload
+      end
 
-        self['deserialized'] = true
-        self['payload'] = deserialize
-        self
+      # @return [Boolean] did given params payload were deserialized already
+      def deserialized?
+        @deserialized
      end
 
      private
 
-      # @return [Object] deserialized data
+      # @return [Object] tries de-serializes data
      def deserialize
        Karafka.monitor.instrument('params.params.deserialize', caller: self) do
-          self['deserializer'].call(self)
+          metadata.deserializer.call(self)
        end
      rescue ::StandardError => e
        Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
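The Hash-based `deserialize!` is gone; deserialization now happens lazily on the first `#payload` call and is memoized afterwards. Continuing the illustrative params object from the builders example above:

params.deserialized? #=> false, the deserializer has not run yet
params.payload       #=> { 'id' => 1 }, deserializer invoked exactly once here
params.deserialized? #=> true
params.payload       #=> { 'id' => 1 }, memoized, no second deserializer call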