karafka 1.4.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +2 -0
  3. data.tar.gz.sig +0 -0
  4. data/.coditsu/ci.yml +3 -0
  5. data/.console_irbrc +11 -0
  6. data/.diffend.yml +3 -0
  7. data/.github/FUNDING.yml +3 -0
  8. data/.github/ISSUE_TEMPLATE/bug_report.md +50 -0
  9. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  10. data/.github/workflows/ci.yml +52 -0
  11. data/.gitignore +69 -0
  12. data/.rspec +1 -0
  13. data/.ruby-gemset +1 -0
  14. data/.ruby-version +1 -0
  15. data/CHANGELOG.md +566 -0
  16. data/CODE_OF_CONDUCT.md +46 -0
  17. data/CONTRIBUTING.md +41 -0
  18. data/Gemfile +14 -0
  19. data/Gemfile.lock +139 -0
  20. data/MIT-LICENCE +18 -0
  21. data/README.md +99 -0
  22. data/bin/karafka +19 -0
  23. data/certs/mensfeld.pem +25 -0
  24. data/config/errors.yml +39 -0
  25. data/docker-compose.yml +17 -0
  26. data/karafka.gemspec +43 -0
  27. data/lib/karafka.rb +72 -0
  28. data/lib/karafka/app.rb +53 -0
  29. data/lib/karafka/attributes_map.rb +62 -0
  30. data/lib/karafka/backends/inline.rb +16 -0
  31. data/lib/karafka/base_consumer.rb +57 -0
  32. data/lib/karafka/base_responder.rb +226 -0
  33. data/lib/karafka/cli.rb +54 -0
  34. data/lib/karafka/cli/base.rb +78 -0
  35. data/lib/karafka/cli/console.rb +31 -0
  36. data/lib/karafka/cli/flow.rb +48 -0
  37. data/lib/karafka/cli/info.rb +31 -0
  38. data/lib/karafka/cli/install.rb +66 -0
  39. data/lib/karafka/cli/server.rb +71 -0
  40. data/lib/karafka/code_reloader.rb +67 -0
  41. data/lib/karafka/connection/api_adapter.rb +161 -0
  42. data/lib/karafka/connection/batch_delegator.rb +55 -0
  43. data/lib/karafka/connection/builder.rb +18 -0
  44. data/lib/karafka/connection/client.rb +117 -0
  45. data/lib/karafka/connection/listener.rb +71 -0
  46. data/lib/karafka/connection/message_delegator.rb +36 -0
  47. data/lib/karafka/consumers/batch_metadata.rb +10 -0
  48. data/lib/karafka/consumers/callbacks.rb +71 -0
  49. data/lib/karafka/consumers/includer.rb +64 -0
  50. data/lib/karafka/consumers/responders.rb +24 -0
  51. data/lib/karafka/consumers/single_params.rb +15 -0
  52. data/lib/karafka/contracts.rb +10 -0
  53. data/lib/karafka/contracts/config.rb +21 -0
  54. data/lib/karafka/contracts/consumer_group.rb +206 -0
  55. data/lib/karafka/contracts/consumer_group_topic.rb +19 -0
  56. data/lib/karafka/contracts/responder_usage.rb +54 -0
  57. data/lib/karafka/contracts/server_cli_options.rb +31 -0
  58. data/lib/karafka/errors.rb +51 -0
  59. data/lib/karafka/fetcher.rb +42 -0
  60. data/lib/karafka/helpers/class_matcher.rb +88 -0
  61. data/lib/karafka/helpers/config_retriever.rb +46 -0
  62. data/lib/karafka/helpers/inflector.rb +26 -0
  63. data/lib/karafka/helpers/multi_delegator.rb +32 -0
  64. data/lib/karafka/instrumentation/logger.rb +58 -0
  65. data/lib/karafka/instrumentation/monitor.rb +70 -0
  66. data/lib/karafka/instrumentation/proctitle_listener.rb +36 -0
  67. data/lib/karafka/instrumentation/stdout_listener.rb +140 -0
  68. data/lib/karafka/params/batch_metadata.rb +26 -0
  69. data/lib/karafka/params/builders/batch_metadata.rb +30 -0
  70. data/lib/karafka/params/builders/params.rb +38 -0
  71. data/lib/karafka/params/builders/params_batch.rb +25 -0
  72. data/lib/karafka/params/metadata.rb +20 -0
  73. data/lib/karafka/params/params.rb +50 -0
  74. data/lib/karafka/params/params_batch.rb +60 -0
  75. data/lib/karafka/patches/ruby_kafka.rb +47 -0
  76. data/lib/karafka/persistence/client.rb +29 -0
  77. data/lib/karafka/persistence/consumers.rb +45 -0
  78. data/lib/karafka/persistence/topics.rb +48 -0
  79. data/lib/karafka/process.rb +60 -0
  80. data/lib/karafka/responders/builder.rb +36 -0
  81. data/lib/karafka/responders/topic.rb +55 -0
  82. data/lib/karafka/routing/builder.rb +89 -0
  83. data/lib/karafka/routing/consumer_group.rb +61 -0
  84. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  85. data/lib/karafka/routing/proxy.rb +46 -0
  86. data/lib/karafka/routing/router.rb +29 -0
  87. data/lib/karafka/routing/topic.rb +62 -0
  88. data/lib/karafka/routing/topic_mapper.rb +53 -0
  89. data/lib/karafka/serialization/json/deserializer.rb +27 -0
  90. data/lib/karafka/serialization/json/serializer.rb +31 -0
  91. data/lib/karafka/server.rb +86 -0
  92. data/lib/karafka/setup/config.rb +223 -0
  93. data/lib/karafka/setup/configurators/water_drop.rb +36 -0
  94. data/lib/karafka/setup/dsl.rb +21 -0
  95. data/lib/karafka/status.rb +29 -0
  96. data/lib/karafka/templates/application_consumer.rb.erb +7 -0
  97. data/lib/karafka/templates/application_responder.rb.erb +11 -0
  98. data/lib/karafka/templates/karafka.rb.erb +92 -0
  99. data/lib/karafka/version.rb +7 -0
  100. data/log/.gitkeep +0 -0
  101. metadata +325 -0
  102. metadata.gz.sig +4 -0
@@ -0,0 +1,36 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Instrumentation
+     # Listener that sets a proc title with a nice descriptive value
+     class ProctitleListener
+       # Updates proc title to an initializing one
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_initializing(_event)
+         setproctitle('initializing')
+       end
+
+       # Updates proc title to a running one
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_running(_event)
+         setproctitle('running')
+       end
+
+       # Updates proc title to a stopping one
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_stopping(_event)
+         setproctitle('stopping')
+       end
+
+       private
+
+       # Sets a proper proc title with our constant prefix
+       # @param status [String] any status we want to set
+       def setproctitle(status)
+         ::Process.setproctitle(
+           "karafka #{Karafka::App.config.client_id} (#{status})"
+         )
+       end
+     end
+   end
+ end
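In a Karafka 1.4 app this listener is typically subscribed to the monitor in karafka.rb (the install template wires it up). A minimal sketch, assuming a booted Karafka app:

    # Subscribe the listener so lifecycle events update the process title;
    # `ps` will then show e.g. "karafka my_app (running)"
    Karafka.monitor.subscribe(Karafka::Instrumentation::ProctitleListener.new)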
@@ -0,0 +1,140 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Instrumentation
+     # Default listener that hooks up to our instrumentation and uses its events for logging
+     # It can be removed/replaced without any harm to the Karafka app flow
+     class StdoutListener
+       # Log levels that we use in this particular listener
+       USED_LOG_LEVELS = %i[
+         debug
+         info
+         error
+         fatal
+       ].freeze
+
+       # Logs details about incoming batches and with which consumer we will consume them
+       # @param event [Dry::Events::Event] event details including payload
+       def on_connection_batch_delegator_call(event)
+         consumer = event[:consumer]
+         topic = consumer.topic.name
+         kafka_messages = event[:kafka_batch].messages
+         info(
+           <<~MSG.chomp.tr("\n", ' ')
+             #{kafka_messages.count} messages
+             on #{topic} topic
+             delegated to #{consumer.class}
+           MSG
+         )
+       end
+
+       # Logs details about an incoming message and with which consumer we will consume it
+       # @param event [Dry::Events::Event] event details including payload
+       def on_connection_message_delegator_call(event)
+         consumer = event[:consumer]
+         topic = consumer.topic.name
+         info "1 message on #{topic} topic delegated to #{consumer.class}"
+       end
+
+       # Logs details about each received message value deserialization
+       # @param event [Dry::Events::Event] event details including payload
+       def on_params_params_deserialize(event)
+         # Keep in mind that the caller here is a params object, not a controller,
+         # so it returns a topic as a string, not a routing topic
+         debug(
+           <<~MSG.chomp.tr("\n", ' ')
+             Params deserialization for #{event[:caller].metadata.topic} topic
+             successful in #{event[:time]} ms
+           MSG
+         )
+       end
+
+       # Logs unsuccessful deserialization attempts of incoming data
+       # @param event [Dry::Events::Event] event details including payload
+       def on_params_params_deserialize_error(event)
+         topic = event[:caller].metadata.topic
+         error = event[:error]
+         error "Params deserialization error for #{topic} topic: #{error}"
+       end
+
+       # Logs errors that occurred in a listener fetch loop
+       # @param event [Dry::Events::Event] event details including payload
+       # @note It's an error, not a fatal, as we can recover from it
+       def on_connection_listener_fetch_loop_error(event)
+         error "Listener fetch loop error: #{event[:error]}"
+       end
+
+       # Logs errors that are related to the connection itself
+       # @param event [Dry::Events::Event] event details including payload
+       # @note Karafka will attempt to reconnect, so it's an error, not a fatal
+       def on_connection_client_fetch_loop_error(event)
+         error "Client fetch loop error: #{event[:error]}"
+       end
+
+       # Logs info about a crashed fetcher
+       # @param event [Dry::Events::Event] event details including payload
+       # @note If this happens, Karafka will shut down, as it means a critical error
+       #   in one of the threads
+       def on_fetcher_call_error(event)
+         fatal "Fetcher crash due to an error: #{event[:error]}"
+       end
+
+       # Logs info about processing of a certain dataset with an inline backend
+       # @param event [Dry::Events::Event] event details including payload
+       def on_backends_inline_process(event)
+         count = event[:caller].send(:params_batch).to_a.size
+         topic = event[:caller].topic.name
+         time = event[:time]
+         info "Inline processing of topic #{topic} with #{count} messages took #{time} ms"
+       end
+
+       # Logs info about system signals that Karafka received
+       # @param event [Dry::Events::Event] event details including payload
+       def on_process_notice_signal(event)
+         info "Received #{event[:signal]} system signal"
+       end
+
+       # Logs info about responder usage within a controller flow
+       # @param event [Dry::Events::Event] event details including payload
+       def on_consumers_responders_respond_with(event)
+         calling = event[:caller]
+         responder = calling.topic.responder
+         data = event[:data]
+         info "Responded from #{calling.class} using #{responder} with following data #{data}"
+       end
+
+       # Logs info that we're initializing the Karafka app
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_initializing(_event)
+         info "Initializing Karafka server #{::Process.pid}"
+       end
+
+       # Logs info that we're running the Karafka app
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_running(_event)
+         info "Running Karafka server #{::Process.pid}"
+       end
+
+       # Logs info that we're going to stop the Karafka server
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_stopping(_event)
+         # We use a separate thread as logging can't be called from a trap context
+         Thread.new { info "Stopping Karafka server #{::Process.pid}" }
+       end
+
+       # Logs an error that Karafka was unable to stop the server gracefully and had to do a
+       # forced exit
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_stopping_error(_event)
+         # We use a separate thread as logging can't be called from a trap context
+         Thread.new { error "Forceful Karafka server #{::Process.pid} stop" }
+       end
+
+       USED_LOG_LEVELS.each do |log_level|
+         define_method log_level do |*args|
+           Karafka.logger.send(log_level, *args)
+         end
+       end
+     end
+   end
+ end
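The method names follow the monitor's event naming: an event such as connection.batch_delegator.call maps to on_connection_batch_delegator_call (dots become underscores, with an on_ prefix). A minimal subscription sketch, assuming a configured app:

    # Route all of the events above to the Karafka logger
    Karafka.monitor.subscribe(Karafka::Instrumentation::StdoutListener.new)

A replacement listener only needs to define methods for the events it cares about; events without a matching method are simply not delivered to it.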
@@ -0,0 +1,26 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Params
+     # Simple batch metadata object that stores all non-message information received from Kafka
+     # cluster while fetching the data
+     # @note This metadata object refers to per batch metadata, not `#params.metadata`
+     BatchMetadata = Struct.new(
+       :batch_size,
+       :first_offset,
+       :highwater_mark_offset,
+       :unknown_last_offset,
+       :last_offset,
+       :offset_lag,
+       :deserializer,
+       :partition,
+       :topic,
+       keyword_init: true
+     ) do
+       # @return [Boolean] is the last offset known or unknown
+       def unknown_last_offset?
+         unknown_last_offset
+       end
+     end
+   end
+ end
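With batch fetching enabled on a topic, consumers expose this struct as batch_metadata. A hedged sketch of reading it inside a consumer (ApplicationConsumer is the base class from the install template; the logging body is illustrative):

    class EventsConsumer < ApplicationConsumer
      def consume
        # offset_lag tells us how far behind the high watermark this batch ended
        Karafka.logger.info(
          "Batch of #{batch_metadata.batch_size} from " \
          "#{batch_metadata.topic}/#{batch_metadata.partition}, " \
          "lag: #{batch_metadata.offset_lag}"
        )
      end
    end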
@@ -0,0 +1,30 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Params
+     module Builders
+       # Builder for creating a batch metadata object based on the batch information
+       module BatchMetadata
+         class << self
+           # Creates metadata based on the kafka batch data
+           # @param kafka_batch [Kafka::FetchedBatch] kafka batch details
+           # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
+           # @return [Karafka::Params::BatchMetadata] batch metadata object
+           def from_kafka_batch(kafka_batch, topic)
+             Karafka::Params::BatchMetadata.new(
+               batch_size: kafka_batch.messages.count,
+               first_offset: kafka_batch.first_offset,
+               highwater_mark_offset: kafka_batch.highwater_mark_offset,
+               unknown_last_offset: kafka_batch.unknown_last_offset?,
+               last_offset: kafka_batch.last_offset,
+               offset_lag: kafka_batch.offset_lag,
+               deserializer: topic.deserializer,
+               partition: kafka_batch.partition,
+               topic: topic.name
+             ).freeze
+           end
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,38 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Params
+     # Since we create params-related objects in a couple of contexts / places, and backends
+     # can build them up in their own way, we have this namespace.
+     # It allows us to isolate the actual params objects from their building process, which
+     # can be context dependent.
+     module Builders
+       # Builder for params
+       module Params
+         class << self
+           # @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
+           # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
+           # @return [Karafka::Params::Params] params object with payload and message metadata
+           def from_kafka_message(kafka_message, topic)
+             metadata = Karafka::Params::Metadata.new(
+               create_time: kafka_message.create_time,
+               headers: kafka_message.headers || {},
+               is_control_record: kafka_message.is_control_record,
+               key: kafka_message.key,
+               offset: kafka_message.offset,
+               deserializer: topic.deserializer,
+               partition: kafka_message.partition,
+               receive_time: Time.now,
+               topic: topic.name
+             ).freeze
+
+             Karafka::Params::Params.new(
+               kafka_message.value,
+               metadata
+             )
+           end
+         end
+       end
+     end
+   end
+ end
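Note how the builder wires the routing topic's deserializer into the frozen metadata; this is what Params#payload later invokes. A hedged sketch of a custom deserializer compatible with that contract (it receives the params object and reads raw_payload; the XML example assumes ActiveSupport is available):

    class XmlDeserializer
      # Called lazily by Karafka::Params::Params#payload
      # @param params [Karafka::Params::Params] params with raw_payload and metadata
      def call(params)
        Hash.from_xml(params.raw_payload)
      end
    end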
@@ -0,0 +1,25 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Params
+     module Builders
+       # Builder for creating params batch instances
+       module ParamsBatch
+         class << self
+           # Creates a params batch with params inside, based on the incoming messages
+           # and the topic they come from
+           # @param kafka_messages [Array<Kafka::FetchedMessage>] raw fetched messages
+           # @param topic [Karafka::Routing::Topic] topic from which we received the messages
+           # @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
+           def from_kafka_messages(kafka_messages, topic)
+             params_array = kafka_messages.map do |message|
+               Karafka::Params::Builders::Params.from_kafka_message(message, topic)
+             end
+
+             Karafka::Params::ParamsBatch.new(params_array).freeze
+           end
+         end
+       end
+     end
+   end
+ end
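Together, these builders form the path from a raw ruby-kafka batch to what a consumer sees. A sketch of how the connection layer chains them (kafka_batch is assumed to be a Kafka::FetchedBatch, topic a Karafka::Routing::Topic):

    # Per-batch metadata (offsets, lag, partition), built once per fetch
    batch_metadata = Karafka::Params::Builders::BatchMetadata
                     .from_kafka_batch(kafka_batch, topic)

    # Each fetched message becomes a lazy Karafka::Params::Params inside the batch
    params_batch = Karafka::Params::Builders::ParamsBatch
                   .from_kafka_messages(kafka_batch.messages, topic)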
@@ -0,0 +1,20 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Params
+     # Single message / params metadata details that can be accessed without the need for the
+     # payload deserialization
+     Metadata = Struct.new(
+       :create_time,
+       :headers,
+       :is_control_record,
+       :key,
+       :offset,
+       :deserializer,
+       :partition,
+       :receive_time,
+       :topic,
+       keyword_init: true
+     )
+   end
+ end
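Because this metadata lives outside the payload, filtering and routing decisions can be made without paying any deserialization cost. A short sketch inside a consumer's consume flow:

    params_batch.each do |params|
      # Plain struct readers; no payload deserialization happens here
      next if params.metadata.is_control_record

      Karafka.logger.debug(
        "#{params.metadata.topic}/#{params.metadata.partition}@#{params.metadata.offset}"
      )
    end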
@@ -0,0 +1,50 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Params namespace encapsulating all the logic that is directly related to params handling
+   module Params
+     # It provides lazy loading: not only is nothing deserialized until the first usage, but
+     # we can also skip the deserializer entirely unless our logic needs the payload. That way
+     # we can operate on heavy-to-deserialize data without slowing down the whole application.
+     class Params
+       attr_reader :raw_payload, :metadata
+
+       # @param raw_payload [Object] incoming payload before deserialization
+       # @param metadata [Karafka::Params::Metadata] message metadata object
+       def initialize(raw_payload, metadata)
+         @raw_payload = raw_payload
+         @metadata = metadata
+         @deserialized = false
+         @payload = nil
+       end
+
+       # @return [Object] lazy-deserialized data (deserialized upon first request)
+       def payload
+         return @payload if deserialized?
+
+         @payload = deserialize
+         # We mark deserialization as successful only after it happens, so in case of an error
+         # this won't be falsely set to true
+         @deserialized = true
+         @payload
+       end
+
+       # @return [Boolean] whether the given params payload was already deserialized
+       def deserialized?
+         @deserialized
+       end
+
+       private
+
+       # @return [Object] tries to deserialize the data, instrumenting both success and failure
+       def deserialize
+         Karafka.monitor.instrument('params.params.deserialize', caller: self) do
+           metadata.deserializer.call(self)
+         end
+       rescue ::StandardError => e
+         Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
+         raise e
+       end
+     end
+   end
+ end
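A sketch of the lazy behavior, assuming a topic configured with a JSON deserializer: raw_payload is always available as received, while the deserializer runs only on the first payload call and the result is memoized:

    params.raw_payload    # => '{"id":1}'  (raw string, no deserialization)
    params.deserialized?  # => false
    params.payload        # => { "id" => 1 }  (deserializer runs here, once)
    params.deserialized?  # => true
    params.payload        # => memoized result, no second deserializer call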
@@ -0,0 +1,60 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Params
+     # Params batch represents a set of messages received from Kafka.
+     # @note Params are internally lazy loaded before first use. That way we can skip the
+     #   deserialization process if we have an after_fetch that rejects some incoming messages
+     #   without using their params. It can also be used when handling really heavy data.
+     class ParamsBatch
+       include Enumerable
+
+       # @param params_array [Array<Karafka::Params::Params>] array with karafka params
+       # @return [Karafka::Params::ParamsBatch] lazy evaluated params batch object
+       def initialize(params_array)
+         @params_array = params_array
+       end
+
+       # @yieldparam [Karafka::Params::Params] each params instance
+       # @note Invoking this method will not cause each param to be loaded and
+       #   deserialized one after another.
+       def each
+         @params_array.each { |param| yield(param) }
+       end
+
+       # @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so
+       #   they can be used for batch insert, etc. Without invoking this, params won't be
+       #   deserialized until their first use
+       def deserialize!
+         each(&:payload)
+       end
+
+       # @return [Array<Object>] array with deserialized payloads. This method can be useful
+       #   when we don't care about metadata and just want to extract all the data payloads
+       #   from the batch
+       def payloads
+         map(&:payload)
+       end
+
+       # @return [Karafka::Params::Params] first element
+       def first
+         @params_array.first
+       end
+
+       # @return [Karafka::Params::Params] last element
+       def last
+         @params_array.last
+       end
+
+       # @return [Integer] number of messages in the batch
+       def size
+         @params_array.size
+       end
+
+       # @return [Array<Karafka::Params::Params>] pure array with params
+       def to_a
+         @params_array
+       end
+     end
+   end
+ end
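In a batch consumer this is the object behind params_batch. A hedged sketch of both access styles (Event is a hypothetical ActiveRecord model):

    def consume
      # payloads deserializes everything in one go, handy for batch inserts
      Event.insert_all(params_batch.payloads)

      # or iterate and inspect metadata without touching the payloads
      params_batch.each do |params|
        Karafka.logger.debug "offset: #{params.metadata.offset}"
      end
    end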
@@ -0,0 +1,47 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Namespace for various other libs patches
+   module Patches
+     # Patches for the ruby-kafka gem
+     module RubyKafka
+       # This patch allows us to inject business logic in between fetches and before the
+       # consumer stop, so we can perform a stop commit or anything else that we need, since
+       # the ruby-kafka fetch loop does not allow that directly.
+       # We don't want to use the ruby-kafka poll API, as it brings many more problems that
+       # we would have to take care of. This way we get the control over the stopping
+       # process that we need, without those issues (since we're the ones that initiate the
+       # stop for each thread).
+       def consumer_loop
+         super do
+           consumers = Karafka::Persistence::Consumers
+                       .current
+                       .values
+                       .flat_map(&:values)
+                       .select { |consumer| consumer.class.respond_to?(:after_fetch) }
+
+           if Karafka::App.stopping?
+             publish_event(consumers, 'before_stop')
+             Karafka::Persistence::Client.read.stop
+           else
+             publish_event(consumers, 'before_poll')
+             yield
+             publish_event(consumers, 'after_poll')
+           end
+         end
+       end
+
+       private
+
+       # Notifies consumers about particular events happening
+       # @param consumers [Array<Object>] all consumers that want to be notified about an event
+       # @param event_name [String] name of the event that happened
+       def publish_event(consumers, event_name)
+         consumers.each do |consumer|
+           key = "consumers.#{Helpers::Inflector.map(consumer.class.to_s)}.#{event_name}"
+           Karafka::App.monitor.instrument(key, context: consumer)
+         end
+       end
+     end
+   end
+ end
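The events published here are what the consumer callbacks feature hooks into: a consumer class that uses Karafka::Consumers::Callbacks responds to after_fetch, which is why the select above picks it up. A hedged sketch of a consumer reacting to these events (the callback bodies are illustrative):

    class EventsConsumer < ApplicationConsumer
      include Karafka::Consumers::Callbacks

      # Runs between fetches, before each poll
      before_poll { Karafka.logger.debug 'About to poll' }

      # Runs once when the server is stopping, before the client stops
      before_stop { Karafka.logger.info 'Wrapping up before shutdown' }

      def consume
        # ...
      end
    end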