karafka 1.4.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +2 -0
  3. data.tar.gz.sig +0 -0
  4. data/.coditsu/ci.yml +3 -0
  5. data/.console_irbrc +11 -0
  6. data/.diffend.yml +3 -0
  7. data/.github/FUNDING.yml +3 -0
  8. data/.github/ISSUE_TEMPLATE/bug_report.md +50 -0
  9. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  10. data/.github/workflows/ci.yml +52 -0
  11. data/.gitignore +69 -0
  12. data/.rspec +1 -0
  13. data/.ruby-gemset +1 -0
  14. data/.ruby-version +1 -0
  15. data/CHANGELOG.md +566 -0
  16. data/CODE_OF_CONDUCT.md +46 -0
  17. data/CONTRIBUTING.md +41 -0
  18. data/Gemfile +14 -0
  19. data/Gemfile.lock +139 -0
  20. data/MIT-LICENCE +18 -0
  21. data/README.md +99 -0
  22. data/bin/karafka +19 -0
  23. data/certs/mensfeld.pem +25 -0
  24. data/config/errors.yml +39 -0
  25. data/docker-compose.yml +17 -0
  26. data/karafka.gemspec +43 -0
  27. data/lib/karafka.rb +72 -0
  28. data/lib/karafka/app.rb +53 -0
  29. data/lib/karafka/attributes_map.rb +62 -0
  30. data/lib/karafka/backends/inline.rb +16 -0
  31. data/lib/karafka/base_consumer.rb +57 -0
  32. data/lib/karafka/base_responder.rb +226 -0
  33. data/lib/karafka/cli.rb +54 -0
  34. data/lib/karafka/cli/base.rb +78 -0
  35. data/lib/karafka/cli/console.rb +31 -0
  36. data/lib/karafka/cli/flow.rb +48 -0
  37. data/lib/karafka/cli/info.rb +31 -0
  38. data/lib/karafka/cli/install.rb +66 -0
  39. data/lib/karafka/cli/server.rb +71 -0
  40. data/lib/karafka/code_reloader.rb +67 -0
  41. data/lib/karafka/connection/api_adapter.rb +161 -0
  42. data/lib/karafka/connection/batch_delegator.rb +55 -0
  43. data/lib/karafka/connection/builder.rb +18 -0
  44. data/lib/karafka/connection/client.rb +117 -0
  45. data/lib/karafka/connection/listener.rb +71 -0
  46. data/lib/karafka/connection/message_delegator.rb +36 -0
  47. data/lib/karafka/consumers/batch_metadata.rb +10 -0
  48. data/lib/karafka/consumers/callbacks.rb +71 -0
  49. data/lib/karafka/consumers/includer.rb +64 -0
  50. data/lib/karafka/consumers/responders.rb +24 -0
  51. data/lib/karafka/consumers/single_params.rb +15 -0
  52. data/lib/karafka/contracts.rb +10 -0
  53. data/lib/karafka/contracts/config.rb +21 -0
  54. data/lib/karafka/contracts/consumer_group.rb +206 -0
  55. data/lib/karafka/contracts/consumer_group_topic.rb +19 -0
  56. data/lib/karafka/contracts/responder_usage.rb +54 -0
  57. data/lib/karafka/contracts/server_cli_options.rb +31 -0
  58. data/lib/karafka/errors.rb +51 -0
  59. data/lib/karafka/fetcher.rb +42 -0
  60. data/lib/karafka/helpers/class_matcher.rb +88 -0
  61. data/lib/karafka/helpers/config_retriever.rb +46 -0
  62. data/lib/karafka/helpers/inflector.rb +26 -0
  63. data/lib/karafka/helpers/multi_delegator.rb +32 -0
  64. data/lib/karafka/instrumentation/logger.rb +58 -0
  65. data/lib/karafka/instrumentation/monitor.rb +70 -0
  66. data/lib/karafka/instrumentation/proctitle_listener.rb +36 -0
  67. data/lib/karafka/instrumentation/stdout_listener.rb +140 -0
  68. data/lib/karafka/params/batch_metadata.rb +26 -0
  69. data/lib/karafka/params/builders/batch_metadata.rb +30 -0
  70. data/lib/karafka/params/builders/params.rb +38 -0
  71. data/lib/karafka/params/builders/params_batch.rb +25 -0
  72. data/lib/karafka/params/metadata.rb +20 -0
  73. data/lib/karafka/params/params.rb +50 -0
  74. data/lib/karafka/params/params_batch.rb +60 -0
  75. data/lib/karafka/patches/ruby_kafka.rb +47 -0
  76. data/lib/karafka/persistence/client.rb +29 -0
  77. data/lib/karafka/persistence/consumers.rb +45 -0
  78. data/lib/karafka/persistence/topics.rb +48 -0
  79. data/lib/karafka/process.rb +60 -0
  80. data/lib/karafka/responders/builder.rb +36 -0
  81. data/lib/karafka/responders/topic.rb +55 -0
  82. data/lib/karafka/routing/builder.rb +89 -0
  83. data/lib/karafka/routing/consumer_group.rb +61 -0
  84. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  85. data/lib/karafka/routing/proxy.rb +46 -0
  86. data/lib/karafka/routing/router.rb +29 -0
  87. data/lib/karafka/routing/topic.rb +62 -0
  88. data/lib/karafka/routing/topic_mapper.rb +53 -0
  89. data/lib/karafka/serialization/json/deserializer.rb +27 -0
  90. data/lib/karafka/serialization/json/serializer.rb +31 -0
  91. data/lib/karafka/server.rb +86 -0
  92. data/lib/karafka/setup/config.rb +223 -0
  93. data/lib/karafka/setup/configurators/water_drop.rb +36 -0
  94. data/lib/karafka/setup/dsl.rb +21 -0
  95. data/lib/karafka/status.rb +29 -0
  96. data/lib/karafka/templates/application_consumer.rb.erb +7 -0
  97. data/lib/karafka/templates/application_responder.rb.erb +11 -0
  98. data/lib/karafka/templates/karafka.rb.erb +92 -0
  99. data/lib/karafka/version.rb +7 -0
  100. data/log/.gitkeep +0 -0
  101. metadata +325 -0
  102. metadata.gz.sig +4 -0
@@ -0,0 +1,36 @@
# frozen_string_literal: true

module Karafka
  module Instrumentation
    # Listener that keeps the process title in sync with the Karafka app lifecycle, so the
    # current state is visible in tools that display process titles
    class ProctitleListener
      # Switches the proc title to the initializing state
      # @param _event [Dry::Events::Event] event details including payload (not used)
      def on_app_initializing(_event)
        setproctitle('initializing')
      end

      # Switches the proc title to the running state
      # @param _event [Dry::Events::Event] event details including payload (not used)
      def on_app_running(_event)
        setproctitle('running')
      end

      # Switches the proc title to the stopping state
      # @param _event [Dry::Events::Event] event details including payload (not used)
      def on_app_stopping(_event)
        setproctitle('stopping')
      end

      private

      # Builds the title out of the constant "karafka" prefix, the configured client id and
      # the given status, and then applies it to the current process
      # @param status [String] any status we want to set
      def setproctitle(status)
        title = format(
          'karafka %<client_id>s (%<status>s)',
          client_id: Karafka::App.config.client_id,
          status: status
        )

        ::Process.setproctitle(title)
      end
    end
  end
end
@@ -0,0 +1,140 @@
# frozen_string_literal: true

module Karafka
  module Instrumentation
    # Default listener that hooks up to our instrumentation and turns its events into log entries
    # It can be removed/replaced or anything without any harm to the Karafka app flow
    class StdoutListener
      # Log levels that we use in this particular listener
      USED_LOG_LEVELS = %i[
        debug
        info
        error
        fatal
      ].freeze

      # For each used log level we define a method with the same name that forwards all of its
      # arguments to the Karafka logger
      USED_LOG_LEVELS.each do |log_level|
        define_method(log_level) { |*args| Karafka.logger.send(log_level, *args) }
      end

      # Logs details about incoming batches and with which consumer we will consume them
      # @param event [Dry::Events::Event] event details including payload
      def on_connection_batch_delegator_call(event)
        consumer = event[:consumer]
        messages_count = event[:kafka_batch].messages.count
        topic_name = consumer.topic.name

        # The heredoc is collapsed into a single line before it gets logged
        message = <<~MSG.chomp.tr("\n", ' ')
          #{messages_count} messages
          on #{topic_name} topic
          delegated to #{consumer.class}
        MSG

        info(message)
      end

      # Logs details about incoming message and with which consumer we will consume it
      # @param event [Dry::Events::Event] event details including payload
      def on_connection_message_delegator_call(event)
        consumer = event[:consumer]

        info("1 message on #{consumer.topic.name} topic delegated to #{consumer.class}")
      end

      # Logs details about each received message value deserialization
      # @param event [Dry::Events::Event] event details including payload
      def on_params_params_deserialize(event)
        # Keep in mind, that a caller here is a param object not a controller,
        # so it returns a topic as a string, not a routing topic
        message = <<~MSG.chomp.tr("\n", ' ')
          Params deserialization for #{event[:caller].metadata.topic} topic
          successful in #{event[:time]} ms
        MSG

        debug(message)
      end

      # Logs unsuccessful deserialization attempts of incoming data
      # @param event [Dry::Events::Event] event details including payload
      def on_params_params_deserialize_error(event)
        topic_name = event[:caller].metadata.topic
        # Named differently than the #error logging method to avoid shadowing it
        cause = event[:error]

        error("Params deserialization error for #{topic_name} topic: #{cause}")
      end

      # Logs errors that occurred in a listener fetch loop
      # @param event [Dry::Events::Event] event details including payload
      # @note It's an error as we can recover from it not a fatal
      def on_connection_listener_fetch_loop_error(event)
        error("Listener fetch loop error: #{event[:error]}")
      end

      # Logs errors that are related to the connection itself
      # @param event [Dry::Events::Event] event details including payload
      # @note Karafka will attempt to reconnect, so an error not a fatal
      def on_connection_client_fetch_loop_error(event)
        error("Client fetch loop error: #{event[:error]}")
      end

      # Logs info about crashed fetcher
      # @param event [Dry::Events::Event] event details including payload
      # @note If this happens, Karafka will shutdown as it means a critical error
      #   in one of the threads
      def on_fetcher_call_error(event)
        fatal("Fetcher crash due to an error: #{event[:error]}")
      end

      # Logs info about processing of a certain dataset with an inline backend
      # @param event [Dry::Events::Event] event details including payload
      def on_backends_inline_process(event)
        consumer = event[:caller]
        # #params_batch is reached via #send, same as the rest of the listener reads it
        count = consumer.send(:params_batch).to_a.size
        topic_name = consumer.topic.name

        info(
          "Inline processing of topic #{topic_name} with #{count} messages took #{event[:time]} ms"
        )
      end

      # Logs info about system signals that Karafka received
      # @param event [Dry::Events::Event] event details including payload
      def on_process_notice_signal(event)
        info("Received #{event[:signal]} system signal")
      end

      # Logs info about responder usage within a consumer flow
      # @param event [Dry::Events::Event] event details including payload
      def on_consumers_responders_respond_with(event)
        consumer = event[:caller]
        responder = consumer.topic.responder

        info(
          "Responded from #{consumer.class} using #{responder} with following data #{event[:data]}"
        )
      end

      # Logs info that we're initializing Karafka app
      # @param _event [Dry::Events::Event] event details including payload
      def on_app_initializing(_event)
        info("Initializing Karafka server #{::Process.pid}")
      end

      # Logs info that we're running Karafka app
      # @param _event [Dry::Events::Event] event details including payload
      def on_app_running(_event)
        info("Running Karafka server #{::Process.pid}")
      end

      # Logs info that we're going to stop the Karafka server
      # @param _event [Dry::Events::Event] event details including payload
      def on_app_stopping(_event)
        # We use a separate thread as logging can't be called from trap context
        Thread.new do
          info("Stopping Karafka server #{::Process.pid}")
        end
      end

      # Logs an error that Karafka was unable to stop the server gracefully and it had to do a
      # forced exit
      # @param _event [Dry::Events::Event] event details including payload
      def on_app_stopping_error(_event)
        # We use a separate thread as logging can't be called from trap context
        Thread.new do
          error("Forceful Karafka server #{::Process.pid} stop")
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
# frozen_string_literal: true

module Karafka
  module Params
    # Simple value object with all the non-message details that we receive from the Kafka
    # cluster together with a fetched batch
    # @note This metadata object refers to per batch metadata, not `#params.metadata`
    BatchMetadata = Struct.new(
      *%i[
        batch_size
        first_offset
        highwater_mark_offset
        unknown_last_offset
        last_offset
        offset_lag
        deserializer
        partition
        topic
      ],
      keyword_init: true
    ) do
      # Predicate-style reader for the `unknown_last_offset` member
      # @return [Boolean] is the last offset known or unknown
      def unknown_last_offset?
        unknown_last_offset
      end
    end
  end
end
@@ -0,0 +1,30 @@
# frozen_string_literal: true

module Karafka
  module Params
    module Builders
      # Builder that creates the batch metadata object out of the batch information
      module BatchMetadata
        class << self
          # Collects the batch level details from the kafka batch and the routing topic and
          # wraps them in a frozen metadata object
          # @param kafka_batch [Kafka::FetchedBatch] kafka batch details
          # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
          # @return [Karafka::Params::BatchMetadata] batch metadata object
          def from_kafka_batch(kafka_batch, topic)
            attributes = {
              batch_size: kafka_batch.messages.count,
              first_offset: kafka_batch.first_offset,
              highwater_mark_offset: kafka_batch.highwater_mark_offset,
              unknown_last_offset: kafka_batch.unknown_last_offset?,
              last_offset: kafka_batch.last_offset,
              offset_lag: kafka_batch.offset_lag,
              deserializer: topic.deserializer,
              partition: kafka_batch.partition,
              topic: topic.name
            }

            Karafka::Params::BatchMetadata.new(**attributes).freeze
          end
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
# frozen_string_literal: true

module Karafka
  module Params
    # We create params related objects in a couple of contexts / places and backends can build
    # them in their own way, hence this namespace.
    # It allows us to isolate the actual params objects from their building process, which can
    # be context dependent.
    module Builders
      # Builder for params
      module Params
        class << self
          # Builds frozen message metadata first and then wraps it, together with the raw
          # message value, in a params object
          # @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
          # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
          # @return [Karafka::Params::Params] params object with payload and message metadata
          def from_kafka_message(kafka_message, topic)
            metadata_attributes = {
              create_time: kafka_message.create_time,
              # Missing headers are normalized to an empty hash
              headers: kafka_message.headers || {},
              is_control_record: kafka_message.is_control_record,
              key: kafka_message.key,
              offset: kafka_message.offset,
              deserializer: topic.deserializer,
              partition: kafka_message.partition,
              receive_time: Time.now,
              topic: topic.name
            }
            metadata = Karafka::Params::Metadata.new(**metadata_attributes).freeze

            Karafka::Params::Params.new(kafka_message.value, metadata)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
# frozen_string_literal: true

module Karafka
  module Params
    module Builders
      # Builder that creates params batch instances
      module ParamsBatch
        class << self
          # Converts each of the incoming messages into a params object and wraps all of them
          # in a frozen params batch
          # @param kafka_messages [Array<Kafka::FetchedMessage>] raw fetched messages
          # @param topic [Karafka::Routing::Topic] topic for which we've received messages
          # @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
          def from_kafka_messages(kafka_messages, topic)
            built_params = kafka_messages.map do |kafka_message|
              Karafka::Params::Builders::Params.from_kafka_message(kafka_message, topic)
            end
            batch = Karafka::Params::ParamsBatch.new(built_params)

            batch.freeze
          end
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
# frozen_string_literal: true

module Karafka
  module Params
    # Details of a single message / params that are available without deserializing the
    # payload
    Metadata = Struct.new(
      *%i[
        create_time
        headers
        is_control_record
        key
        offset
        deserializer
        partition
        receive_time
        topic
      ],
      keyword_init: true
    )
  end
end
@@ -0,0 +1,50 @@
# frozen_string_literal: true

module Karafka
  # Params namespace encapsulating all the logic that is directly related to params handling
  module Params
    # Wrapper around a single message that deserializes its payload lazily: the deserializer
    # is not used until the payload is requested for the first time. That way we can operate
    # with heavy-deserialization data without slowing down the whole application.
    class Params
      attr_reader :raw_payload, :metadata

      # @param raw_payload [Object] incoming payload before deserialization
      # @param metadata [Karafka::Params::Metadata] message metadata object
      def initialize(raw_payload, metadata)
        @raw_payload = raw_payload
        @metadata = metadata
        @payload = nil
        @deserialized = false
      end

      # @return [Object] lazy-deserialized data (deserialized upon first request)
      def payload
        unless deserialized?
          @payload = deserialize
          # The flag is flipped only after the deserialization has finished, so a failed
          # attempt never leaves the object marked as deserialized
          @deserialized = true
        end

        @payload
      end

      # @return [Boolean] true if the payload of these params has already been deserialized
      def deserialized?
        @deserialized
      end

      private

      # Runs the deserializer from the metadata inside of an instrumented block. When it fails,
      # a separate error event is published and the original error is raised again
      # @return [Object] deserialized data
      def deserialize
        Karafka.monitor.instrument('params.params.deserialize', caller: self) do
          metadata.deserializer.call(self)
        end
      rescue ::StandardError => e
        Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
        raise
      end
    end
  end
end
@@ -0,0 +1,60 @@
# frozen_string_literal: true

module Karafka
  module Params
    # Params batch represents a set of messages received from Kafka.
    # @note Params internally are lazy loaded before first use. That way we can skip the
    #   deserialization process if we have after_fetch that rejects some incoming messages
    #   without using params. It can also be used when handling really heavy data.
    class ParamsBatch
      include Enumerable

      # @param params_array [Array<Karafka::Params::Params>] array with karafka params
      # @return [Karafka::Params::ParamsBatch] lazy evaluated params batch object
      def initialize(params_array)
        @params_array = params_array
      end

      # Iterates over all the params in the batch
      # @yieldparam [Karafka::Params::Params] each params instance
      # @return [Array<Karafka::Params::Params>, Enumerator] the underlying params array when
      #   a block is given, otherwise an enumerator over the batch (the same contract that
      #   Ruby core collections follow)
      # @note Invocation of this method will not cause loading and deserializing each param after
      #   another.
      def each(&block)
        # Without this guard a blockless call would fail with LocalJumpError on the first yield
        return to_enum(:each) { size } unless block

        @params_array.each(&block)
      end

      # Forces deserialization of every params object in the batch
      # @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so they
      #   can be used for batch insert, etc. Without invoking all, up until first use, they won't
      #   be deserialized
      def deserialize!
        each(&:payload)
      end

      # @return [Array<Object>] array with deserialized payloads. This method can be useful when
      #   we don't care about metadata and just want to extract all the data payloads from the
      #   batch
      def payloads
        map(&:payload)
      end

      # Delegated to the underlying array and accepts the same optional count, so overriding
      # `Enumerable#first` does not remove its `first(n)` form
      # @param args [Array<Integer>] optional number of leading elements to return
      # @return [Karafka::Params::Params, Array<Karafka::Params::Params>] first element, or an
      #   array with the first n elements when a count is given
      def first(*args)
        @params_array.first(*args)
      end

      # @param args [Array<Integer>] optional number of trailing elements to return
      # @return [Karafka::Params::Params, Array<Karafka::Params::Params>] last element, or an
      #   array with the last n elements when a count is given
      def last(*args)
        @params_array.last(*args)
      end

      # @return [Integer] number of messages in the batch
      def size
        @params_array.size
      end

      # @return [Array<Karafka::Params::Params>] pure array with params (the very same array
      #   that is stored internally, not a copy)
      def to_a
        @params_array
      end
    end
  end
end
@@ -0,0 +1,47 @@
# frozen_string_literal: true

module Karafka
  # Namespace for various other libs patches
  module Patches
    # Patches for Ruby Kafka gem
    module RubyKafka
      # This patch allows us to inject business logic in between fetches and before the consumer
      # stop, so we can perform stop commit or anything else that we need, since the
      # ruby-kafka fetch loop does not allow that directly.
      # We don't want to use the poll ruby-kafka api as it brings many more problems that we
      # would have to take care of. That way, nothing like that ever happens but we get the
      # control over the stopping process that we need (since we're the ones that initiate it
      # for each thread)
      def consumer_loop
        super do
          persisted_consumers = Karafka::Persistence::Consumers.current.values.flat_map(&:values)
          # Only consumers whose class responds to #after_fetch get notified
          notifiable = persisted_consumers.select do |consumer|
            consumer.class.respond_to?(:after_fetch)
          end

          if Karafka::App.stopping?
            publish_event(notifiable, 'before_stop')
            Karafka::Persistence::Client.read.stop
          else
            publish_event(notifiable, 'before_poll')
            # Hands the control back to the block given to #consumer_loop
            yield
            publish_event(notifiable, 'after_poll')
          end
        end
      end

      private

      # Notifies consumers about particular events happening
      # @param consumers [Array<Object>] all consumers that want to be notified about an event
      # @param event_name [String] name of the event that happened
      def publish_event(consumers, event_name)
        consumers.each do |consumer|
          consumer_scope = Helpers::Inflector.map(consumer.class.to_s)
          event_key = "consumers.#{consumer_scope}.#{event_name}"

          Karafka::App.monitor.instrument(event_key, context: consumer)
        end
      end
    end
  end
end