karafka 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +2 -0
  3. data.tar.gz.sig +0 -0
  4. data/.coditsu/ci.yml +3 -0
  5. data/.console_irbrc +11 -0
  6. data/.github/FUNDING.yml +3 -0
  7. data/.github/ISSUE_TEMPLATE/bug_report.md +50 -0
  8. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  9. data/.gitignore +69 -0
  10. data/.rspec +1 -0
  11. data/.ruby-gemset +1 -0
  12. data/.ruby-version +1 -0
  13. data/.travis.yml +36 -0
  14. data/CHANGELOG.md +520 -0
  15. data/CODE_OF_CONDUCT.md +46 -0
  16. data/CONTRIBUTING.md +41 -0
  17. data/Gemfile +12 -0
  18. data/Gemfile.lock +137 -0
  19. data/MIT-LICENCE +18 -0
  20. data/README.md +101 -0
  21. data/bin/karafka +19 -0
  22. data/certs/mensfeld.pem +25 -0
  23. data/config/errors.yml +39 -0
  24. data/karafka.gemspec +44 -0
  25. data/lib/karafka.rb +71 -0
  26. data/lib/karafka/app.rb +53 -0
  27. data/lib/karafka/attributes_map.rb +68 -0
  28. data/lib/karafka/backends/inline.rb +16 -0
  29. data/lib/karafka/base_consumer.rb +57 -0
  30. data/lib/karafka/base_responder.rb +226 -0
  31. data/lib/karafka/cli.rb +54 -0
  32. data/lib/karafka/cli/base.rb +78 -0
  33. data/lib/karafka/cli/console.rb +31 -0
  34. data/lib/karafka/cli/flow.rb +45 -0
  35. data/lib/karafka/cli/info.rb +31 -0
  36. data/lib/karafka/cli/install.rb +64 -0
  37. data/lib/karafka/cli/server.rb +71 -0
  38. data/lib/karafka/code_reloader.rb +67 -0
  39. data/lib/karafka/connection/api_adapter.rb +155 -0
  40. data/lib/karafka/connection/batch_delegator.rb +51 -0
  41. data/lib/karafka/connection/builder.rb +16 -0
  42. data/lib/karafka/connection/client.rb +117 -0
  43. data/lib/karafka/connection/listener.rb +71 -0
  44. data/lib/karafka/connection/message_delegator.rb +36 -0
  45. data/lib/karafka/consumers/callbacks.rb +71 -0
  46. data/lib/karafka/consumers/includer.rb +63 -0
  47. data/lib/karafka/consumers/metadata.rb +10 -0
  48. data/lib/karafka/consumers/responders.rb +24 -0
  49. data/lib/karafka/consumers/single_params.rb +15 -0
  50. data/lib/karafka/contracts.rb +10 -0
  51. data/lib/karafka/contracts/config.rb +21 -0
  52. data/lib/karafka/contracts/consumer_group.rb +206 -0
  53. data/lib/karafka/contracts/consumer_group_topic.rb +19 -0
  54. data/lib/karafka/contracts/responder_usage.rb +54 -0
  55. data/lib/karafka/contracts/server_cli_options.rb +29 -0
  56. data/lib/karafka/errors.rb +51 -0
  57. data/lib/karafka/fetcher.rb +42 -0
  58. data/lib/karafka/helpers/class_matcher.rb +88 -0
  59. data/lib/karafka/helpers/config_retriever.rb +46 -0
  60. data/lib/karafka/helpers/inflector.rb +26 -0
  61. data/lib/karafka/helpers/multi_delegator.rb +32 -0
  62. data/lib/karafka/instrumentation/logger.rb +57 -0
  63. data/lib/karafka/instrumentation/monitor.rb +70 -0
  64. data/lib/karafka/instrumentation/proctitle_listener.rb +36 -0
  65. data/lib/karafka/instrumentation/stdout_listener.rb +138 -0
  66. data/lib/karafka/params/builders/metadata.rb +33 -0
  67. data/lib/karafka/params/builders/params.rb +36 -0
  68. data/lib/karafka/params/builders/params_batch.rb +25 -0
  69. data/lib/karafka/params/metadata.rb +35 -0
  70. data/lib/karafka/params/params.rb +68 -0
  71. data/lib/karafka/params/params_batch.rb +61 -0
  72. data/lib/karafka/patches/ruby_kafka.rb +47 -0
  73. data/lib/karafka/persistence/client.rb +29 -0
  74. data/lib/karafka/persistence/consumers.rb +45 -0
  75. data/lib/karafka/persistence/topics.rb +48 -0
  76. data/lib/karafka/process.rb +60 -0
  77. data/lib/karafka/responders/builder.rb +36 -0
  78. data/lib/karafka/responders/topic.rb +55 -0
  79. data/lib/karafka/routing/builder.rb +89 -0
  80. data/lib/karafka/routing/consumer_group.rb +61 -0
  81. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  82. data/lib/karafka/routing/proxy.rb +46 -0
  83. data/lib/karafka/routing/router.rb +29 -0
  84. data/lib/karafka/routing/topic.rb +62 -0
  85. data/lib/karafka/routing/topic_mapper.rb +53 -0
  86. data/lib/karafka/serialization/json/deserializer.rb +27 -0
  87. data/lib/karafka/serialization/json/serializer.rb +31 -0
  88. data/lib/karafka/server.rb +83 -0
  89. data/lib/karafka/setup/config.rb +221 -0
  90. data/lib/karafka/setup/configurators/water_drop.rb +36 -0
  91. data/lib/karafka/setup/dsl.rb +21 -0
  92. data/lib/karafka/status.rb +29 -0
  93. data/lib/karafka/templates/application_consumer.rb.erb +7 -0
  94. data/lib/karafka/templates/application_responder.rb.erb +11 -0
  95. data/lib/karafka/templates/karafka.rb.erb +92 -0
  96. data/lib/karafka/version.rb +7 -0
  97. data/log/.gitkeep +0 -0
  98. metadata +336 -0
  99. metadata.gz.sig +0 -0
data/lib/karafka/instrumentation/proctitle_listener.rb
@@ -0,0 +1,36 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Instrumentation
+     # Listener that sets a proc title with a nice descriptive value
+     class ProctitleListener
+       # Updates proc title to an initializing one
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_initializing(_event)
+         setproctitle('initializing')
+       end
+
+       # Updates proc title to a running one
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_running(_event)
+         setproctitle('running')
+       end
+
+       # Updates proc title to a stopping one
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_stopping(_event)
+         setproctitle('stopping')
+       end
+
+       private
+
+       # Sets a proper proc title with our constant prefix
+       # @param status [String] any status we want to set
+       def setproctitle(status)
+         ::Process.setproctitle(
+           "karafka #{Karafka::App.config.client_id} (#{status})"
+         )
+       end
+     end
+   end
+ end
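This listener is opt-in. A minimal sketch of wiring it up in the karafka.rb boot file, assuming the monitor's #subscribe accepts listener instances the same way the shipped templates use it:

    # karafka.rb
    # Subscribe an instance so each app lifecycle event updates the process title
    Karafka.monitor.subscribe(Karafka::Instrumentation::ProctitleListener.new)

    # `ps` output then reads e.g.: karafka my_app (running)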
data/lib/karafka/instrumentation/stdout_listener.rb
@@ -0,0 +1,138 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Instrumentation
+     # Default listener that hooks up to our instrumentation and uses its events for logging
+     # It can be removed/replaced without any harm to the Karafka app flow
+     class StdoutListener
+       # Log levels that we use in this particular listener
+       USED_LOG_LEVELS = %i[
+         debug
+         info
+         error
+         fatal
+       ].freeze
+
+       # Logs details about incoming batches and with which consumer we will consume them
+       # @param event [Dry::Events::Event] event details including payload
+       def on_connection_batch_delegator_call(event)
+         consumer = event[:consumer]
+         topic = consumer.topic.name
+         kafka_messages = event[:kafka_batch].messages
+         info(
+           <<~MSG.chomp.tr("\n", ' ')
+             #{kafka_messages.count} messages
+             on #{topic} topic
+             delegated to #{consumer.class}
+           MSG
+         )
+       end
+
+       # Logs details about an incoming message and with which consumer we will consume it
+       # @param event [Dry::Events::Event] event details including payload
+       def on_connection_message_delegator_call(event)
+         consumer = event[:consumer]
+         topic = consumer.topic.name
+         info "1 message on #{topic} topic delegated to #{consumer.class}"
+       end
+
+       # Logs details about each received message value deserialization
+       # @param event [Dry::Events::Event] event details including payload
+       def on_params_params_deserialize(event)
+         # Keep in mind that the caller here is a params object, not a controller,
+         # so it returns a topic as a string, not a routing topic
+         debug(
+           <<~MSG.chomp.tr("\n", ' ')
+             Params deserialization for #{event[:caller].topic} topic
+             successful in #{event[:time]} ms
+           MSG
+         )
+       end
+
+       # Logs unsuccessful deserialization attempts of incoming data
+       # @param event [Dry::Events::Event] event details including payload
+       def on_params_params_deserialize_error(event)
+         error "Params deserialization error for #{event[:caller].topic} topic: #{event[:error]}"
+       end
+
+       # Logs errors that occurred in a listener fetch loop
+       # @param event [Dry::Events::Event] event details including payload
+       # @note It's an error, not a fatal, as we can recover from it
+       def on_connection_listener_fetch_loop_error(event)
+         error "Listener fetch loop error: #{event[:error]}"
+       end
+
+       # Logs errors that are related to the connection itself
+       # @param event [Dry::Events::Event] event details including payload
+       # @note Karafka will attempt to reconnect, so it's an error, not a fatal
+       def on_connection_client_fetch_loop_error(event)
+         error "Client fetch loop error: #{event[:error]}"
+       end
+
+       # Logs info about a crashed fetcher
+       # @param event [Dry::Events::Event] event details including payload
+       # @note If this happens, Karafka will shut down as it means a critical error
+       #   in one of the threads
+       def on_fetcher_call_error(event)
+         fatal "Fetcher crash due to an error: #{event[:error]}"
+       end
+
+       # Logs info about processing of a certain dataset with an inline backend
+       # @param event [Dry::Events::Event] event details including payload
+       def on_backends_inline_process(event)
+         count = event[:caller].send(:params_batch).to_a.size
+         topic = event[:caller].topic.name
+         time = event[:time]
+         info "Inline processing of topic #{topic} with #{count} messages took #{time} ms"
+       end
+
+       # Logs info about system signals that Karafka received
+       # @param event [Dry::Events::Event] event details including payload
+       def on_process_notice_signal(event)
+         info "Received #{event[:signal]} system signal"
+       end
+
+       # Logs info about responder usage within a controller flow
+       # @param event [Dry::Events::Event] event details including payload
+       def on_consumers_responders_respond_with(event)
+         calling = event[:caller]
+         responder = calling.topic.responder
+         data = event[:data]
+         info "Responded from #{calling.class} using #{responder} with the following data #{data}"
+       end
+
+       # Logs info that we're initializing the Karafka app
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_initializing(_event)
+         info "Initializing Karafka server #{::Process.pid}"
+       end
+
+       # Logs info that we're running the Karafka app
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_running(_event)
+         info "Running Karafka server #{::Process.pid}"
+       end
+
+       # Logs info that we're going to stop the Karafka server
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_stopping(_event)
+         # We use a separate thread as logging can't be called from trap context
+         Thread.new { info "Stopping Karafka server #{::Process.pid}" }
+       end
+
+       # Logs an error that Karafka was unable to stop the server gracefully and had to do a
+       # forced exit
+       # @param _event [Dry::Events::Event] event details including payload
+       def on_app_stopping_error(_event)
+         # We use a separate thread as logging can't be called from trap context
+         Thread.new { error "Forceful Karafka server #{::Process.pid} stop" }
+       end
+
+       USED_LOG_LEVELS.each do |log_level|
+         define_method log_level do |*args|
+           Karafka.logger.send(log_level, *args)
+         end
+       end
+     end
+   end
+ end
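Like the proctitle listener, this one is subscribed in the boot file. A minimal sketch, matching the default wiring this release's templates use:

    # karafka.rb
    Karafka.monitor.subscribe(Karafka::Instrumentation::StdoutListener.new)

Handler method names follow the monitor's convention of mapping event names onto on_-prefixed methods ('connection.batch_delegator.call' becomes on_connection_batch_delegator_call), so a custom listener only needs to implement the events it cares about.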
data/lib/karafka/params/builders/metadata.rb
@@ -0,0 +1,33 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Params
+     module Builders
+       # Builder for creating a metadata object based on the message or batch information
+       # @note We have 2 ways of creating metadata based on the way ruby-kafka operates
+       module Metadata
+         class << self
+           # Creates metadata based on the kafka batch data
+           # @param kafka_batch [Kafka::FetchedBatch] kafka batch details
+           # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
+           # @return [Karafka::Params::Metadata] metadata object
+           def from_kafka_batch(kafka_batch, topic)
+             Karafka::Params::Metadata
+               .new
+               .merge!(
+                 'batch_size' => kafka_batch.messages.count,
+                 'first_offset' => kafka_batch.first_offset,
+                 'highwater_mark_offset' => kafka_batch.highwater_mark_offset,
+                 'last_offset' => kafka_batch.last_offset,
+                 'offset_lag' => kafka_batch.offset_lag,
+                 'deserializer' => topic.deserializer,
+                 'partition' => kafka_batch.partition,
+                 'topic' => kafka_batch.topic,
+                 'unknown_last_offset' => kafka_batch.unknown_last_offset?
+               )
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/params/builders/params.rb
@@ -0,0 +1,36 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Params
+     # Since we create params-related objects in a couple of contexts/places, and backends
+     # can build them up in their own way, we have this namespace.
+     # It allows us to isolate the actual params objects from their building process,
+     # which can be context dependent.
+     module Builders
+       # Builder for params
+       module Params
+         class << self
+           # @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
+           # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
+           # @return [Karafka::Params::Params] params object
+           def from_kafka_message(kafka_message, topic)
+             Karafka::Params::Params
+               .new
+               .merge!(
+                 'create_time' => kafka_message.create_time,
+                 'headers' => kafka_message.headers || {},
+                 'is_control_record' => kafka_message.is_control_record,
+                 'key' => kafka_message.key,
+                 'offset' => kafka_message.offset,
+                 'deserializer' => topic.deserializer,
+                 'partition' => kafka_message.partition,
+                 'receive_time' => Time.now,
+                 'topic' => kafka_message.topic,
+                 'payload' => kafka_message.value
+               )
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/params/builders/params_batch.rb
@@ -0,0 +1,25 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Params
+     module Builders
+       # Builder for creating params batch instances
+       module ParamsBatch
+         class << self
+           # Creates a params batch with params inside, based on the incoming messages
+           # and the topic from which they come
+           # @param kafka_messages [Array<Kafka::FetchedMessage>] raw fetched messages
+           # @param topic [Karafka::Routing::Topic] topic for which we've received messages
+           # @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
+           def from_kafka_messages(kafka_messages, topic)
+             params_array = kafka_messages.map! do |message|
+               Karafka::Params::Builders::Params.from_kafka_message(message, topic)
+             end
+
+             Karafka::Params::ParamsBatch.new(params_array)
+           end
+         end
+       end
+     end
+   end
+ end
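Taken together, the builders map raw ruby-kafka structures onto Karafka's params objects. A minimal sketch of the flow, where the OpenStruct values are hypothetical stand-ins for a Kafka::FetchedMessage and a routing topic:

    require 'karafka'
    require 'ostruct'

    # Hypothetical stand-ins for a fetched message and a routing topic
    message = OpenStruct.new(
      create_time: Time.now, headers: {}, is_control_record: false,
      key: nil, offset: 0, partition: 0, topic: 'events', value: '{"id":1}'
    )
    topic = OpenStruct.new(deserializer: ->(params) { params['payload'] })

    batch = Karafka::Params::Builders::ParamsBatch.from_kafka_messages([message], topic)
    batch.to_a.first['topic'] #=> 'events' (no deserialization triggered yet)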
data/lib/karafka/params/metadata.rb
@@ -0,0 +1,35 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Params
+     # Simple metadata object that stores all non-message information received from Kafka cluster
+     # while fetching the data
+     class Metadata < Hash
+       # Attributes that should be accessible as methods as well (not only hash)
+       METHOD_ATTRIBUTES = %w[
+         batch_size
+         first_offset
+         highwater_mark_offset
+         last_offset
+         offset_lag
+         deserializer
+         partition
+         topic
+       ].freeze
+
+       private_constant :METHOD_ATTRIBUTES
+
+       METHOD_ATTRIBUTES.each do |attr|
+         # Defines a method call accessor to a particular hash field.
+         define_method(attr) do
+           self[attr]
+         end
+       end
+
+       # @return [Boolean] is the last offset known or unknown
+       def unknown_last_offset?
+         self['unknown_last_offset']
+       end
+     end
+   end
+ end
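A quick sketch of how the generated accessors behave (the values here are illustrative):

    metadata = Karafka::Params::Metadata.new.merge!('partition' => 0, 'topic' => 'events')
    metadata.topic    #=> 'events' - via the generated method reader
    metadata['topic'] #=> 'events' - plain hash access still works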
data/lib/karafka/params/params.rb
@@ -0,0 +1,68 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Params namespace encapsulating all the logic that is directly related to params handling
+   module Params
+     # It provides lazy loading: the payload is not only left untouched until first usage, but
+     # we can also skip running the deserializer entirely unless our logic needs it. That way
+     # we can operate on heavy-to-deserialize data without slowing down the whole application.
+     class Params < Hash
+       # Params attributes that should be available via a method call invocation for Kafka
+       # client compatibility.
+       # ruby-kafka internally passes Kafka::FetchedMessage objects around and uses those
+       # fields via method calls, so in order to be able to pass our params objects there,
+       # they have to have the same API.
+       METHOD_ATTRIBUTES = %w[
+         create_time
+         headers
+         is_control_record
+         key
+         offset
+         deserializer
+         deserialized
+         partition
+         receive_time
+         topic
+         payload
+       ].freeze
+
+       private_constant :METHOD_ATTRIBUTES
+
+       METHOD_ATTRIBUTES.each do |attr|
+         # Defines a method call accessor to a particular hash field.
+         # @note Won't work for complex key names that contain spaces, etc
+         # @param key [Symbol] name of a field that we want to retrieve with a method call
+         # @example
+         #   key_attr_reader :example
+         #   params.example #=> 'my example payload'
+         define_method(attr) do
+           self[attr]
+         end
+       end
+
+       # @return [Karafka::Params::Params] This method will trigger deserializer execution. If
+       #   we decide to retrieve data, the deserializer will be executed and its output stored
+       #   under the 'payload' key. The object will also be marked as already deserialized,
+       #   so we won't deserialize it again.
+       def deserialize!
+         return self if self['deserialized']
+
+         self['deserialized'] = true
+         self['payload'] = deserialize
+         self
+       end
+
+       private
+
+       # @return [Object] deserialized data
+       def deserialize
+         Karafka.monitor.instrument('params.params.deserialize', caller: self) do
+           self['deserializer'].call(self)
+         end
+       rescue ::StandardError => e
+         Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
+         raise e
+       end
+     end
+   end
+ end
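The laziness is easiest to see in isolation. A minimal sketch, with a hypothetical JSON deserializer lambda standing in for a topic's configured deserializer:

    require 'karafka'
    require 'json'

    # Hypothetical deserializer - receives the params object itself
    deserializer = ->(params) { JSON.parse(params['payload']) }

    params = Karafka::Params::Params.new.merge!(
      'payload' => '{"id":1}',
      'deserializer' => deserializer,
      'topic' => 'events'
    )

    params['deserialized']      #=> nil - nothing has been parsed yet
    params.deserialize!.payload #=> {"id"=>1}
    params.deserialize!         # no-op from now on, thanks to the 'deserialized' guard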
data/lib/karafka/params/params_batch.rb
@@ -0,0 +1,61 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Params
+     # Params batch represents a set of messages received from Kafka.
+     # @note Params are lazy loaded internally before first use. That way we can skip the
+     #   deserialization process if we have an after_fetch that rejects some incoming messages
+     #   without using params. It can also be used when handling really heavy data.
+     class ParamsBatch
+       include Enumerable
+
+       # @param params_array [Array<Karafka::Params::Params>] array with karafka params
+       # @return [Karafka::Params::ParamsBatch] lazy evaluated params batch object
+       def initialize(params_array)
+         @params_array = params_array
+       end
+
+       # @yieldparam [Karafka::Params::Params] each deserialized and loaded params instance
+       # @note Invocation of this method will cause loading and deserializing of each param,
+       #   one after another. If you want access without deserializing, please use #to_a to
+       #   reach the params array directly
+       def each
+         @params_array.each { |param| yield(param.deserialize!) }
+       end
+
+       # @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so
+       #   they can be used for batch insert, etc. Without invoking this, params won't be
+       #   deserialized until their first use
+       def deserialize!
+         each(&:itself)
+       end
+
+       # @return [Array<Object>] array with deserialized payloads. This method can be useful
+       #   when we don't care about metadata and just want to extract all the data payloads
+       #   from the batch
+       def payloads
+         deserialize!.map(&:payload)
+       end
+
+       # @return [Karafka::Params::Params] first element after the deserialization process
+       def first
+         @params_array.first.deserialize!
+       end
+
+       # @return [Karafka::Params::Params] last element after the deserialization process
+       def last
+         @params_array.last.deserialize!
+       end
+
+       # @return [Array<Karafka::Params::Params>] pure array with params (not deserialized)
+       def to_a
+         @params_array
+       end
+
+       # @return [Integer] number of messages in the batch
+       def size
+         @params_array.size
+       end
+     end
+   end
+ end
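A short sketch of the batch semantics, again with a hypothetical deserializer lambda:

    require 'karafka'

    deserializer = ->(params) { params['payload'].upcase }

    params_array = %w[first second].map do |raw|
      Karafka::Params::Params.new.merge!(
        'payload' => raw, 'deserializer' => deserializer, 'topic' => 'events'
      )
    end

    batch = Karafka::Params::ParamsBatch.new(params_array)

    batch.to_a.first['deserialized'] #=> nil - to_a never triggers deserialization
    batch.payloads                   #=> ["FIRST", "SECOND"]
    batch.size                       #=> 2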
data/lib/karafka/patches/ruby_kafka.rb
@@ -0,0 +1,47 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Namespace for various other libs patches
+   module Patches
+     # Patches for the ruby-kafka gem
+     module RubyKafka
+       # This patch allows us to inject business logic in between fetches and before the
+       # consumer stop, so we can perform a stop commit or anything else that we need, since
+       # the ruby-kafka fetch loop does not allow that directly
+       # We don't want to use the ruby-kafka poll API as it brings many more problems that we
+       # would have to take care of. This way, nothing like that ever happens, but we get the
+       # control over the stopping process that we need (since we're the ones that initiate
+       # it for each thread)
+       def consumer_loop
+         super do
+           consumers = Karafka::Persistence::Consumers
+                       .current
+                       .values
+                       .flat_map(&:values)
+                       .select { |consumer| consumer.class.respond_to?(:after_fetch) }
+
+           if Karafka::App.stopping?
+             publish_event(consumers, 'before_stop')
+             Karafka::Persistence::Client.read.stop
+           else
+             publish_event(consumers, 'before_poll')
+             yield
+             publish_event(consumers, 'after_poll')
+           end
+         end
+       end
+
+       private
+
+       # Notifies consumers about particular events happening
+       # @param consumers [Array<Object>] all consumers that want to be notified about an event
+       # @param event_name [String] name of the event that happened
+       def publish_event(consumers, event_name)
+         consumers.each do |consumer|
+           key = "consumers.#{Helpers::Inflector.map(consumer.class.to_s)}.#{event_name}"
+           Karafka::App.monitor.instrument(key, context: consumer)
+         end
+       end
+     end
+   end
+ end
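From the application side, these injected events surface as consumer lifecycle callbacks. A minimal sketch, assuming the Karafka::Consumers::Callbacks module from this release exposes the before_stop/before_poll/after_poll hooks that publish_event targets (EventsConsumer and its body are illustrative):

    class EventsConsumer < ApplicationConsumer
      include Karafka::Consumers::Callbacks

      before_stop do
        # Flush buffers / commit work before the underlying ruby-kafka client stops
      end

      def consume
        # regular processing
      end
    end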