karafka 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +2 -0
  3. data.tar.gz.sig +0 -0
  4. data/.coditsu/ci.yml +3 -0
  5. data/.console_irbrc +11 -0
  6. data/.github/FUNDING.yml +3 -0
  7. data/.github/ISSUE_TEMPLATE/bug_report.md +50 -0
  8. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  9. data/.gitignore +69 -0
  10. data/.rspec +1 -0
  11. data/.ruby-gemset +1 -0
  12. data/.ruby-version +1 -0
  13. data/.travis.yml +36 -0
  14. data/CHANGELOG.md +520 -0
  15. data/CODE_OF_CONDUCT.md +46 -0
  16. data/CONTRIBUTING.md +41 -0
  17. data/Gemfile +12 -0
  18. data/Gemfile.lock +137 -0
  19. data/MIT-LICENCE +18 -0
  20. data/README.md +101 -0
  21. data/bin/karafka +19 -0
  22. data/certs/mensfeld.pem +25 -0
  23. data/config/errors.yml +39 -0
  24. data/karafka.gemspec +44 -0
  25. data/lib/karafka.rb +71 -0
  26. data/lib/karafka/app.rb +53 -0
  27. data/lib/karafka/attributes_map.rb +68 -0
  28. data/lib/karafka/backends/inline.rb +16 -0
  29. data/lib/karafka/base_consumer.rb +57 -0
  30. data/lib/karafka/base_responder.rb +226 -0
  31. data/lib/karafka/cli.rb +54 -0
  32. data/lib/karafka/cli/base.rb +78 -0
  33. data/lib/karafka/cli/console.rb +31 -0
  34. data/lib/karafka/cli/flow.rb +45 -0
  35. data/lib/karafka/cli/info.rb +31 -0
  36. data/lib/karafka/cli/install.rb +64 -0
  37. data/lib/karafka/cli/server.rb +71 -0
  38. data/lib/karafka/code_reloader.rb +67 -0
  39. data/lib/karafka/connection/api_adapter.rb +155 -0
  40. data/lib/karafka/connection/batch_delegator.rb +51 -0
  41. data/lib/karafka/connection/builder.rb +16 -0
  42. data/lib/karafka/connection/client.rb +117 -0
  43. data/lib/karafka/connection/listener.rb +71 -0
  44. data/lib/karafka/connection/message_delegator.rb +36 -0
  45. data/lib/karafka/consumers/callbacks.rb +71 -0
  46. data/lib/karafka/consumers/includer.rb +63 -0
  47. data/lib/karafka/consumers/metadata.rb +10 -0
  48. data/lib/karafka/consumers/responders.rb +24 -0
  49. data/lib/karafka/consumers/single_params.rb +15 -0
  50. data/lib/karafka/contracts.rb +10 -0
  51. data/lib/karafka/contracts/config.rb +21 -0
  52. data/lib/karafka/contracts/consumer_group.rb +206 -0
  53. data/lib/karafka/contracts/consumer_group_topic.rb +19 -0
  54. data/lib/karafka/contracts/responder_usage.rb +54 -0
  55. data/lib/karafka/contracts/server_cli_options.rb +29 -0
  56. data/lib/karafka/errors.rb +51 -0
  57. data/lib/karafka/fetcher.rb +42 -0
  58. data/lib/karafka/helpers/class_matcher.rb +88 -0
  59. data/lib/karafka/helpers/config_retriever.rb +46 -0
  60. data/lib/karafka/helpers/inflector.rb +26 -0
  61. data/lib/karafka/helpers/multi_delegator.rb +32 -0
  62. data/lib/karafka/instrumentation/logger.rb +57 -0
  63. data/lib/karafka/instrumentation/monitor.rb +70 -0
  64. data/lib/karafka/instrumentation/proctitle_listener.rb +36 -0
  65. data/lib/karafka/instrumentation/stdout_listener.rb +138 -0
  66. data/lib/karafka/params/builders/metadata.rb +33 -0
  67. data/lib/karafka/params/builders/params.rb +36 -0
  68. data/lib/karafka/params/builders/params_batch.rb +25 -0
  69. data/lib/karafka/params/metadata.rb +35 -0
  70. data/lib/karafka/params/params.rb +68 -0
  71. data/lib/karafka/params/params_batch.rb +61 -0
  72. data/lib/karafka/patches/ruby_kafka.rb +47 -0
  73. data/lib/karafka/persistence/client.rb +29 -0
  74. data/lib/karafka/persistence/consumers.rb +45 -0
  75. data/lib/karafka/persistence/topics.rb +48 -0
  76. data/lib/karafka/process.rb +60 -0
  77. data/lib/karafka/responders/builder.rb +36 -0
  78. data/lib/karafka/responders/topic.rb +55 -0
  79. data/lib/karafka/routing/builder.rb +89 -0
  80. data/lib/karafka/routing/consumer_group.rb +61 -0
  81. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  82. data/lib/karafka/routing/proxy.rb +46 -0
  83. data/lib/karafka/routing/router.rb +29 -0
  84. data/lib/karafka/routing/topic.rb +62 -0
  85. data/lib/karafka/routing/topic_mapper.rb +53 -0
  86. data/lib/karafka/serialization/json/deserializer.rb +27 -0
  87. data/lib/karafka/serialization/json/serializer.rb +31 -0
  88. data/lib/karafka/server.rb +83 -0
  89. data/lib/karafka/setup/config.rb +221 -0
  90. data/lib/karafka/setup/configurators/water_drop.rb +36 -0
  91. data/lib/karafka/setup/dsl.rb +21 -0
  92. data/lib/karafka/status.rb +29 -0
  93. data/lib/karafka/templates/application_consumer.rb.erb +7 -0
  94. data/lib/karafka/templates/application_responder.rb.erb +11 -0
  95. data/lib/karafka/templates/karafka.rb.erb +92 -0
  96. data/lib/karafka/version.rb +7 -0
  97. data/log/.gitkeep +0 -0
  98. metadata +336 -0
  99. metadata.gz.sig +0 -0
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Instrumentation
5
+ # Listener that sets a proc title with a nice descriptive value
6
class ProctitleListener
  # Generates one listener hook per application state: on_app_initializing, on_app_running
  # and on_app_stopping. Each of them puts the matching state into the process title.
  %w[initializing running stopping].each do |state|
    # @param _event [Dry::Events::Event] event details including payload (not used)
    define_method("on_app_#{state}") do |_event|
      setproctitle(state)
    end
  end

  private

  # Composes the title out of our constant prefix, the configured client id and the status,
  # and sets it as the process title
  # @param status [String] any status we want to set
  def setproctitle(status)
    title = "karafka #{Karafka::App.config.client_id} (#{status})"
    ::Process.setproctitle(title)
  end
end
35
+ end
36
+ end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Instrumentation
5
+ # Default listener that hooks up to our instrumentation and uses its events for logging
6
+ # It can be removed/replaced or anything without any harm to the Karafka app flow
7
class StdoutListener
  # Log levels that we use in this particular listener
  USED_LOG_LEVELS = %i[
    debug
    info
    error
    fatal
  ].freeze

  # For each used log level we define a same-named instance method that forwards all of its
  # arguments to the Karafka logger
  USED_LOG_LEVELS.each do |level|
    define_method(level) do |*args|
      Karafka.logger.send(level, *args)
    end
  end

  # Logs how many messages arrived in a batch and which consumer class will consume them
  # @param event [Dry::Events::Event] event details including payload
  def on_connection_batch_delegator_call(event)
    consumer = event[:consumer]
    topic_name = consumer.topic.name
    messages_count = event[:kafka_batch].messages.count
    info "#{messages_count} messages on #{topic_name} topic delegated to #{consumer.class}"
  end

  # Logs that a single message arrived and which consumer class will consume it
  # @param event [Dry::Events::Event] event details including payload
  def on_connection_message_delegator_call(event)
    consumer = event[:consumer]
    topic_name = consumer.topic.name
    info "1 message on #{topic_name} topic delegated to #{consumer.class}"
  end

  # Logs details about each successful deserialization of a received message
  # @param event [Dry::Events::Event] event details including payload
  def on_params_params_deserialize(event)
    # Keep in mind, that a caller here is a params object not a consumer,
    # so it returns a topic as a string, not a routing topic
    topic_name = event[:caller].topic
    elapsed = event[:time]
    debug "Params deserialization for #{topic_name} topic successful in #{elapsed} ms"
  end

  # Logs unsuccessful deserialization attempts of incoming data
  # @param event [Dry::Events::Event] event details including payload
  def on_params_params_deserialize_error(event)
    topic_name = event[:caller].topic
    error "Params deserialization error for #{topic_name} topic: #{event[:error]}"
  end

  # Logs errors that occurred in a listener fetch loop
  # @param event [Dry::Events::Event] event details including payload
  # @note It's an error as we can recover from it not a fatal
  def on_connection_listener_fetch_loop_error(event)
    error "Listener fetch loop error: #{event[:error]}"
  end

  # Logs errors that are related to the connection itself
  # @param event [Dry::Events::Event] event details including payload
  # @note Karafka will attempt to reconnect, so an error not a fatal
  def on_connection_client_fetch_loop_error(event)
    error "Client fetch loop error: #{event[:error]}"
  end

  # Logs info about crashed fetcher
  # @param event [Dry::Events::Event] event details including payload
  # @note If this happens, Karafka will shutdown as it means a critical error
  #   in one of the threads
  def on_fetcher_call_error(event)
    fatal "Fetcher crash due to an error: #{event[:error]}"
  end

  # Logs info about processing of a certain dataset with an inline backend
  # @param event [Dry::Events::Event] event details including payload
  def on_backends_inline_process(event)
    consumer = event[:caller]
    # params_batch is reached via #send, so it works even when it is not a public method
    count = consumer.send(:params_batch).to_a.size
    topic = consumer.topic.name
    time = event[:time]
    info "Inline processing of topic #{topic} with #{count} messages took #{time} ms"
  end

  # Logs info about system signals that Karafka received
  # @param event [Dry::Events::Event] event details including payload
  def on_process_notice_signal(event)
    info "Received #{event[:signal]} system signal"
  end

  # Logs info about responder usage within a consumer flow
  # @param event [Dry::Events::Event] event details including payload
  def on_consumers_responders_respond_with(event)
    origin = event[:caller]
    responder = origin.topic.responder
    data = event[:data]
    info "Responded from #{origin.class} using #{responder} with following data #{data}"
  end

  # Logs info that we're initializing Karafka app
  # @param _event [Dry::Events::Event] event details including payload
  def on_app_initializing(_event)
    info "Initializing Karafka server #{::Process.pid}"
  end

  # Logs info that we're running Karafka app
  # @param _event [Dry::Events::Event] event details including payload
  def on_app_running(_event)
    info "Running Karafka server #{::Process.pid}"
  end

  # Logs info that we're going to stop the Karafka server
  # @param _event [Dry::Events::Event] event details including payload
  # @return [Thread] thread in which the logging happens
  def on_app_stopping(_event)
    # We use a separate thread as logging can't be called from trap context
    Thread.new do
      info "Stopping Karafka server #{::Process.pid}"
    end
  end

  # Logs an error that Karafka was unable to stop the server gracefully and it had to do a
  # forced exit
  # @param _event [Dry::Events::Event] event details including payload
  # @return [Thread] thread in which the logging happens
  def on_app_stopping_error(_event)
    # We use a separate thread as logging can't be called from trap context
    Thread.new do
      error "Forceful Karafka server #{::Process.pid} stop"
    end
  end
end
137
+ end
138
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Params
5
+ module Builders
6
+ # Builder for creating metadata object based on the message or batch information
7
+ # @note We have 2 ways of creating metadata based on the way ruby-kafka operates
8
module Metadata
  # Creates metadata based on the kafka batch data
  # @param kafka_batch [Kafka::FetchedBatch] kafka batch details
  # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
  # @return [Karafka::Params::Metadata] metadata object filled with the batch level details
  #   and the deserializer taken from the routing topic
  def self.from_kafka_batch(kafka_batch, topic)
    batch_details = {
      'batch_size' => kafka_batch.messages.count,
      'first_offset' => kafka_batch.first_offset,
      'highwater_mark_offset' => kafka_batch.highwater_mark_offset,
      'last_offset' => kafka_batch.last_offset,
      'offset_lag' => kafka_batch.offset_lag,
      'deserializer' => topic.deserializer,
      'partition' => kafka_batch.partition,
      'topic' => kafka_batch.topic,
      'unknown_last_offset' => kafka_batch.unknown_last_offset?
    }

    Karafka::Params::Metadata.new.merge!(batch_details)
  end
end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Params
5
+ # Because we create params-related objects in several contexts / places, and backends can
6
+ # build them in their own way, we have this namespace.
7
+ # It allows us to isolate the actual params objects from their building process, which can be
8
+ # context dependent.
9
+ module Builders
10
+ # Builder for params
11
module Params
  # Builds a params object out of a single fetched message
  # @param kafka_message [Kafka::FetchedMessage] message fetched from Kafka
  # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
  # @return [Karafka::Params::Params] params object with the raw (not yet deserialized)
  #   message value stored under the 'payload' key
  def self.from_kafka_message(kafka_message, topic)
    message_details = {
      'create_time' => kafka_message.create_time,
      # When the message carries no headers, we fall back to an empty hash
      'headers' => kafka_message.headers || {},
      'is_control_record' => kafka_message.is_control_record,
      'key' => kafka_message.key,
      'offset' => kafka_message.offset,
      'deserializer' => topic.deserializer,
      'partition' => kafka_message.partition,
      # Moment in which we've built the params, not the moment the message was created
      'receive_time' => Time.now,
      'topic' => kafka_message.topic,
      'payload' => kafka_message.value
    }

    Karafka::Params::Params.new.merge!(message_details)
  end
end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Params
5
+ module Builders
6
+ # Builder for creating params batch instances
7
module ParamsBatch
  # Creates params batch with params inside based on the incoming messages
  # and the topic from which they come
  # @param kafka_messages [Array<Kafka::FetchedMessage>] raw fetched messages
  # @param topic [Karafka::Routing::Topic] topic for which we've received messages
  # @return [Karafka::Params::ParamsBatch<Karafka::Params::Params>] batch with params
  # @note The kafka_messages array is converted in place (map!), so after this call it
  #   contains params objects instead of the raw messages
  def self.from_kafka_messages(kafka_messages, topic)
    kafka_messages.map! do |kafka_message|
      Karafka::Params::Builders::Params.from_kafka_message(kafka_message, topic)
    end

    # map! returns its receiver, so the batch wraps the very same array we were given
    Karafka::Params::ParamsBatch.new(kafka_messages)
  end
end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Params
5
+ # Simple metadata object that stores all non-message information received from Kafka cluster
6
+ # while fetching the data
7
class Metadata < Hash
  # Names of the hash fields that are exposed as reader methods as well (not only via hash)
  METHOD_ATTRIBUTES = %w[
    batch_size
    first_offset
    highwater_mark_offset
    last_offset
    offset_lag
    deserializer
    partition
    topic
  ].freeze

  private_constant :METHOD_ATTRIBUTES

  # Each reader returns whatever is stored under the same-named string key
  METHOD_ATTRIBUTES.each do |field|
    define_method(field) { self[field] }
  end

  # @return [Boolean] is the last offset known or unknown
  def unknown_last_offset?
    self['unknown_last_offset']
  end
end
34
+ end
35
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ # Params namespace encapsulating all the logic that is directly related to params handling
5
+ module Params
6
+ # It provides lazy loading not only until the first usage, but also allows us to skip
7
+ # using deserializer until we execute our logic. That way we can operate with
8
+ # heavy-deserialization data without slowing down the whole application.
9
class Params < Hash
  # Params attributes that should be available via a method call invocation for Kafka
  # client compatibility.
  # Kafka passes internally Kafka::FetchedMessage object and the ruby-kafka consumer
  # uses those fields via method calls, so in order to be able to pass there our params
  # objects, they have to have the same api.
  METHOD_ATTRIBUTES = %w[
    create_time
    headers
    is_control_record
    key
    offset
    deserializer
    deserialized
    partition
    receive_time
    topic
    payload
  ].freeze

  private_constant :METHOD_ATTRIBUTES

  METHOD_ATTRIBUTES.each do |attr|
    # Defines a reader method for a particular hash field. It only reads the hash and never
    # triggers the deserialization.
    # @example
    #   params.topic #=> same value as params['topic']
    define_method(attr) do
      self[attr]
    end
  end

  # Runs the deserializer (only once) and replaces the raw payload with its result
  # @return [Karafka::Params::Params] self with the 'payload' deserialized. The object is
  #   marked as deserialized, so the next invocations won't deserialize it again.
  # @raise [StandardError] whatever the deserializer raised. In such a case the params are
  #   left untouched (raw payload, not marked as deserialized), so the deserialization can
  #   be retried.
  def deserialize!
    return self if self['deserialized']

    # We mark the params as deserialized only after the deserializer succeeded. Otherwise
    # a failed attempt would leave the raw payload flagged as an already deserialized one.
    self['payload'] = deserialize
    self['deserialized'] = true
    self
  end

  private

  # Runs the deserializer stored under the 'deserializer' key within the instrumentation
  # @return [Object] deserialized data
  # @raise [StandardError] deserializer error, re-raised after it was instrumented
  def deserialize
    Karafka.monitor.instrument('params.params.deserialize', caller: self) do
      self['deserializer'].call(self)
    end
  rescue ::StandardError => e
    Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
    raise
  end
end
67
+ end
68
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Params
5
+ # Params batch represents a set of messages received from Kafka.
6
+ # @note Params internally are lazy loaded before first use. That way we can skip
7
+ # deserialization process if we have after_fetch that rejects some incoming messages
8
+ # without using params. It can also be used when handling really heavy data.
9
class ParamsBatch
  include Enumerable

  # @param params_array [Array<Karafka::Params::Params>] array with karafka params
  # @return [Karafka::Params::ParamsBatch] lazy evaluated params batch object
  def initialize(params_array)
    @params_array = params_array
  end

  # @yieldparam [Karafka::Params::Params] each deserialized and loaded params instance
  # @return [Array<Karafka::Params::Params>, Enumerator] underlying params array when a block
  #   was given, otherwise an enumerator that deserializes the params while iterating
  # @note Invocation of this method will cause loading and deserializing each param after
  #   another. If you want to get access without deserializing, please use #to_a
  def each
    # Same as the core collections: without a block we hand back an enumerator instead of
    # failing on the yield
    return enum_for(:each) { size } unless block_given?

    @params_array.each { |param| yield(param.deserialize!) }
  end

  # @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so they
  #   can be used for batch insert, etc. Without invoking all, up until first use, they won't
  #   be deserialized
  def deserialize!
    each(&:itself)
  end

  # @return [Array<Object>] array with deserialized payloads. This method can be useful when
  #   we don't care about metadata and just want to extract all the data payloads from the
  #   batch
  def payloads
    deserialize!.map(&:payload)
  end

  # @return [Karafka::Params::Params, nil] first element after the deserialization process
  #   or nil when the batch is empty
  def first
    @params_array.first&.deserialize!
  end

  # @return [Karafka::Params::Params, nil] last element after the deserialization process
  #   or nil when the batch is empty
  def last
    @params_array.last&.deserialize!
  end

  # @return [Array<Karafka::Params::Params>] pure array with params (not deserialized)
  def to_a
    @params_array
  end

  # @return [Integer] number of messages in the batch
  def size
    @params_array.size
  end
end
60
+ end
61
+ end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ # Namespace for various other libs patches
5
+ module Patches
6
+ # Patches for Ruby Kafka gem
7
module RubyKafka
  # This patch allows us to inject business logic in between fetches and before the consumer
  # stop, so we can perform stop commit or anything else that we need since
  # ruby-kafka fetch loop does not allow that directly
  # We don't want to use poll ruby-kafka api as it brings many more problems that we would
  # have to take care of. That way, nothing like that ever happens but we get the control
  # over the stopping process that we need (since we're the ones that initiate it for each
  # thread)
  def consumer_loop
    super do
      # Only consumers whose class responds to after_fetch get notified about the events
      persisted = Karafka::Persistence::Consumers.current.values.flat_map(&:values)
      consumers = persisted.select do |instance|
        instance.class.respond_to?(:after_fetch)
      end

      if Karafka::App.stopping?
        # The original block is not invoked when stopping; we stop the persisted client
        publish_event(consumers, 'before_stop')
        Karafka::Persistence::Client.read.stop
      else
        publish_event(consumers, 'before_poll')
        yield
        publish_event(consumers, 'after_poll')
      end
    end
  end

  private

  # Notifies consumers about particular events happening
  # @param consumers [Array<Object>] all consumers that want to be notified about an event
  # @param event_name [String] name of the event that happened
  def publish_event(consumers, event_name)
    consumers.each do |consumer|
      mapped_class = Helpers::Inflector.map(consumer.class.to_s)
      event_key = "consumers.#{mapped_class}.#{event_name}"
      Karafka::App.monitor.instrument(event_key, context: consumer)
    end
  end
end
46
+ end
47
+ end