karafka 1.3.0

Files changed (99)
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +2 -0
  3. data.tar.gz.sig +0 -0
  4. data/.coditsu/ci.yml +3 -0
  5. data/.console_irbrc +11 -0
  6. data/.github/FUNDING.yml +3 -0
  7. data/.github/ISSUE_TEMPLATE/bug_report.md +50 -0
  8. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  9. data/.gitignore +69 -0
  10. data/.rspec +1 -0
  11. data/.ruby-gemset +1 -0
  12. data/.ruby-version +1 -0
  13. data/.travis.yml +36 -0
  14. data/CHANGELOG.md +520 -0
  15. data/CODE_OF_CONDUCT.md +46 -0
  16. data/CONTRIBUTING.md +41 -0
  17. data/Gemfile +12 -0
  18. data/Gemfile.lock +137 -0
  19. data/MIT-LICENCE +18 -0
  20. data/README.md +101 -0
  21. data/bin/karafka +19 -0
  22. data/certs/mensfeld.pem +25 -0
  23. data/config/errors.yml +39 -0
  24. data/karafka.gemspec +44 -0
  25. data/lib/karafka.rb +71 -0
  26. data/lib/karafka/app.rb +53 -0
  27. data/lib/karafka/attributes_map.rb +68 -0
  28. data/lib/karafka/backends/inline.rb +16 -0
  29. data/lib/karafka/base_consumer.rb +57 -0
  30. data/lib/karafka/base_responder.rb +226 -0
  31. data/lib/karafka/cli.rb +54 -0
  32. data/lib/karafka/cli/base.rb +78 -0
  33. data/lib/karafka/cli/console.rb +31 -0
  34. data/lib/karafka/cli/flow.rb +45 -0
  35. data/lib/karafka/cli/info.rb +31 -0
  36. data/lib/karafka/cli/install.rb +64 -0
  37. data/lib/karafka/cli/server.rb +71 -0
  38. data/lib/karafka/code_reloader.rb +67 -0
  39. data/lib/karafka/connection/api_adapter.rb +155 -0
  40. data/lib/karafka/connection/batch_delegator.rb +51 -0
  41. data/lib/karafka/connection/builder.rb +16 -0
  42. data/lib/karafka/connection/client.rb +117 -0
  43. data/lib/karafka/connection/listener.rb +71 -0
  44. data/lib/karafka/connection/message_delegator.rb +36 -0
  45. data/lib/karafka/consumers/callbacks.rb +71 -0
  46. data/lib/karafka/consumers/includer.rb +63 -0
  47. data/lib/karafka/consumers/metadata.rb +10 -0
  48. data/lib/karafka/consumers/responders.rb +24 -0
  49. data/lib/karafka/consumers/single_params.rb +15 -0
  50. data/lib/karafka/contracts.rb +10 -0
  51. data/lib/karafka/contracts/config.rb +21 -0
  52. data/lib/karafka/contracts/consumer_group.rb +206 -0
  53. data/lib/karafka/contracts/consumer_group_topic.rb +19 -0
  54. data/lib/karafka/contracts/responder_usage.rb +54 -0
  55. data/lib/karafka/contracts/server_cli_options.rb +29 -0
  56. data/lib/karafka/errors.rb +51 -0
  57. data/lib/karafka/fetcher.rb +42 -0
  58. data/lib/karafka/helpers/class_matcher.rb +88 -0
  59. data/lib/karafka/helpers/config_retriever.rb +46 -0
  60. data/lib/karafka/helpers/inflector.rb +26 -0
  61. data/lib/karafka/helpers/multi_delegator.rb +32 -0
  62. data/lib/karafka/instrumentation/logger.rb +57 -0
  63. data/lib/karafka/instrumentation/monitor.rb +70 -0
  64. data/lib/karafka/instrumentation/proctitle_listener.rb +36 -0
  65. data/lib/karafka/instrumentation/stdout_listener.rb +138 -0
  66. data/lib/karafka/params/builders/metadata.rb +33 -0
  67. data/lib/karafka/params/builders/params.rb +36 -0
  68. data/lib/karafka/params/builders/params_batch.rb +25 -0
  69. data/lib/karafka/params/metadata.rb +35 -0
  70. data/lib/karafka/params/params.rb +68 -0
  71. data/lib/karafka/params/params_batch.rb +61 -0
  72. data/lib/karafka/patches/ruby_kafka.rb +47 -0
  73. data/lib/karafka/persistence/client.rb +29 -0
  74. data/lib/karafka/persistence/consumers.rb +45 -0
  75. data/lib/karafka/persistence/topics.rb +48 -0
  76. data/lib/karafka/process.rb +60 -0
  77. data/lib/karafka/responders/builder.rb +36 -0
  78. data/lib/karafka/responders/topic.rb +55 -0
  79. data/lib/karafka/routing/builder.rb +89 -0
  80. data/lib/karafka/routing/consumer_group.rb +61 -0
  81. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  82. data/lib/karafka/routing/proxy.rb +46 -0
  83. data/lib/karafka/routing/router.rb +29 -0
  84. data/lib/karafka/routing/topic.rb +62 -0
  85. data/lib/karafka/routing/topic_mapper.rb +53 -0
  86. data/lib/karafka/serialization/json/deserializer.rb +27 -0
  87. data/lib/karafka/serialization/json/serializer.rb +31 -0
  88. data/lib/karafka/server.rb +83 -0
  89. data/lib/karafka/setup/config.rb +221 -0
  90. data/lib/karafka/setup/configurators/water_drop.rb +36 -0
  91. data/lib/karafka/setup/dsl.rb +21 -0
  92. data/lib/karafka/status.rb +29 -0
  93. data/lib/karafka/templates/application_consumer.rb.erb +7 -0
  94. data/lib/karafka/templates/application_responder.rb.erb +11 -0
  95. data/lib/karafka/templates/karafka.rb.erb +92 -0
  96. data/lib/karafka/version.rb +7 -0
  97. data/log/.gitkeep +0 -0
  98. metadata +336 -0
  99. metadata.gz.sig +0 -0
data/lib/karafka/connection/api_adapter.rb
@@ -0,0 +1,155 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Namespace for all the things related to Kafka connection
+  module Connection
+    # Mapper used to convert our internal settings into ruby-kafka settings based on their
+    # API requirements.
+    # Since ruby-kafka has more and more options and there are a few "levels" on which
+    # we have to apply them (despite the fact that in Karafka you configure all of it
+    # in one place), we have to remap it into what the ruby-kafka driver requires
+    # @note The good thing about the Kafka.new method is that it ignores all options that
+    #   do nothing. So we don't have to worry about injecting our internal settings
+    #   into the client and breaking stuff
+    module ApiAdapter
+      class << self
+        # Builds all the configuration settings for the Kafka.new method
+        # @return [Array<Hash>] Array with all the client arguments including a hash with all
+        #   the settings required by the Kafka.new method
+        # @note We return an array, so we can inject any arguments we want, in case of changes
+        #   in the raw driver
+        def client
+          # This one is a default that takes all the settings except special
+          # cases defined in the map
+          settings = {
+            logger: ::Karafka.logger,
+            client_id: ::Karafka::App.config.client_id
+          }
+
+          kafka_configs.each do |setting_name, setting_value|
+            # All options for the config adapter should be ignored as we're just interested
+            # in what is left, as we want to pass all the options that are "typical"
+            # and not listed in the api_adapter special cases mapping. All the values
+            # from the api_adapter mapping go somewhere else, not to the client directly
+            next if AttributesMap.api_adapter.values.flatten.include?(setting_name)
+
+            settings[setting_name] = setting_value
+          end
+
+          settings_hash = sanitize(settings)
+
+          # Normalization for the way Kafka::Client accepts arguments from 0.5.3
+          [settings_hash.delete(:seed_brokers), settings_hash]
+        end
+
+        # Builds all the configuration settings for the kafka#consumer method
+        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+        # @return [Array<Hash>] array with all the consumer arguments including a hash with all
+        #   the settings required by Kafka#consumer
+        def consumer(consumer_group)
+          settings = { group_id: consumer_group.id }
+          settings = fetch_for(:consumer, consumer_group, settings)
+          [sanitize(settings)]
+        end
+
+        # Builds all the configuration settings for the kafka consumer consume_each_batch and
+        # consume_each_message methods
+        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+        # @return [Array<Hash>] Array with all the arguments required by the consuming method
+        #   including a hash with all the settings required by
+        #   Kafka::Consumer#consume_each_message and Kafka::Consumer#consume_each_batch
+        def consumption(consumer_group)
+          [
+            sanitize(
+              fetch_for(
+                :consumption,
+                consumer_group,
+                automatically_mark_as_processed: consumer_group.automatically_mark_as_consumed
+              )
+            )
+          ]
+        end
+
+        # Builds all the configuration settings for the kafka consumer#subscribe method
+        # @param topic [Karafka::Routing::Topic] topic that holds details for a given subscription
+        # @return [Hash] hash with all the settings required by the kafka consumer#subscribe method
+        def subscribe(topic)
+          settings = fetch_for(:subscribe, topic)
+          [Karafka::App.config.topic_mapper.outgoing(topic.name), sanitize(settings)]
+        end
+
+        # Builds all the configuration settings required by the kafka consumer#pause method
+        # @param topic [String] topic that we want to pause
+        # @param partition [Integer] number of the partition that we want to pause
+        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+        # @return [Array] array with all the details required to pause the kafka consumer
+        def pause(topic, partition, consumer_group)
+          [
+            Karafka::App.config.topic_mapper.outgoing(topic),
+            partition,
+            {
+              timeout: consumer_group.pause_timeout,
+              max_timeout: consumer_group.pause_max_timeout,
+              exponential_backoff: consumer_group.pause_exponential_backoff
+            }
+          ]
+        end
+
+        # Remaps topic details taking the topic mapper feature into consideration.
+        # @param params [Karafka::Params::Params] params instance
+        # @return [Array] array with all the details needed by ruby-kafka to mark a message
+        #   as processed
+        # @note When the default empty topic mapper is used, no conversion is needed as the
+        #   internal and external formats are exactly the same
+        def mark_message_as_processed(params)
+          # The majority of users don't use custom topic mappers. No need to change anything
+          # when it is a default mapper that does not change anything. Only some cloud
+          # providers require topics to be remapped
+          return [params] if Karafka::App.config.topic_mapper.is_a?(Karafka::Routing::TopicMapper)
+
+          # @note We don't use tap as it is around 13% slower than the non-dup version
+          dupped = params.dup
+          dupped['topic'] = Karafka::App.config.topic_mapper.outgoing(params.topic)
+          [dupped]
+        end
+
+        private
+
+        # Fetches proper settings for a given map namespace
+        # @param namespace_key [Symbol] namespace from the attributes map config adapter hash
+        # @param route_layer [Object] route topic or consumer group
+        # @param preexisting_settings [Hash] hash with some preexisting settings that might have
+        #   been loaded in a different way
+        def fetch_for(namespace_key, route_layer, preexisting_settings = {})
+          kafka_configs.each_key do |setting_name|
+            # Ignore settings that are not related to our namespace
+            next unless AttributesMap.api_adapter[namespace_key].include?(setting_name)
+
+            # Ignore settings that are already initialized,
+            # in case they are in preexisting settings fetched differently
+            next if preexisting_settings.key?(setting_name)
+
+            # Fetch all the settings from a given layer object. Objects can handle the fallback
+            # to the kafka settings, so we don't have to handle that here
+            preexisting_settings[setting_name] = route_layer.send(setting_name)
+          end
+
+          preexisting_settings
+        end
+
+        # Removes nil-valued keys from the final settings so the Kafka driver
+        # defaults can be used for those
+        # @param settings [Hash] settings that may contain nil values
+        # @return [Hash] settings without nil values (none of the karafka options should be nil)
+        def sanitize(settings)
+          settings.reject { |_key, value| value.nil? }
+        end
+
+        # @return [Hash] Kafka config details as a hash
+        def kafka_configs
+          ::Karafka::App.config.kafka.to_h
+        end
+      end
+    end
+  end
+end
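Taken together, `client` assembles an options hash, strips the nil values, and splits off `seed_brokers` so that `Builder` can splat the result straight into `Kafka.new`. A minimal sketch of those last two steps in isolation (the setting values are illustrative, not Karafka defaults):

```ruby
# Example input; the keys mirror what ApiAdapter.client builds up
settings = {
  logger: nil, # nil entries are dropped so ruby-kafka applies its own defaults
  client_id: 'example_app',
  seed_brokers: ['kafka://localhost:9092']
}

# Equivalent of the private sanitize step
settings_hash = settings.reject { |_key, value| value.nil? }

# Equivalent of the Kafka::Client argument normalization: seed brokers become
# the first positional argument, everything else stays as an options hash
client_args = [settings_hash.delete(:seed_brokers), settings_hash]
# => [["kafka://localhost:9092"], { client_id: "example_app" }]
```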
data/lib/karafka/connection/batch_delegator.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Module that delegates processing of a batch of received messages for which we listen
+    # to a proper processor
+    module BatchDelegator
+      class << self
+        # Delegates messages (does something with them)
+        # It will either schedule or run a proper processor action for the messages
+        # @param group_id [String] group_id of a group from which a given message came
+        # @param kafka_batch [<Kafka::FetchedBatch>] raw fetched batch of messages
+        # @note This should be looped to obtain a constant delegating of new messages
+        def call(group_id, kafka_batch)
+          topic = Persistence::Topics.fetch(group_id, kafka_batch.topic)
+          consumer = Persistence::Consumers.fetch(topic, kafka_batch.partition)
+
+          Karafka.monitor.instrument(
+            'connection.batch_delegator.call',
+            caller: self,
+            consumer: consumer,
+            kafka_batch: kafka_batch
+          ) do
+            # Due to how ruby-kafka is built, the metadata that is stored on the batch
+            # level is only available for batch consuming
+            consumer.metadata = Params::Builders::Metadata.from_kafka_batch(kafka_batch, topic)
+            kafka_messages = kafka_batch.messages
+
+            # Depending on the case (persisted or not) we might use a new consumer instance
+            # per each batch, or use the same one for all of them (for implementing buffering, etc.)
+            if topic.batch_consuming
+              consumer.params_batch = Params::Builders::ParamsBatch.from_kafka_messages(
+                kafka_messages,
+                topic
+              )
+              consumer.call
+            else
+              kafka_messages.each do |kafka_message|
+                consumer.params_batch = Params::Builders::ParamsBatch.from_kafka_messages(
+                  [kafka_message],
+                  topic
+                )
+                consumer.call
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
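Which of the two delegation branches runs is decided per topic by the `batch_consuming` routing flag. A hypothetical routing definition toggling it (the app, group, topic, and consumer names are examples, not part of this diff):

```ruby
class KarafkaApp < Karafka::App
  consumer_groups.draw do
    consumer_group :example_group do
      topic :example_topic do
        consumer ExampleConsumer
        # true:  one consumer.call with the full params_batch
        # false: one consumer.call per message, each wrapped in a one-element batch
        batch_consuming true
      end
    end
  end
end
```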
data/lib/karafka/connection/builder.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Builder used to construct a Kafka client
+    module Builder
+      class << self
+        # Builds a Kafka::Client instance that we use to work with the Kafka cluster
+        # @return [::Kafka::Client] returns a Kafka client
+        def call
+          Kafka.new(*ApiAdapter.client)
+        end
+      end
+    end
+  end
+end
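The splat works because `ApiAdapter.client` returns the `[seed_brokers, settings]` pair shown above. A sketch of the equivalent expanded call, assuming a configured Karafka app:

```ruby
# What Builder.call does, written out step by step
seed_brokers, settings = Karafka::Connection::ApiAdapter.client
kafka = Kafka.new(seed_brokers, **settings)

# ...versus the one-liner
kafka = Karafka::Connection::Builder.call
```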
data/lib/karafka/connection/client.rb
@@ -0,0 +1,117 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Class used as a wrapper around the ruby-kafka client to simplify additional
+    # features that we provide/might provide in the future and to hide the internal implementation
+    class Client
+      extend Forwardable
+
+      %i[
+        seek
+        trigger_heartbeat
+        trigger_heartbeat!
+      ].each do |delegated_method|
+        def_delegator :kafka_consumer, delegated_method
+      end
+
+      # Creates a queue consumer client that will pull the data from Kafka
+      # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
+      #   we create a client
+      # @return [Karafka::Connection::Client] group consumer that can subscribe to
+      #   multiple topics
+      def initialize(consumer_group)
+        @consumer_group = consumer_group
+        Persistence::Client.write(self)
+      end
+
+      # Opens a connection, gets messages and calls a block for each of the incoming messages
+      # @yieldparam [Array<Kafka::FetchedMessage>, Symbol] kafka response with info about
+      #   the type of fetcher that is being used
+      # @note This will yield raw messages - no preprocessing or reformatting.
+      def fetch_loop
+        settings = ApiAdapter.consumption(consumer_group)
+
+        if consumer_group.batch_fetching
+          kafka_consumer.each_batch(*settings) { |batch| yield(batch, :batch) }
+        else
+          kafka_consumer.each_message(*settings) { |message| yield(message, :message) }
+        end
+      # @note We catch only the processing errors as any others are considered critical
+      #   (exceptions) and should require a client restart with a backoff
+      rescue Kafka::ProcessingError => e
+        # If there was an error during consumption, we have to log it, pause the current
+        # partition and process other things
+        Karafka.monitor.instrument(
+          'connection.client.fetch_loop.error',
+          caller: self,
+          error: e.cause
+        )
+        pause(e.topic, e.partition)
+        retry
+      end
+
+      # Gracefully stops topic consumption
+      # @note Stopping running consumers without a really important reason is not recommended,
+      #   as until all the consumers are stopped, the server will keep running, serving only
+      #   part of the messages
+      def stop
+        @kafka_consumer&.stop
+        @kafka_consumer = nil
+      end
+
+      # Pauses fetching and consumption of a given topic partition
+      # @param topic [String] topic that we want to pause
+      # @param partition [Integer] number of the partition that we want to pause
+      def pause(topic, partition)
+        kafka_consumer.pause(*ApiAdapter.pause(topic, partition, consumer_group))
+      end
+
+      # Marks a given message as consumed
+      # @param [Karafka::Params::Params] params message that we want to mark as processed
+      # @note This method won't trigger automatic offset commits, relying instead on the
+      #   time-interval based offset committing in ruby-kafka
+      def mark_as_consumed(params)
+        kafka_consumer.mark_message_as_processed(
+          *ApiAdapter.mark_message_as_processed(params)
+        )
+      end
+
+      # Marks a given message as consumed and commits the offsets in a blocking way
+      # @param [Karafka::Params::Params] params message that we want to mark as processed
+      # @note This method commits the offset for each manual marking to be sure
+      #   that the offset commit happens asap in case of a crash
+      def mark_as_consumed!(params)
+        mark_as_consumed(params)
+        # Trigger an immediate, blocking offset commit in order to minimize the risk of crashing
+        # before the automatic triggers have kicked in.
+        kafka_consumer.commit_offsets
+      end
+
+      private
+
+      attr_reader :consumer_group
+
+      # @return [Kafka::Consumer] returns a ready-to-consume Kafka consumer
+      #   that is set up to consume from the topics of a given consumer group
+      def kafka_consumer
+        # @note We don't cache the connection internally because we cache kafka_consumer,
+        #   which uses the kafka client object instance
+        @kafka_consumer ||= Builder.call.consumer(
+          *ApiAdapter.consumer(consumer_group)
+        ).tap do |consumer|
+          consumer_group.topics.each do |topic|
+            consumer.subscribe(*ApiAdapter.subscribe(topic))
+          end
+        end
+      rescue Kafka::ConnectionError
+        # If we did not wait, we would spam the log file with failed
+        # attempts if Kafka is down
+        sleep(consumer_group.reconnect_timeout)
+        # We don't log and just re-raise - this will be logged
+        # down the road
+        raise
+      end
+    end
+  end
+end
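The wrapper is normally driven by a `Listener`, but its surface is small enough to exercise directly. A hypothetical sketch, assuming a booted Karafka app with at least one routed consumer group:

```ruby
consumer_group = Karafka::App.consumer_groups.first

client = Karafka::Connection::Client.new(consumer_group)

# Yields raw ruby-kafka objects plus a type tag, exactly as Listener consumes them
client.fetch_loop do |raw_data, type|
  case type
  when :batch
    puts "batch of #{raw_data.messages.size} messages from #{raw_data.topic}"
  when :message
    puts "message at offset #{raw_data.offset} from #{raw_data.topic}"
  end
end
```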
data/lib/karafka/connection/listener.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # A single listener that listens to incoming messages from a single route
+    # @note It does not loop on itself - it needs to be executed in a loop
+    # @note The listener itself does nothing with the message - it will return to the block
+    #   a raw Kafka::FetchedMessage
+    class Listener
+      # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group that holds details
+      #   on what topics and with what settings we should listen
+      # @return [Karafka::Connection::Listener] listener instance
+      def initialize(consumer_group)
+        @consumer_group = consumer_group
+      end
+
+      # Runs prefetch callbacks and executes the main listener fetch loop
+      def call
+        Karafka.monitor.instrument(
+          'connection.listener.before_fetch_loop',
+          consumer_group: @consumer_group,
+          client: client
+        )
+        fetch_loop
+      end
+
+      private
+
+      # Opens a connection, gets messages and calls a block for each of the incoming messages
+      # @note We catch all the errors here, so they don't affect other listeners (or this one),
+      #   and we stay able to listen to and consume other incoming messages.
+      #   Since it is run inside Karafka::Connection::ActorCluster, catching all the exceptions
+      #   won't crash the whole cluster. Here we mostly focus on catching the exceptions related
+      #   to Kafka connections / internet connection issues / etc. Business logic problems
+      #   should not propagate this far
+      def fetch_loop
+        # @note What happens here is a delegation of processing to a proper processor based
+        #   on the incoming messages characteristics
+        client.fetch_loop do |raw_data, type|
+          Karafka.monitor.instrument('connection.listener.fetch_loop')
+
+          case type
+          when :message
+            MessageDelegator.call(@consumer_group.id, raw_data)
+          when :batch
+            BatchDelegator.call(@consumer_group.id, raw_data)
+          end
+        end
+      # This is on purpose - see the notes for this method
+      # rubocop:disable RescueException
+      rescue Exception => e
+        Karafka.monitor.instrument('connection.listener.fetch_loop.error', caller: self, error: e)
+        # rubocop:enable RescueException
+        # We can stop the client without a problem, as it will reinitialize itself when running
+        # the `fetch_loop` again
+        @client.stop
+        # We need to clear the consumers cache for the current connection when a fatal error
+        # happens and we reset the connection. Otherwise, for consumers with manual offset
+        # management, the persistence might have stored some data that would be reprocessed
+        Karafka::Persistence::Consumers.clear
+        sleep(@consumer_group.reconnect_timeout) && retry
+      end
+
+      # @return [Karafka::Connection::Client] wrapped kafka consuming client for a given topic
+      #   consumption
+      def client
+        @client ||= Client.new(@consumer_group)
+      end
+    end
+  end
+end
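A listener covers exactly one consumer group, so the server side runs one listener per group, each on its own thread. A simplified sketch of that pattern (the thread handling here is an assumption for illustration, not copied from Karafka's fetcher):

```ruby
listeners = Karafka::App.consumer_groups.map do |consumer_group|
  Karafka::Connection::Listener.new(consumer_group)
end

# Each listener blocks inside its client's fetch_loop, hence a thread per group
threads = listeners.map do |listener|
  Thread.new { listener.call }
end

threads.each(&:join)
```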
data/lib/karafka/connection/message_delegator.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Module that delegates processing of a single received message for which we listen
+    # to a proper processor
+    module MessageDelegator
+      class << self
+        # Delegates a message (does something with it)
+        # It will either schedule or run a proper processor action for the incoming message
+        # @param group_id [String] group_id of a group from which a given message came
+        # @param kafka_message [<Kafka::FetchedMessage>] raw message from kafka
+        # @note This should be looped to obtain a constant delegating of new messages
+        def call(group_id, kafka_message)
+          topic = Persistence::Topics.fetch(group_id, kafka_message.topic)
+          consumer = Persistence::Consumers.fetch(topic, kafka_message.partition)
+
+          Karafka.monitor.instrument(
+            'connection.message_delegator.call',
+            caller: self,
+            consumer: consumer,
+            kafka_message: kafka_message
+          ) do
+            # @note We always get a single message within a single delegator, which means that
+            #   we don't care if the user marked it as batch consumed or not.
+            consumer.params_batch = Params::Builders::ParamsBatch.from_kafka_messages(
+              [kafka_message],
+              topic
+            )
+            consumer.call
+          end
+        end
+      end
+    end
+  end
+end
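Because `MessageDelegator` wraps every message in a one-element `params_batch`, a consumer written against the batch API works unchanged in single-message mode. A hypothetical consumer (the class name and payload access are illustrative, assuming the default JSON deserializer):

```ruby
class ExampleConsumer < Karafka::BaseConsumer
  def consume
    # Under MessageDelegator this batch always holds exactly one params object
    params_batch.each do |params|
      puts params['payload']
    end
  end
end
```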