karafka 1.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +7 -0
  2. data/.coditsu.yml +3 -0
  3. data/.console_irbrc +13 -0
  4. data/.gitignore +68 -0
  5. data/.rspec +1 -0
  6. data/.ruby-gemset +1 -0
  7. data/.ruby-version +1 -0
  8. data/.travis.yml +49 -0
  9. data/CHANGELOG.md +458 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +41 -0
  12. data/Gemfile +15 -0
  13. data/Gemfile.lock +126 -0
  14. data/MIT-LICENCE +18 -0
  15. data/README.md +102 -0
  16. data/bin/karafka +19 -0
  17. data/config/errors.yml +6 -0
  18. data/karafka.gemspec +42 -0
  19. data/lib/karafka.rb +79 -0
  20. data/lib/karafka/app.rb +45 -0
  21. data/lib/karafka/attributes_map.rb +69 -0
  22. data/lib/karafka/backends/inline.rb +16 -0
  23. data/lib/karafka/base_consumer.rb +68 -0
  24. data/lib/karafka/base_responder.rb +208 -0
  25. data/lib/karafka/callbacks.rb +30 -0
  26. data/lib/karafka/callbacks/config.rb +22 -0
  27. data/lib/karafka/callbacks/dsl.rb +16 -0
  28. data/lib/karafka/cli.rb +54 -0
  29. data/lib/karafka/cli/base.rb +78 -0
  30. data/lib/karafka/cli/console.rb +29 -0
  31. data/lib/karafka/cli/flow.rb +46 -0
  32. data/lib/karafka/cli/info.rb +29 -0
  33. data/lib/karafka/cli/install.rb +42 -0
  34. data/lib/karafka/cli/server.rb +66 -0
  35. data/lib/karafka/connection/api_adapter.rb +148 -0
  36. data/lib/karafka/connection/builder.rb +16 -0
  37. data/lib/karafka/connection/client.rb +107 -0
  38. data/lib/karafka/connection/delegator.rb +46 -0
  39. data/lib/karafka/connection/listener.rb +60 -0
  40. data/lib/karafka/consumers/callbacks.rb +54 -0
  41. data/lib/karafka/consumers/includer.rb +51 -0
  42. data/lib/karafka/consumers/responders.rb +24 -0
  43. data/lib/karafka/consumers/single_params.rb +15 -0
  44. data/lib/karafka/errors.rb +50 -0
  45. data/lib/karafka/fetcher.rb +44 -0
  46. data/lib/karafka/helpers/class_matcher.rb +78 -0
  47. data/lib/karafka/helpers/config_retriever.rb +46 -0
  48. data/lib/karafka/helpers/multi_delegator.rb +33 -0
  49. data/lib/karafka/instrumentation/listener.rb +112 -0
  50. data/lib/karafka/instrumentation/logger.rb +55 -0
  51. data/lib/karafka/instrumentation/monitor.rb +64 -0
  52. data/lib/karafka/loader.rb +28 -0
  53. data/lib/karafka/params/dsl.rb +158 -0
  54. data/lib/karafka/params/params_batch.rb +46 -0
  55. data/lib/karafka/parsers/json.rb +38 -0
  56. data/lib/karafka/patches/dry_configurable.rb +33 -0
  57. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  58. data/lib/karafka/persistence/client.rb +25 -0
  59. data/lib/karafka/persistence/consumer.rb +38 -0
  60. data/lib/karafka/persistence/topic.rb +29 -0
  61. data/lib/karafka/process.rb +62 -0
  62. data/lib/karafka/responders/builder.rb +36 -0
  63. data/lib/karafka/responders/topic.rb +57 -0
  64. data/lib/karafka/routing/builder.rb +61 -0
  65. data/lib/karafka/routing/consumer_group.rb +61 -0
  66. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  67. data/lib/karafka/routing/proxy.rb +37 -0
  68. data/lib/karafka/routing/router.rb +29 -0
  69. data/lib/karafka/routing/topic.rb +60 -0
  70. data/lib/karafka/routing/topic_mapper.rb +55 -0
  71. data/lib/karafka/schemas/config.rb +24 -0
  72. data/lib/karafka/schemas/consumer_group.rb +78 -0
  73. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  74. data/lib/karafka/schemas/responder_usage.rb +39 -0
  75. data/lib/karafka/schemas/server_cli_options.rb +43 -0
  76. data/lib/karafka/server.rb +85 -0
  77. data/lib/karafka/setup/config.rb +193 -0
  78. data/lib/karafka/setup/configurators/base.rb +29 -0
  79. data/lib/karafka/setup/configurators/params.rb +25 -0
  80. data/lib/karafka/setup/configurators/water_drop.rb +32 -0
  81. data/lib/karafka/setup/dsl.rb +22 -0
  82. data/lib/karafka/status.rb +25 -0
  83. data/lib/karafka/templates/application_consumer.rb.example +6 -0
  84. data/lib/karafka/templates/application_responder.rb.example +11 -0
  85. data/lib/karafka/templates/karafka.rb.example +54 -0
  86. data/lib/karafka/version.rb +7 -0
  87. data/log/.gitkeep +0 -0
  88. metadata +303 -0
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Builder used to construct Kafka client
6
+ module Builder
7
+ class << self
8
+ # Builds a Kafka::Client instance that we use to work with Kafka cluster
9
+ # @return [::Kafka::Client] returns a Kafka client
10
+ def call
11
+ Kafka.new(*ApiAdapter.client)
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Class used as a wrapper around Ruby-Kafka client to simplify additional
6
+ # features that we provide/might provide in future and to hide the internal implementation
7
+ class Client
8
+ extend Forwardable
9
+
10
+ def_delegator :kafka_consumer, :seek
11
+
12
+ # Creates a queue consumer client that will pull the data from Kafka
13
+ # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
14
+ # we create a client
15
+ # @return [Karafka::Connection::Client] group consumer that can subscribe to
16
+ # multiple topics
17
+ def initialize(consumer_group)
18
+ @consumer_group = consumer_group
19
+ Persistence::Client.write(self)
20
+ end
21
+
22
+ # Opens connection, gets messages and calls a block for each of the incoming messages
23
+ # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
24
+ # @note This will yield with raw messages - no preprocessing or reformatting.
25
+ def fetch_loop
26
+ settings = ApiAdapter.consumption(consumer_group)
27
+
28
+ if consumer_group.batch_fetching
29
+ kafka_consumer.each_batch(*settings) { |batch| yield(batch.messages) }
30
+ else
31
+ # always yield an array of messages, so we have consistent API (always a batch)
32
+ kafka_consumer.each_message(*settings) { |message| yield([message]) }
33
+ end
34
+ rescue Kafka::ProcessingError => error
35
+ # If there was an error during consumption, we have to log it, pause current partition
36
+ # and process other things
37
+ Karafka.monitor.instrument(
38
+ 'connection.client.fetch_loop.error',
39
+ caller: self,
40
+ error: error.cause
41
+ )
42
+ pause(error.topic, error.partition)
43
+ retry
44
+ end
45
+
46
+ # Gracefully stops topic consumption
47
+ # @note Stopping running consumers without a really important reason is not recommended
48
+ # as until all the consumers are stopped, the server will keep running serving only
49
+ # part of the messages
50
+ def stop
51
+ @kafka_consumer&.stop
52
+ @kafka_consumer = nil
53
+ end
54
+
55
+ # Pauses fetching and consumption of a given topic partition
56
+ # @param topic [String] topic that we want to pause
57
+ # @param partition [Integer] number of the partition that we want to pause
58
+ def pause(topic, partition)
59
+ kafka_consumer.pause(*ApiAdapter.pause(topic, partition, consumer_group))
60
+ end
61
+
62
+ # Marks a given message as consumed and commits the offsets
63
+ # @note In contrast to ruby-kafka, we commit the offset for each manual marking to be sure
64
+ # that the offset commit happens asap in case of a crash
65
+ # @param [Karafka::Params::Params] params message that we want to mark as processed
66
+ def mark_as_consumed(params)
67
+ kafka_consumer.mark_message_as_processed(
68
+ *ApiAdapter.mark_message_as_processed(params)
69
+ )
70
+ # Trigger an immediate, blocking offset commit in order to minimize the risk of crashing
71
+ # before the automatic triggers have kicked in.
72
+ kafka_consumer.commit_offsets
73
+ end
74
+
75
+ # Triggers a non-optional blocking heartbeat that notifies Kafka about the fact, that this
76
+ # consumer / client is still up and running
77
+ def trigger_heartbeat
78
+ kafka_consumer.trigger_heartbeat!
79
+ end
80
+
81
+ private
82
+
83
+ attr_reader :consumer_group
84
+
85
+ # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
86
+ # that is set up to consume from topics of a given consumer group
87
+ def kafka_consumer
88
+ # @note We don't cache the connection internally because we cache kafka_consumer that uses
89
+ # kafka client object instance
90
+ @kafka_consumer ||= Builder.call.consumer(
91
+ *ApiAdapter.consumer(consumer_group)
92
+ ).tap do |consumer|
93
+ consumer_group.topics.each do |topic|
94
+ consumer.subscribe(*ApiAdapter.subscribe(topic))
95
+ end
96
+ end
97
+ rescue Kafka::ConnectionError
98
+ # If we would not wait it would totally spam log file with failed
99
+ # attempts if Kafka is down
100
+ sleep(consumer_group.reconnect_timeout)
101
+ # We don't log and just reraise - this will be logged
102
+ # down the road
103
+ raise
104
+ end
105
+ end
106
+ end
107
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # Class that delegates processing of messages for which we listen to a proper processor
6
+ module Delegator
7
+ class << self
8
+ # Delegates messages (does something with them)
9
+ # It will either schedule or run a proper processor action for messages
10
+ # @note This should be looped to obtain a constant delegating of new messages
11
+ # @note We catch all the errors here, to make sure that no failures
12
+ # for a given consumption will affect other consumed messages
13
+ # If we wouldn't catch it, it would propagate up until killing the thread
14
+ # @note It is one huge method for performance reasons. It is much faster than
15
+ # using send or invoking additional methods
16
+ # @param group_id [String] group_id of a group from which a given message came
17
+ # @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages fetched from kafka
18
+ def call(group_id, kafka_messages)
19
+ # @note We always get messages by topic and partition so we can take topic from the
20
+ # first one and it will be valid for all the messages
21
+ topic = Persistence::Topic.fetch(group_id, kafka_messages[0].topic)
22
+ consumer = Persistence::Consumer.fetch(topic, kafka_messages[0].partition)
23
+
24
+ Karafka.monitor.instrument(
25
+ 'connection.delegator.call',
26
+ caller: self,
27
+ consumer: consumer,
28
+ kafka_messages: kafka_messages
29
+ ) do
30
+ # Depending on a case (persisted or not) we might use new consumer instance per
31
+ # each batch, or use the same one for all of them (for implementing buffering, etc.)
32
+ if topic.batch_consuming
33
+ consumer.params_batch = kafka_messages
34
+ consumer.call
35
+ else
36
+ kafka_messages.each do |kafka_message|
37
+ consumer.params_batch = [kafka_message]
38
+ consumer.call
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Connection
5
+ # A single listener that listens to incoming messages from a single route
6
+ # @note It does not loop on itself - it needs to be executed in a loop
7
+ # @note Listener itself does nothing with the message - it will return to the block
8
+ # a raw Kafka::FetchedMessage
9
+ class Listener
10
+ # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group that holds details
11
+ # on what topics and with what settings should we listen
12
+ # @return [Karafka::Connection::Listener] listener instance
13
+ def initialize(consumer_group)
14
+ @consumer_group = consumer_group
15
+ end
16
+
17
+ # Runs prefetch callbacks and executes the main listener fetch loop
18
+ def call
19
+ Karafka::Callbacks.before_fetch_loop(
20
+ @consumer_group,
21
+ client
22
+ )
23
+ fetch_loop
24
+ end
25
+
26
+ private
27
+
28
+ # Opens connection, gets messages and calls a block for each of the incoming messages
29
+ # @yieldparam [String] consumer group id
30
+ # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
31
+ # @note This will yield with a raw message - no preprocessing or reformatting
32
+ # @note We catch all the errors here, so they don't affect other listeners (or this one)
33
+ # so we will be able to listen and consume other incoming messages.
34
+ # Since it is run inside Karafka::Connection::ActorCluster - catching all the exceptions
35
+ # won't crash the whole cluster. Here we mostly focus on catching the exceptions related to
36
+ # Kafka connections / Internet connection issues / Etc. Business logic problems should not
37
+ # propagate this far
38
+ def fetch_loop
39
+ client.fetch_loop do |raw_messages|
40
+ # @note What happens here is a delegation of processing to a proper processor based
41
+ # on the incoming messages characteristics
42
+ Karafka::Connection::Delegator.call(@consumer_group.id, raw_messages)
43
+ end
44
+ # This is on purpose - see the notes for this method
45
+ # rubocop:disable RescueException
46
+ rescue Exception => e
47
+ Karafka.monitor.instrument('connection.listener.fetch_loop.error', caller: self, error: e)
48
+ # rubocop:enable RescueException
49
+ @client.stop
50
+ sleep(@consumer_group.reconnect_timeout) && retry
51
+ end
52
+
53
+ # @return [Karafka::Connection::Client] wrapped kafka consuming client for a given topic
54
+ # consumption
55
+ def client
56
+ @client ||= Client.new(@consumer_group)
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Consumers
5
+ # Additional callbacks that can be used to trigger some actions on certain moments like
6
+ # manual offset management, committing or anything else outside of a standard messages flow
7
+ # They are not included by default, as we don't want to provide functionalities that are
8
+ # not required by users by default
9
+ # Please refer to the wiki callbacks page for more details on how to use them
10
+ module Callbacks
11
+ # Types of events on which we run callbacks
12
+ TYPES = %i[
13
+ after_fetch
14
+ after_poll
15
+ before_poll
16
+ before_stop
17
+ ].freeze
18
+
19
+ # Class methods needed to make callbacks run
20
+ module ClassMethods
21
+ TYPES.each do |type|
22
+ # Creates a callback wrapper
23
+ # @param method_name [Symbol, String] method name or nil if we plan to provide a block
24
+ # @yield A block with a code that should be executed before scheduling
25
+ define_method type do |method_name = nil, &block|
26
+ set_callback type, :before, method_name || block
27
+ end
28
+ end
29
+ end
30
+
31
+ # @param consumer_class [Class] consumer class that we extend with callbacks
32
+ def self.included(consumer_class)
33
+ consumer_class.class_eval do
34
+ extend ClassMethods
35
+ include ActiveSupport::Callbacks
36
+
37
+ # The call method is wrapped with a set of callbacks
38
+ # We won't run process if any of the callbacks throw abort
39
+ # @see http://api.rubyonrails.org/classes/ActiveSupport/Callbacks/ClassMethods.html#method-i-get_callbacks
40
+ TYPES.each { |type| define_callbacks type }
41
+ end
42
+ end
43
+
44
+ # Executes the default consumer flow, runs callbacks and if not halted will call process
45
+ # method of a proper backend. It is here because it interacts with the default Karafka
46
+ # call flow and needs to be overwritten to support callbacks
47
+ def call
48
+ run_callbacks :after_fetch do
49
+ process
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ # Additional functionalities for consumers
5
+ module Consumers
6
+ # Module used to inject functionalities into a given consumer class, based on the consumer
7
+ # topic and its settings
8
+ # We don't need all the behaviors in all the cases, so it is not worth having everything
9
+ # in all the cases all the time
10
+ module Includer
11
+ class << self
12
+ # @param consumer_class [Class] consumer class, that will get some functionalities
13
+ # based on the topic under which it operates
14
+ def call(consumer_class)
15
+ topic = consumer_class.topic
16
+
17
+ bind_backend(consumer_class, topic)
18
+ bind_params(consumer_class, topic)
19
+ bind_responders(consumer_class, topic)
20
+ end
21
+
22
+ private
23
+
24
+ # Figures out backend for a given consumer class, based on the topic backend and
25
+ # includes it into the consumer class
26
+ # @param consumer_class [Class] consumer class
27
+ # @param topic [Karafka::Routing::Topic] topic of a consumer class
28
+ def bind_backend(consumer_class, topic)
29
+ backend = Kernel.const_get("::Karafka::Backends::#{topic.backend.to_s.capitalize}")
30
+ consumer_class.include backend
31
+ end
32
+
33
+ # Adds a single #params support for non batch processed topics
34
+ # @param consumer_class [Class] consumer class
35
+ # @param topic [Karafka::Routing::Topic] topic of a consumer class
36
+ def bind_params(consumer_class, topic)
37
+ return if topic.batch_consuming
38
+ consumer_class.include SingleParams
39
+ end
40
+
41
+ # Adds responders support for topics and consumers with responders defined for them
42
+ # @param consumer_class [Class] consumer class
43
+ # @param topic [Karafka::Routing::Topic] topic of a consumer class
44
+ def bind_responders(consumer_class, topic)
45
+ return unless topic.responder
46
+ consumer_class.include Responders
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Consumers
5
+ # Feature that allows us to use responders flow in consumer
6
+ module Responders
7
+ # Responds with given data using given responder. This allows us to have a similar way of
8
+ # defining flows like synchronous protocols
9
+ # @param data Anything we want to pass to responder based on which we want to trigger further
10
+ # Kafka responding
11
+ def respond_with(*data)
12
+ Karafka.monitor.instrument(
13
+ 'consumers.responders.respond_with',
14
+ caller: self,
15
+ data: data
16
+ ) do
17
+ # @note we build a new instance of responder each time, as long-running (persisted)
18
+ # consumers can respond multiple times during the lifecycle
19
+ topic.responder.new(topic.parser).call(*data)
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Consumers
5
+ # Params alias for single message consumption consumers
6
+ module SingleParams
7
+ private
8
+
9
+ # @return [Karafka::Params::Params] params instance for non batch consumption consumers
10
+ def params
11
+ params_batch.first
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ # Namespace used to encapsulate all the internal errors of Karafka
5
+ module Errors
6
+ # Base class for all the Karafka internal errors
7
+ BaseError = Class.new(StandardError)
8
+
9
+ # Should be raised when we attempt to parse incoming params but parsing fails
10
+ # If this error (or its descendant) is detected, we will pass the raw message
11
+ # into params and proceed further
12
+ ParserError = Class.new(BaseError)
13
+
14
+ # Raised when router receives topic name which does not correspond with any routes
15
+ # This can only happen in a case when:
16
+ # - you've received a message and we cannot match it with a consumer
17
+ # - you've changed the routing, so router can no longer associate your topic to
18
+ # any consumer
19
+ # - or in a case when you do a lot of metaprogramming and you change routing/etc on runtime
20
+ #
21
+ # In case this happens, you will have to create a temporary route that will allow
22
+ # you to "eat" everything from the Sidekiq queue.
23
+ # @see https://github.com/karafka/karafka/issues/135
24
+ NonMatchingRouteError = Class.new(BaseError)
25
+
26
+ # Raised when we don't use a responder, or use it not in the way expected, based on the
27
+ # topics usage definitions
28
+ InvalidResponderUsage = Class.new(BaseError)
29
+
30
+ # Raised when options that we provide to the responder to respond aren't what the schema
31
+ # requires
32
+ InvalidResponderMessageOptions = Class.new(BaseError)
33
+
34
+ # Raised when configuration doesn't match with validation schema
35
+ InvalidConfiguration = Class.new(BaseError)
36
+
37
+ # Raised when we try to use Karafka CLI commands (except install) without a bootfile
38
+ MissingBootFile = Class.new(BaseError)
39
+
40
+ # Raised when we want to read a persisted thread messages consumer but it is unavailable
41
+ # This should never happen and if it does, please contact us
42
+ MissingClient = Class.new(BaseError)
43
+
44
+ # Raised when we want to hook up to an event that is not registered and supported
45
+ UnregisteredMonitorEvent = Class.new(BaseError)
46
+
47
+ # Raised when we've waited enough for shutting down an unresponding process
48
+ ForcefulShutdown = Class.new(BaseError)
49
+ end
50
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ # Class used to run the Karafka consumer and handle shutting down, restarting etc
5
+ # @note Creating multiple fetchers will result in having multiple connections to the same
6
+ # topics, which means that if there are no spare partitions, the extra connections won't be used.
7
+ class Fetcher
8
+ class << self
9
+ # Starts listening on all the listeners asynchronously
10
+ # Fetch loop should never end, which means that we won't create more actor clusters
11
+ # so we don't have to terminate them
12
+ def call
13
+ threads = listeners.map do |listener|
14
+ # We abort on exception because there should be an exception handling developed for
15
+ # each listener running in separate threads, so the exceptions should never leak
16
+ # and if that happens, it means that something really bad happened and we should stop
17
+ # the whole process
18
+ Thread
19
+ .new { listener.call }
20
+ .tap { |thread| thread.abort_on_exception = true }
21
+ end
22
+
23
+ # We aggregate threads here for a supervised shutdown process
24
+ threads.each { |thread| Karafka::Server.consumer_threads << thread }
25
+ threads.each(&:join)
26
+ # If anything crashes here, we need to raise the error and crash the runner because it means
27
+ # that something terrible happened
28
+ rescue StandardError => e
29
+ Karafka.monitor.instrument('fetcher.call.error', caller: self, error: e)
30
+ Karafka::App.stop!
31
+ raise e
32
+ end
33
+
34
+ private
35
+
36
+ # @return [Array<Karafka::Connection::Listener>] listeners that will consume messages
37
+ def listeners
38
+ @listeners ||= App.consumer_groups.active.map do |consumer_group|
39
+ Karafka::Connection::Listener.new(consumer_group)
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end