karafka 1.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +7 -0
  2. data/.coditsu.yml +3 -0
  3. data/.console_irbrc +13 -0
  4. data/.gitignore +68 -0
  5. data/.rspec +1 -0
  6. data/.ruby-gemset +1 -0
  7. data/.ruby-version +1 -0
  8. data/.travis.yml +49 -0
  9. data/CHANGELOG.md +458 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +41 -0
  12. data/Gemfile +15 -0
  13. data/Gemfile.lock +126 -0
  14. data/MIT-LICENCE +18 -0
  15. data/README.md +102 -0
  16. data/bin/karafka +19 -0
  17. data/config/errors.yml +6 -0
  18. data/karafka.gemspec +42 -0
  19. data/lib/karafka.rb +79 -0
  20. data/lib/karafka/app.rb +45 -0
  21. data/lib/karafka/attributes_map.rb +69 -0
  22. data/lib/karafka/backends/inline.rb +16 -0
  23. data/lib/karafka/base_consumer.rb +68 -0
  24. data/lib/karafka/base_responder.rb +208 -0
  25. data/lib/karafka/callbacks.rb +30 -0
  26. data/lib/karafka/callbacks/config.rb +22 -0
  27. data/lib/karafka/callbacks/dsl.rb +16 -0
  28. data/lib/karafka/cli.rb +54 -0
  29. data/lib/karafka/cli/base.rb +78 -0
  30. data/lib/karafka/cli/console.rb +29 -0
  31. data/lib/karafka/cli/flow.rb +46 -0
  32. data/lib/karafka/cli/info.rb +29 -0
  33. data/lib/karafka/cli/install.rb +42 -0
  34. data/lib/karafka/cli/server.rb +66 -0
  35. data/lib/karafka/connection/api_adapter.rb +148 -0
  36. data/lib/karafka/connection/builder.rb +16 -0
  37. data/lib/karafka/connection/client.rb +107 -0
  38. data/lib/karafka/connection/delegator.rb +46 -0
  39. data/lib/karafka/connection/listener.rb +60 -0
  40. data/lib/karafka/consumers/callbacks.rb +54 -0
  41. data/lib/karafka/consumers/includer.rb +51 -0
  42. data/lib/karafka/consumers/responders.rb +24 -0
  43. data/lib/karafka/consumers/single_params.rb +15 -0
  44. data/lib/karafka/errors.rb +50 -0
  45. data/lib/karafka/fetcher.rb +44 -0
  46. data/lib/karafka/helpers/class_matcher.rb +78 -0
  47. data/lib/karafka/helpers/config_retriever.rb +46 -0
  48. data/lib/karafka/helpers/multi_delegator.rb +33 -0
  49. data/lib/karafka/instrumentation/listener.rb +112 -0
  50. data/lib/karafka/instrumentation/logger.rb +55 -0
  51. data/lib/karafka/instrumentation/monitor.rb +64 -0
  52. data/lib/karafka/loader.rb +28 -0
  53. data/lib/karafka/params/dsl.rb +158 -0
  54. data/lib/karafka/params/params_batch.rb +46 -0
  55. data/lib/karafka/parsers/json.rb +38 -0
  56. data/lib/karafka/patches/dry_configurable.rb +33 -0
  57. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  58. data/lib/karafka/persistence/client.rb +25 -0
  59. data/lib/karafka/persistence/consumer.rb +38 -0
  60. data/lib/karafka/persistence/topic.rb +29 -0
  61. data/lib/karafka/process.rb +62 -0
  62. data/lib/karafka/responders/builder.rb +36 -0
  63. data/lib/karafka/responders/topic.rb +57 -0
  64. data/lib/karafka/routing/builder.rb +61 -0
  65. data/lib/karafka/routing/consumer_group.rb +61 -0
  66. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  67. data/lib/karafka/routing/proxy.rb +37 -0
  68. data/lib/karafka/routing/router.rb +29 -0
  69. data/lib/karafka/routing/topic.rb +60 -0
  70. data/lib/karafka/routing/topic_mapper.rb +55 -0
  71. data/lib/karafka/schemas/config.rb +24 -0
  72. data/lib/karafka/schemas/consumer_group.rb +78 -0
  73. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  74. data/lib/karafka/schemas/responder_usage.rb +39 -0
  75. data/lib/karafka/schemas/server_cli_options.rb +43 -0
  76. data/lib/karafka/server.rb +85 -0
  77. data/lib/karafka/setup/config.rb +193 -0
  78. data/lib/karafka/setup/configurators/base.rb +29 -0
  79. data/lib/karafka/setup/configurators/params.rb +25 -0
  80. data/lib/karafka/setup/configurators/water_drop.rb +32 -0
  81. data/lib/karafka/setup/dsl.rb +22 -0
  82. data/lib/karafka/status.rb +25 -0
  83. data/lib/karafka/templates/application_consumer.rb.example +6 -0
  84. data/lib/karafka/templates/application_responder.rb.example +11 -0
  85. data/lib/karafka/templates/karafka.rb.example +54 -0
  86. data/lib/karafka/version.rb +7 -0
  87. data/log/.gitkeep +0 -0
  88. metadata +303 -0
# frozen_string_literal: true

module Karafka
  module Params
    # Represents a set of messages received from Kafka in a single fetch.
    # @note Individual params are lazy loaded before first use. That way the parsing
    #   step can be skipped entirely when an after_fetch hook rejects some incoming
    #   messages without touching their params. It also helps with really heavy
    #   payloads (in terms of parsing).
    class ParamsBatch
      include Enumerable

      # Builds up a params batch based on raw kafka messages
      # @param messages_batch [Array<Kafka::FetchedMessage>] messages batch
      # @param topic_parser [Class] topic parser for unparsing messages values
      # @note map! intentionally reuses (mutates) the incoming array in place
      def initialize(messages_batch, topic_parser)
        @params_batch = messages_batch.map! do |message|
          Karafka::Params::Params.build(message, topic_parser)
        end
      end

      # Iterates over the batch, parsing each element right before yielding it
      # @yieldparam [Karafka::Params::Params] each parsed and loaded params instance
      # @note Invoking this method loads and parses the params one after another.
      #   If you want access without parsing, use #to_a instead
      def each
        @params_batch.each { |params| yield(params.retrieve!) }
      end

      # Forces all elements of the batch into a parsed state
      # @return [Array<Karafka::Params::Params>] all the params fully loaded, so they
      #   can be used for batch insert, etc. Without invoking this, params stay
      #   unparsed until first use
      def parsed
        each(&:itself)
      end

      # @return [Karafka::Params::Params] last element after the unparsing process
      def last
        @params_batch.last.retrieve!
      end

      # @return [Array<Karafka::Params::Params>] pure array with params (not parsed)
      def to_a
        @params_batch
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  # Module for all the parsers supported by default for incoming/outgoing data
  module Parsers
    # Default Karafka JSON parser for serializing and deserializing data
    class Json
      # Deserializes a JSON string into a hash
      # @param content [String] content based on which we want to get our hash
      # @return [Hash] hash with parsed JSON data
      # @raise [Karafka::Errors::ParserError] when content is not valid JSON
      # @example
      #   Json.parse("{\"a\":1}") #=> { 'a' => 1 }
      def self.parse(content)
        ::MultiJson.load(content)
      rescue ::MultiJson::ParseError => e
        raise ::Karafka::Errors::ParserError, e
      end

      # Serializes any object into a JSON string
      # @param content [Object] any object that we want to convert to a json string
      # @return [String] valid JSON string containing serialized data
      # @raise [Karafka::Errors::ParserError] raised when we don't have a way to
      #   parse given content to a json string format
      # @note Strings are passed through untouched - we assume they already contain
      #   JSON and don't serialize them again. This allows data to be serialized with
      #   a custom (not that simple) json serializer before reaching this parser.
      #
      # @example From an ActiveRecord object
      #   Json.generate(Repository.first) #=> "{\"repository\":{\"id\":\"04b504e0\"}}"
      # @example From a string (no changes)
      #   Json.generate("{\"a\":1}") #=> "{\"a\":1}"
      def self.generate(content)
        case content
        when String
          content
        else
          raise Karafka::Errors::ParserError, content unless content.respond_to?(:to_json)

          content.to_json
        end
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  # Namespace for patches of external gems/libraries
  module Patches
    # Patch that allows proc based, lazy evaluated settings with Dry Configurable
    # @see https://github.com/dry-rb/dry-configurable/blob/master/lib/dry/configurable.rb
    module DryConfigurable
      # We overwrite ::Dry::Configurable::Config to change the behaviour for procs.
      # Unfortunately it does not provide on-call proc evaluation, so this feature
      # had to be added here on demand.
      # @param args Any arguments that DryConfigurable::Config accepts
      def define!(*args)
        result = super
        @config.each_key { |accessor| rebuild(accessor) }
        result
      end

      private

      # Rebuilds a given accessor so that, when it holds a proc that takes no
      # arguments, the proc is evaluated upon each read; procs that expect
      # arguments are returned as-is
      # @param method_name [Symbol] name of an accessor that we want to rebuild
      def rebuild(method_name)
        define_singleton_method method_name do
          result = super()
          return result unless result.is_a?(Proc)
          return result unless result.parameters.empty?

          result.call
        end
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Patches
    # Patches for the ruby-kafka gem
    module RubyKafka
      # This patch allows us to inject business logic in between fetches and before
      # the consumer stop, so we can perform a stop commit or anything else we need,
      # since the ruby-kafka fetch loop does not allow that directly.
      # We don't want to use the poll ruby-kafka api as it brings many more problems
      # that we would have to take care of. This way nothing like that ever happens,
      # but we get the control over the stopping process that we need (since we're
      # the ones that initiate it for each thread).
      def consumer_loop
        super do
          consumers = Karafka::Persistence::Consumer
                      .all
                      .values
                      .flat_map(&:values)
                      .select { |consumer| consumer.respond_to?(:run_callbacks) }

          if Karafka::App.stopped?
            consumers.each { |consumer| consumer.run_callbacks :before_stop }
            Karafka::Persistence::Client.read.stop
          else
            consumers.each { |consumer| consumer.run_callbacks :before_poll }
            yield
            consumers.each { |consumer| consumer.run_callbacks :after_poll }
          end
        end
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Persistence
    # Persistence layer to store current thread messages consumer client for further use
    class Client
      # Thread.current key under which we store current thread messages consumer client
      PERSISTENCE_SCOPE = :client

      # Stores a messages consumer client for the current thread
      # @param client [Karafka::Connection::Client] messages consumer client of
      #   a current thread
      # @return [Karafka::Connection::Client] persisted messages consumer client
      def self.write(client)
        Thread.current[PERSISTENCE_SCOPE] = client
      end

      # @return [Karafka::Connection::Client] persisted messages consumer client
      # @raise [Karafka::Errors::MissingClient] raised when there is no thread
      #   messages consumer client but we try to use it anyway
      #   (fixed: previous doc referenced a non-raised Errors::MissingConsumer)
      def self.read
        Thread.current[PERSISTENCE_SCOPE] || raise(Errors::MissingClient)
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  # Module used to provide a persistent cache layer for Karafka components that need to be
  # shared inside of a same thread
  module Persistence
    # Per-thread cache of consumer instances across batch requests, keyed by topic
    # and partition, used to store additional details when the persistent mode for
    # a given topic is turned on
    class Consumer
      # Thread.current scope under which we store consumers data
      PERSISTENCE_SCOPE = :consumers

      class << self
        # @return [Hash] current thread persistence scope hash with all the consumers
        # @note This does not need to be threadsafe (Hash) as it is always executed
        #   in a current thread context
        def all
          Thread.current[PERSISTENCE_SCOPE] ||= Hash.new { |hash, key| hash[key] = {} }
        end

        # Builds (when needed) and/or fetches the current consumer instance that will
        # be used to process messages from a given topic and partition
        # @param topic [Karafka::Routing::Topic] topic instance for which we might cache
        # @param partition [Integer] number of partition for which we want to cache
        # @return [Karafka::BaseConsumer] base consumer descendant
        def fetch(topic, partition)
          # We always store the current instance for callback reasons
          return all[topic][partition] ||= topic.consumer.new if topic.persistent

          all[topic][partition] = topic.consumer.new
        end
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Persistence
    # Local, per-thread cache for routing topics
    # We use it in order not to rebuild string instances and remap the incoming topic
    # upon each message / message batch received
    class Topic
      # Thread.current scope under which we store topics data
      PERSISTENCE_SCOPE = :topics

      # @param group_id [String] group id for which we fetch a topic representation
      # @param raw_topic_name [String] raw topic name (before remapping) for which we
      #   fetch a topic representation
      # @return [Karafka::Routing::Topic] remapped topic representation that can be used
      #   further on when working with given parameters
      def self.fetch(group_id, raw_topic_name)
        cache = (Thread.current[PERSISTENCE_SCOPE] ||= Hash.new { |hash, key| hash[key] = {} })

        cache[group_id][raw_topic_name] ||= begin
          # We map from the incoming topic name, as it might be namespaced, etc.
          # @see topic_mapper internal docs
          mapped_topic_name = Karafka::App.config.topic_mapper.incoming(raw_topic_name)
          Routing::Router.find("#{group_id}_#{mapped_topic_name}")
        end
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  # Catches signals from the ruby Signal class in order to manage Karafka stop
  # @note There might be only one process - this class is a singleton
  class Process
    include Singleton

    # Signal types that we handle
    HANDLED_SIGNALS = %i[
      SIGINT
      SIGQUIT
      SIGTERM
    ].freeze

    HANDLED_SIGNALS.each do |signal|
      # Registers a callback that will run when a certain signal is sent
      # to the Karafka server instance
      # @note It does not define the callback itself - it needs to be passed in a block
      # @example Define an action that should be taken on_sigint
      #   process.on_sigint do
      #     Karafka.logger.info('Log something here')
      #     exit
      #   end
      define_method :"on_#{signal.to_s.downcase}" do |&block|
        @callbacks[signal] << block
      end
    end

    # Creates an instance of process and prepares an empty callbacks accumulator
    def initialize
      @callbacks = Hash.new { |hash, signal| hash[signal] = [] }
    end

    # Traps all HANDLED_SIGNALS and performs the appropriate callbacks (if defined)
    # @note If there are no callbacks, this method will just ignore a given signal
    def supervise
      HANDLED_SIGNALS.each { |signal| trap_signal(signal) }
    end

    private

    # Traps a single signal and performs callbacks (if any) or just ignores this signal
    # @param [Symbol] signal type that we want to catch
    def trap_signal(signal)
      trap(signal) do
        notice_signal(signal)
        (@callbacks[signal] || []).each(&:call)
      end
    end

    # Informs monitoring about a trapped signal
    # @param [Symbol] signal type that we received
    # @note We cannot perform logging from trap context, that's why
    #   we have to spin up a new thread to do this
    def notice_signal(signal)
      Thread.new do
        Karafka.monitor.instrument('process.notice_signal', caller: self, signal: signal)
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  # Responders namespace encapsulates all the internal responder implementation parts
  module Responders
    # Finds (based on the consumer class name) a responder that matches the consumer.
    # We use it when the user does not provide a responder inside routing, but still
    # names the responder with the same convention (and namespaces) as the consumer
    #
    # @example Matching responder exists
    #   Karafka::Responder::Builder(NewEventsConsumer).build #=> NewEventsResponder
    # @example Matching responder does not exist
    #   Karafka::Responder::Builder(NewBuildsConsumer).build #=> nil
    class Builder
      # @param consumer_class [Karafka::BaseConsumer, nil] descendant of
      #   Karafka::BaseConsumer
      # @example Tries to find a responder that matches a given consumer. If nothing
      #   found, will return nil (nil is accepted, because it means that a given
      #   consumer doesn't pipe stuff further on)
      def initialize(consumer_class)
        @consumer_class = consumer_class
      end

      # Tries to figure out a responder based on a consumer class name
      # @return [Class] Responder class (not an instance)
      # @return [nil] or nil if there's no matching responding class
      def build
        matcher = Helpers::ClassMatcher.new(
          @consumer_class,
          from: 'Consumer',
          to: 'Responder'
        )
        matcher.match
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Responders
    # Describes a single topic on which we want to respond, with responding requirements
    # @example Define topic (required by default)
    #   Karafka::Responders::Topic.new(:topic_name, {}) #=> #<Karafka::Responders::Topic...
    # @example Define optional topic
    #   Karafka::Responders::Topic.new(:topic_name, required: false)
    # @example Define topic that on which we want to respond multiple times
    #   Karafka::Responders::Topic.new(:topic_name, multiple_usage: true)
    class Topic
      # Name of the topic on which we want to respond
      attr_reader :name

      # @param name [Symbol, String] name of a topic on which we want to respond
      # @param options [Hash] non-default options for this topic
      # @return [Karafka::Responders::Topic] topic description object
      def initialize(name, options)
        @name = name.to_s
        @options = options
      end

      # @return [Boolean] is this a required topic (if not, it is optional)
      def required?
        @options.fetch(:required, true)
      end

      # @return [Boolean] do we expect to use it multiple times in a single respond flow
      def multiple_usage?
        @options[:multiple_usage] || false
      end

      # @return [Boolean] was usage of this topic registered or not
      def registered?
        @options[:registered] == true
      end

      # @return [Boolean] do we want to use the async producer. Defaults to false as
      #   the sync producer is safer and introduces fewer problems
      def async?
        @options.fetch(:async, false)
      end

      # @return [Hash] hash with this topic attributes and options
      def to_h
        {
          name: name,
          multiple_usage: multiple_usage?,
          required: required?,
          registered: registered?,
          async: async?
        }
      end
    end
  end
end
# frozen_string_literal: true

module Karafka
  module Routing
    # Builder used as a DSL layer for building consumers and telling them which topics to consume
    # @example Build a simple (most common) route
    #   consumers do
    #     topic :new_videos do
    #       consumer NewVideosConsumer
    #     end
    #   end
    class Builder < Array
      include Singleton

      # Used to draw routes for Karafka
      # @note After it is done drawing it will store and validate all the routes to make sure that
      #   they are correct and that there are no topic/group duplications (this is forbidden)
      # @yield Evaluates provided block in a builder context so we can describe routes
      # @raise [Karafka::Errors::InvalidConfiguration] when any consumer group is invalid
      # @example
      #   draw do
      #     topic :xyz do
      #     end
      #   end
      def draw(&block)
        instance_eval(&block)

        # Validate every consumer group. The previous implementation returned from
        # #draw as soon as the first group validated successfully, which silently
        # skipped validation of all the remaining groups.
        each do |consumer_group|
          hashed_group = consumer_group.to_h
          validation_result = Karafka::Schemas::ConsumerGroup.call(hashed_group)
          next if validation_result.success?

          raise Errors::InvalidConfiguration, validation_result.errors
        end
      end

      # @return [Array<Karafka::Routing::ConsumerGroup>] only active consumer groups that
      #   we want to use. Since Karafka supports multi-process setup, we need to be able
      #   to pick only those consumer groups that should be active in our given process context
      def active
        select(&:active?)
      end

      private

      # Builds and saves a given consumer group
      # @param group_id [String, Symbol] name for consumer group
      # @yield Evaluates a given block in a consumer group context
      def consumer_group(group_id, &block)
        consumer_group = ConsumerGroup.new(group_id.to_s)
        self << Proxy.new(consumer_group, &block).target
      end

      # @param topic_name [String, Symbol] name of a topic from which we want to consume
      # @yield Evaluates a given block in a topic context
      def topic(topic_name, &block)
        consumer_group(topic_name) do
          topic(topic_name, &block).tap(&:build)
        end
      end
    end
  end
end