karafka 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +7 -0
  2. data/.console_irbrc +13 -0
  3. data/.gitignore +68 -0
  4. data/.rspec +1 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +18 -0
  8. data/CHANGELOG.md +415 -0
  9. data/CODE_OF_CONDUCT.md +46 -0
  10. data/CONTRIBUTING.md +41 -0
  11. data/Gemfile +11 -0
  12. data/Gemfile.lock +123 -0
  13. data/MIT-LICENCE +18 -0
  14. data/README.md +89 -0
  15. data/bin/karafka +19 -0
  16. data/config/errors.yml +6 -0
  17. data/karafka.gemspec +37 -0
  18. data/lib/karafka.rb +78 -0
  19. data/lib/karafka/app.rb +45 -0
  20. data/lib/karafka/attributes_map.rb +67 -0
  21. data/lib/karafka/backends/inline.rb +16 -0
  22. data/lib/karafka/base_consumer.rb +68 -0
  23. data/lib/karafka/base_responder.rb +204 -0
  24. data/lib/karafka/callbacks.rb +30 -0
  25. data/lib/karafka/callbacks/config.rb +22 -0
  26. data/lib/karafka/callbacks/dsl.rb +16 -0
  27. data/lib/karafka/cli.rb +54 -0
  28. data/lib/karafka/cli/base.rb +78 -0
  29. data/lib/karafka/cli/console.rb +29 -0
  30. data/lib/karafka/cli/flow.rb +46 -0
  31. data/lib/karafka/cli/info.rb +29 -0
  32. data/lib/karafka/cli/install.rb +42 -0
  33. data/lib/karafka/cli/server.rb +66 -0
  34. data/lib/karafka/connection/client.rb +117 -0
  35. data/lib/karafka/connection/config_adapter.rb +120 -0
  36. data/lib/karafka/connection/delegator.rb +46 -0
  37. data/lib/karafka/connection/listener.rb +60 -0
  38. data/lib/karafka/consumers/callbacks.rb +54 -0
  39. data/lib/karafka/consumers/includer.rb +51 -0
  40. data/lib/karafka/consumers/responders.rb +24 -0
  41. data/lib/karafka/consumers/single_params.rb +15 -0
  42. data/lib/karafka/errors.rb +50 -0
  43. data/lib/karafka/fetcher.rb +44 -0
  44. data/lib/karafka/helpers/class_matcher.rb +78 -0
  45. data/lib/karafka/helpers/config_retriever.rb +46 -0
  46. data/lib/karafka/helpers/multi_delegator.rb +33 -0
  47. data/lib/karafka/instrumentation/listener.rb +112 -0
  48. data/lib/karafka/instrumentation/logger.rb +55 -0
  49. data/lib/karafka/instrumentation/monitor.rb +64 -0
  50. data/lib/karafka/loader.rb +28 -0
  51. data/lib/karafka/params/dsl.rb +156 -0
  52. data/lib/karafka/params/params_batch.rb +46 -0
  53. data/lib/karafka/parsers/json.rb +38 -0
  54. data/lib/karafka/patches/dry_configurable.rb +35 -0
  55. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  56. data/lib/karafka/persistence/client.rb +25 -0
  57. data/lib/karafka/persistence/consumer.rb +38 -0
  58. data/lib/karafka/persistence/topic.rb +29 -0
  59. data/lib/karafka/process.rb +64 -0
  60. data/lib/karafka/responders/builder.rb +36 -0
  61. data/lib/karafka/responders/topic.rb +57 -0
  62. data/lib/karafka/routing/builder.rb +61 -0
  63. data/lib/karafka/routing/consumer_group.rb +61 -0
  64. data/lib/karafka/routing/consumer_mapper.rb +34 -0
  65. data/lib/karafka/routing/proxy.rb +37 -0
  66. data/lib/karafka/routing/router.rb +29 -0
  67. data/lib/karafka/routing/topic.rb +60 -0
  68. data/lib/karafka/routing/topic_mapper.rb +55 -0
  69. data/lib/karafka/schemas/config.rb +24 -0
  70. data/lib/karafka/schemas/consumer_group.rb +77 -0
  71. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  72. data/lib/karafka/schemas/responder_usage.rb +39 -0
  73. data/lib/karafka/schemas/server_cli_options.rb +43 -0
  74. data/lib/karafka/server.rb +94 -0
  75. data/lib/karafka/setup/config.rb +189 -0
  76. data/lib/karafka/setup/configurators/base.rb +29 -0
  77. data/lib/karafka/setup/configurators/params.rb +25 -0
  78. data/lib/karafka/setup/configurators/water_drop.rb +32 -0
  79. data/lib/karafka/setup/dsl.rb +22 -0
  80. data/lib/karafka/status.rb +25 -0
  81. data/lib/karafka/templates/application_consumer.rb.example +6 -0
  82. data/lib/karafka/templates/application_responder.rb.example +11 -0
  83. data/lib/karafka/templates/karafka.rb.example +54 -0
  84. data/lib/karafka/version.rb +7 -0
  85. data/log/.gitkeep +0 -0
  86. metadata +301 -0
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  module Params
    # Params batch represents a set of messages received from Kafka.
    # @note Params objects are built up front but loaded (parsed) lazily, on first use.
    #   That way parsing can be skipped entirely for messages that get rejected without their
    #   params ever being used, which also helps with data that is heavy to parse.
    class ParamsBatch
      include Enumerable

      # Builds up a params batch based on raw kafka messages
      # @param messages_batch [Array<Kafka::FetchedMessage>] messages batch
      # @param topic_parser [Class] topic parser that will be used to parse messages values
      # @note The provided array is converted in place (map!), so after this call it holds
      #   params objects instead of the raw messages
      def initialize(messages_batch, topic_parser)
        @params_batch = messages_batch.map! do |message|
          Karafka::Params::Params.build(message, topic_parser)
        end
      end

      # Iterates over the batch, loading each params object right before yielding it
      # @yieldparam [Karafka::Params::Params] each parsed and loaded params instance
      # @return [Array<Karafka::Params::Params>, Enumerator] underlying array when a block
      #   was given, otherwise an enumerator that loads params as it is consumed
      # @note Invocation of this method will cause loading and parsing each param after another.
      #   If you want to get access without parsing, please use #to_a
      def each
        # Without a block there is nothing to yield to, so we follow the Enumerable
        # convention instead of failing with a LocalJumpError on the first element
        return enum_for(:each) unless block_given?

        @params_batch.each { |param| yield(param.retrieve!) }
      end

      # @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so they
      #   can be used for batch insert, etc. Without invoking it, up until first use, they won't
      #   be parsed
      def parsed
        each(&:itself)
      end

      # @return [Karafka::Params::Params, nil] last element in a loaded state or nil when the
      #   batch is empty
      def last
        @params_batch.last&.retrieve!
      end

      # @return [Array<Karafka::Params::Params>] pure array with params (not parsed)
      def to_a
        @params_batch
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  # Namespace for all the parsers that are supported by default for incoming/outgoing data
  module Parsers
    # Default Karafka JSON parser used to deserialize and serialize data
    class Json
      class << self
        # Converts a JSON string into a Ruby structure
        # @param content [String] JSON content that we want to turn into a hash
        # @return [Hash] hash with parsed JSON data
        # @raise [Karafka::Errors::ParserError] raised when MultiJson is not able to parse
        #   the given content
        # @example
        #   Json.parse("{\"a\":1}") #=> { 'a' => 1 }
        def parse(content)
          ::MultiJson.load(content)
        rescue ::MultiJson::ParseError => e
          raise ::Karafka::Errors::ParserError, e
        end

        # Converts given content into a JSON string
        # @param content [Object] any object that we want to convert to a json string
        # @return [String] Valid JSON string containing serialized data
        # @raise [Karafka::Errors::ParserError] raised when the content is not a string and
        #   does not know how to convert itself to json
        # @note A string is assumed to be an already serialized json and is returned as it is.
        #   Thanks to that, data can be serialized in a custom way before it reaches
        #   the parser
        #
        # @example From an ActiveRecord object
        #   Json.generate(Repository.first) #=> "{\"repository\":{\"id\":\"04b504e0\"}}"
        # @example From a string (no changes)
        #   Json.generate("{\"a\":1}") #=> "{\"a\":1}"
        def generate(content)
          if content.is_a?(String)
            content
          elsif content.respond_to?(:to_json)
            content.to_json
          else
            raise Karafka::Errors::ParserError, content
          end
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  # Namespace for patches of external gems/libraries
  module Patches
    # Patch that allows using proc based, lazily evaluated settings with Dry Configurable
    # @see https://github.com/dry-rb/dry-configurable/blob/master/lib/dry/configurable.rb
    module DryConfigurable
      # We overwrite ::Dry::Configurable::Config to change the way proc values behave.
      # Unfortunately it does not provide an on call proc evaluation, so this feature
      # had to be added here on demand.
      # @param args Any arguments that DryConfigurable::Config accepts
      def initialize(*args)
        super

        # Every key stored in @config gets its reader wrapped
        @config.each_key { |setting_name| rebuild(setting_name) }
      end

      private

      # Rebuilds a given accessor on this instance, so when the underlying value is a proc
      # that takes no arguments, the proc gets evaluated upon each read. Procs that expect
      # arguments and all non-proc values are returned untouched
      # @param method_name [Symbol] name of an accessor that we want to rebuild
      def rebuild(method_name)
        define_singleton_method(method_name) do
          # Explicit super() reaches the original reader defined below the singleton class
          current = super()
          evaluable = current.is_a?(Proc) && current.parameters.empty?
          evaluable ? current.call : current
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  module Patches
    # Patches for Ruby Kafka gem
    module RubyKafka
      # This patch allows us to inject business logic in between fetches and before the consumer
      # stop, so we can perform stop commit or anything else that we need, since the
      # ruby-kafka fetch loop does not allow that directly.
      # We don't want to use the poll ruby-kafka api as it brings many more problems that we
      # would have to take care of. That way, nothing like that ever happens but we get the
      # control over the stopping process that we need (since we're the ones that initiate it
      # for each thread)
      def consumer_loop
        super do
          # Consumers are stored per topic and then per partition, so we flatten both levels
          # and keep only those that are able to run callbacks
          per_topic = Karafka::Persistence::Consumer.all.values
          consumers = per_topic
                      .flat_map(&:values)
                      .select { |consumer| consumer.respond_to?(:run_callbacks) }
          broadcast = ->(event) { consumers.each { |consumer| consumer.run_callbacks event } }

          if Karafka::App.stopped?
            broadcast.call(:before_stop)
            Karafka::Persistence::Client.read.stop
          else
            broadcast.call(:before_poll)
            yield
            broadcast.call(:after_poll)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  module Persistence
    # Persistence layer to store current thread messages consumer client for further use
    class Client
      # Thread.current key under which we store current thread messages consumer client
      PERSISTENCE_SCOPE = :client

      class << self
        # Stores the client under the current thread scope
        # @param client [Karafka::Connection::Client] messages consumer client of
        #   a current thread
        # @return [Karafka::Connection::Client] persisted messages consumer client
        def write(client)
          Thread.current[PERSISTENCE_SCOPE] = client
        end

        # Reads the client that was stored for the current thread
        # @return [Karafka::Connection::Client] persisted messages consumer client
        # @raise [Karafka::Errors::MissingClient] raised when there is no messages consumer
        #   client stored for the current thread but we try to use it anyway
        def read
          stored = Thread.current[PERSISTENCE_SCOPE]
          raise(Errors::MissingClient) unless stored

          stored
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  # Module used to provide a persistent cache layer for Karafka components that need to be
  # shared inside of a same thread
  module Persistence
    # Class used to provide a per thread cache of consumer instances for a given topic and
    # partition. When the persistent mode is turned on for a topic, the same consumer
    # instance is reused across fetches; otherwise a new one is built each time
    class Consumer
      # Thread.current scope under which we store consumers data
      PERSISTENCE_SCOPE = :consumers

      # @return [Hash] current thread persistence scope hash with all the consumers,
      #   structured as topic => { partition => consumer }
      # @note This does not need to be threadsafe (Hash) as it is always executed in a
      #   current thread context
      def self.all
        Thread.current[PERSISTENCE_SCOPE] ||= Hash.new { |topics, topic| topics[topic] = {} }
      end

      # Fetches (for persistent topics) or builds a consumer instance that will be
      # used to process messages from a given topic and partition
      # @param topic [Karafka::Routing::Topic] topic instance for which we might cache
      # @param partition [Integer] number of partition for which we want to cache
      # @return [Karafka::BaseConsumer] base consumer descendant
      def self.fetch(topic, partition)
        partitions = all[topic]
        # Only persistent topics are allowed to reuse what was stored previously
        reusable = partitions[partition] if topic.persistent
        # We always store a current instance for callback reasons
        partitions[partition] = reusable || topic.consumer.new
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  module Persistence
    # Local cache for routing topics
    # We use it in order not to build string instances and remap incoming topic upon each
    # message / message batches received
    class Topic
      # Thread.current scope under which we store topics data
      PERSISTENCE_SCOPE = :topics

      class << self
        # Fetches a routing topic, looking it up only when it is not in the thread cache yet
        # @param group_id [String] group id for which we fetch a topic representation
        # @param raw_topic_name [String] raw topic name (before remapping) for which we fetch a
        #   topic representation
        # @return [Karafka::Routing::Topic] remapped topic representation that can be used
        #   further on when working with given parameters
        def fetch(group_id, raw_topic_name)
          cache = (Thread.current[PERSISTENCE_SCOPE] ||= Hash.new { |groups, id| groups[id] = {} })
          group_topics = cache[group_id]

          group_topics[raw_topic_name] ||= begin
            # We map from incoming topic name, as it might be namespaced, etc.
            # @see topic_mapper internal docs
            mapped_topic_name = Karafka::App.config.topic_mapper.incoming(raw_topic_name)
            Routing::Router.find("#{group_id}_#{mapped_topic_name}")
          end
        end
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  # Class used to catch signals from ruby Signal class in order to manage Karafka stop
  # @note There might be only one process - this class is a singleton
  class Process
    include Singleton

    # Signal types that we handle
    HANDLED_SIGNALS = %i[
      SIGINT
      SIGQUIT
      SIGTERM
    ].freeze

    # For each handled signal we define an on_<signal> registration method
    # (on_sigint, on_sigquit, on_sigterm)
    HANDLED_SIGNALS.each do |signal|
      # Assigns a callback that will happen when a certain signal is sent
      # to the Karafka server instance
      # @note It does not define the callback itself - it needs to be passed in a block
      # @example Define an action that should be taken on_sigint
      #   process.on_sigint do
      #     Karafka.logger.info('Log something here')
      #     exit
      #   end
      define_method(:"on_#{signal.downcase}") do |&callback|
        @callbacks[signal] << callback
      end
    end

    # Creates an instance of process with a callbacks registry in which each signal
    # starts with an empty list of callbacks
    def initialize
      @callbacks = Hash.new { |registry, signal| registry[signal] = [] }
    end

    # Method catches all HANDLED_SIGNALS and performs appropriate callbacks (if defined)
    # @note If there are no callbacks, this method will just ignore a given signal that was sent
    # @yield [Block] block of code that we want to execute and supervise
    def supervise
      HANDLED_SIGNALS.each(&method(:trap_signal))
      yield
    end

    private

    # Traps a single signal and performs callbacks (if any) or just ignores this signal
    # @param [Symbol] signal type that we want to catch
    def trap_signal(signal)
      trap(signal) do
        notice_signal(signal)
        # The registry builds an empty array for unknown signals, so there is always
        # something to iterate over
        @callbacks[signal].each(&:call)
      end
    end

    # Informs monitoring about trapped signal
    # @param [Symbol] signal type that we received
    # @note We cannot perform logging from trap context, that's why
    #   we have to spin up a new thread to do this
    def notice_signal(signal)
      Thread.new do
        Karafka.monitor.instrument('process.notice_signal', caller: self, signal: signal)
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  # Responders namespace encapsulates all the internal responder implementation parts
  module Responders
    # Responders builder is used to find (based on the consumer class name) a responder
    # that matches the consumer. We use it when a user does not provide a responder inside
    # the routing, but still names the responder with the same convention (and namespaces)
    # as the consumer
    #
    # @example Matching responder exists
    #   Karafka::Responders::Builder.new(NewEventsConsumer).build #=> NewEventsResponder
    # @example Matching responder does not exist
    #   Karafka::Responders::Builder.new(NewBuildsConsumer).build #=> nil
    class Builder
      # @param consumer_class [Karafka::BaseConsumer, nil] descendant of
      #   Karafka::BaseConsumer
      # @note When nothing matching is found, #build will return nil (nil is accepted, because
      #   it means that a given consumer does not pipe stuff further on)
      def initialize(consumer_class)
        @consumer_class = consumer_class
      end

      # Tries to figure out a responder based on a consumer class name, by swapping the
      # 'Consumer' part of the name with 'Responder'
      # @return [Class] Responder class (not an instance)
      # @return [nil] or nil if there's no matching responding class
      def build
        matcher = Helpers::ClassMatcher.new(@consumer_class, from: 'Consumer', to: 'Responder')
        matcher.match
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  module Responders
    # Topic describes a single topic on which we want to respond with responding requirements
    # @example Define topic (required by default)
    #   Karafka::Responders::Topic.new(:topic_name, {}) #=> #<Karafka::Responders::Topic...
    # @example Define optional topic
    #   Karafka::Responders::Topic.new(:topic_name, required: false)
    # @example Define topic on which we want to respond multiple times
    #   Karafka::Responders::Topic.new(:topic_name, multiple_usage: true)
    class Topic
      # Name of the topic on which we want to respond
      attr_reader :name

      # @param name [Symbol, String] name of a topic on which we want to respond
      # @param options [Hash] non-default options for this topic
      # @return [Karafka::Responders::Topic] topic description object
      def initialize(name, options)
        @name = name.to_s
        @options = options
      end

      # @return [Boolean] is this a required topic (if not, it is optional). Topics are
      #   required unless the :required option was provided
      def required?
        @options.fetch(:required, true)
      end

      # @return [Boolean] do we expect to use it multiple times in a single respond flow
      def multiple_usage?
        @options[:multiple_usage] || false
      end

      # @return [Boolean] was usage of this topic registered or not
      def registered?
        @options[:registered] == true
      end

      # @return [Boolean] do we want to use async producer. Defaults to false as the sync producer
      #   is safer and introduces less problems
      def async?
        @options.fetch(:async, false)
      end

      # @return [Hash] hash with this topic attributes and options
      def to_h
        details = { name: name }
        details[:multiple_usage] = multiple_usage?
        details[:required] = required?
        details[:registered] = registered?
        details[:async] = async?
        details
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  module Routing
    # Builder used as a DSL layer for building consumers and telling them which topics to consume
    # @example Build a simple (most common) route
    #   consumers do
    #     topic :new_videos do
    #       consumer NewVideosConsumer
    #     end
    #   end
    class Builder < Array
      include Singleton

      # Used to draw routes for Karafka
      # @note After it is done drawing it will validate all the routes to make sure that
      #   they are correct and that there are no topic/group duplications (this is forbidden)
      # @yield Evaluates provided block in a builder context so we can describe routes
      # @raise [Karafka::Errors::InvalidConfiguration] raised when any of the consumer groups
      #   does not pass the consumer group schema validation
      # @example
      #   draw do
      #     topic :xyz do
      #     end
      #   end
      def draw(&block)
        instance_eval(&block)

        each do |consumer_group|
          hashed_group = consumer_group.to_h
          validation_result = Karafka::Schemas::ConsumerGroup.call(hashed_group)
          # We need to move on to the next group here and not return from the method,
          # otherwise a valid first group would prevent all the others from being validated
          next if validation_result.success?

          raise Errors::InvalidConfiguration, validation_result.errors
        end
      end

      # @return [Array<Karafka::Routing::ConsumerGroup>] only active consumer groups that
      #   we want to use. Since Karafka supports multi-process setup, we need to be able
      #   to pick only those consumer groups that should be active in our given process context
      def active
        select(&:active?)
      end

      private

      # Builds and saves given consumer group
      # @param group_id [String, Symbol] name for consumer group
      # @yield Evaluates a given block in a consumer group context
      def consumer_group(group_id, &block)
        consumer_group = ConsumerGroup.new(group_id.to_s)
        self << Proxy.new(consumer_group, &block).target
      end

      # Builds a consumer group named after the topic and defines this single topic in it
      # @param topic_name [String, Symbol] name of a topic from which we want to consume
      # @yield Evaluates a given block in a topic context
      def topic(topic_name, &block)
        consumer_group(topic_name) do
          # This block is handed to the Proxy built in #consumer_group, so the topic call
          # below is presumably resolved against the consumer group, not this builder
          topic(topic_name, &block).tap(&:build)
        end
      end
    end
  end
end