karafka 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.console_irbrc +13 -0
  3. data/.github/ISSUE_TEMPLATE.md +2 -0
  4. data/.gitignore +68 -0
  5. data/.rspec +1 -0
  6. data/.ruby-gemset +1 -0
  7. data/.ruby-version +1 -0
  8. data/.travis.yml +17 -0
  9. data/CHANGELOG.md +371 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +42 -0
  12. data/Gemfile +12 -0
  13. data/Gemfile.lock +111 -0
  14. data/MIT-LICENCE +18 -0
  15. data/README.md +95 -0
  16. data/bin/karafka +19 -0
  17. data/config/errors.yml +6 -0
  18. data/karafka.gemspec +35 -0
  19. data/lib/karafka.rb +68 -0
  20. data/lib/karafka/app.rb +52 -0
  21. data/lib/karafka/attributes_map.rb +67 -0
  22. data/lib/karafka/backends/inline.rb +17 -0
  23. data/lib/karafka/base_controller.rb +60 -0
  24. data/lib/karafka/base_responder.rb +185 -0
  25. data/lib/karafka/cli.rb +54 -0
  26. data/lib/karafka/cli/base.rb +78 -0
  27. data/lib/karafka/cli/console.rb +29 -0
  28. data/lib/karafka/cli/flow.rb +46 -0
  29. data/lib/karafka/cli/info.rb +29 -0
  30. data/lib/karafka/cli/install.rb +43 -0
  31. data/lib/karafka/cli/server.rb +67 -0
  32. data/lib/karafka/connection/config_adapter.rb +112 -0
  33. data/lib/karafka/connection/consumer.rb +121 -0
  34. data/lib/karafka/connection/listener.rb +51 -0
  35. data/lib/karafka/connection/processor.rb +61 -0
  36. data/lib/karafka/controllers/callbacks.rb +54 -0
  37. data/lib/karafka/controllers/includer.rb +51 -0
  38. data/lib/karafka/controllers/responders.rb +19 -0
  39. data/lib/karafka/controllers/single_params.rb +15 -0
  40. data/lib/karafka/errors.rb +43 -0
  41. data/lib/karafka/fetcher.rb +48 -0
  42. data/lib/karafka/helpers/class_matcher.rb +78 -0
  43. data/lib/karafka/helpers/config_retriever.rb +46 -0
  44. data/lib/karafka/helpers/multi_delegator.rb +33 -0
  45. data/lib/karafka/loader.rb +29 -0
  46. data/lib/karafka/logger.rb +53 -0
  47. data/lib/karafka/monitor.rb +98 -0
  48. data/lib/karafka/params/params.rb +128 -0
  49. data/lib/karafka/params/params_batch.rb +41 -0
  50. data/lib/karafka/parsers/json.rb +38 -0
  51. data/lib/karafka/patches/dry_configurable.rb +31 -0
  52. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  53. data/lib/karafka/persistence/consumer.rb +25 -0
  54. data/lib/karafka/persistence/controller.rb +38 -0
  55. data/lib/karafka/process.rb +63 -0
  56. data/lib/karafka/responders/builder.rb +35 -0
  57. data/lib/karafka/responders/topic.rb +57 -0
  58. data/lib/karafka/routing/builder.rb +61 -0
  59. data/lib/karafka/routing/consumer_group.rb +61 -0
  60. data/lib/karafka/routing/consumer_mapper.rb +33 -0
  61. data/lib/karafka/routing/proxy.rb +37 -0
  62. data/lib/karafka/routing/router.rb +29 -0
  63. data/lib/karafka/routing/topic.rb +66 -0
  64. data/lib/karafka/routing/topic_mapper.rb +55 -0
  65. data/lib/karafka/schemas/config.rb +21 -0
  66. data/lib/karafka/schemas/consumer_group.rb +65 -0
  67. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  68. data/lib/karafka/schemas/responder_usage.rb +39 -0
  69. data/lib/karafka/schemas/server_cli_options.rb +43 -0
  70. data/lib/karafka/server.rb +62 -0
  71. data/lib/karafka/setup/config.rb +163 -0
  72. data/lib/karafka/setup/configurators/base.rb +35 -0
  73. data/lib/karafka/setup/configurators/water_drop.rb +29 -0
  74. data/lib/karafka/status.rb +25 -0
  75. data/lib/karafka/templates/application_controller.rb.example +7 -0
  76. data/lib/karafka/templates/application_responder.rb.example +11 -0
  77. data/lib/karafka/templates/karafka.rb.example +41 -0
  78. data/lib/karafka/version.rb +7 -0
  79. data/log/.gitkeep +0 -0
  80. metadata +267 -0
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
module Karafka
  # Module containing classes and methods that provide some additional functionalities
  module Helpers
    # Multidelegator is used to delegate calls to multiple targets
    # @note Taken from http://stackoverflow.com/questions/6407141
    # @note Delegated methods are defined on the MultiDelegator class itself, so every
    #   instance responds to every method name that was ever passed to .delegate
    class MultiDelegator
      # @param targets objects to which we want to delegate methods
      def initialize(*targets)
        @targets = targets
      end

      class << self
        # Defines, for each given name, an instance method that forwards the call (with its
        # arguments and block) to every target and collects the results
        # @param methods names that should be delegated to
        # @return [Class] self, so the call can be chained with .to
        # @example Delegate write and close to STDOUT and file
        #   Logger.new MultiDelegator.delegate(:write, :close).to(STDOUT, log_file)
        def delegate(*methods)
          methods.each do |m|
            # The block has to be captured explicitly, otherwise it would be silently dropped
            # and never reach the targets
            define_method(m) do |*args, &block|
              @targets.map { |t| t.send(m, *args, &block) }
            end
          end

          self
        end

        # .to(*targets) builds a delegator instance for the given targets
        alias to new
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
module Karafka
  # Loader that requires application files directory by directory, in a fixed order
  module Loader
    # Directories (relative to the root) in the order in which they should be loaded
    DIRS = %w[
      config/initializers
      lib
      app
    ].freeze

    class << self
      # Goes through DIRS in order and loads each directory that exists under the root
      # @param root [String] root path from which we want to start
      def load(root)
        DIRS.each do |dir|
          candidate = File.join(root, dir)
          load!(candidate) if File.exist?(candidate)
        end
      end

      # Requires all the ruby files from one path (delegates to require_all)
      # @param path [String] path (dir) from which we want to load ruby files
      def load!(path)
        require_all(path)
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
module Karafka
  # Default Karafka logger
  # @note It is a ::Logger subclass used as a singleton, writing both to STDOUT and to a
  #   per-environment log file
  class Logger < ::Logger
    include Singleton

    # Log level per environment name; the :default entry is used for unknown environments
    ENV_MAP = {
      'production' => ::Logger::ERROR,
      'test' => ::Logger::ERROR,
      'development' => ::Logger::INFO,
      'debug' => ::Logger::DEBUG,
      default: ::Logger::INFO
    }.freeze

    # Builds the logger, making sure first that the log directory is in place
    def initialize(*_args)
      ensure_dir_exists
      super(target)
      env_level = ENV_MAP[Karafka.env]
      self.level = env_level || ENV_MAP[:default]
    end

    private

    # @return [Karafka::Helpers::MultiDelegator] multi delegator instance to which we will be
    #   writing logs. It forwards #write and #close to STDOUT and to the log file, so
    #   everything is logged in both places at the same time
    def target
      delegator = Karafka::Helpers::MultiDelegator.delegate(:write, :close)
      delegator.to(STDOUT, file)
    end

    # Creates the directory of the log file when it is missing
    def ensure_dir_exists
      dir = File.dirname(log_path)
      return if Dir.exist?(dir)

      FileUtils.mkdir_p(dir)
    end

    # @return [Pathname] path to a file to which we should log (memoized)
    def log_path
      @log_path ||= Karafka::App.root.join("log/#{Karafka.env}.log")
    end

    # @return [File] file to which we want to write our logs (memoized)
    # @note File is being opened in append mode ('a')
    def file
      @file ||= File.open(log_path, 'a')
    end
  end
end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
module Karafka
  # Monitor is used to hook up external monitoring services to monitor how Karafka works
  # It provides a standardized API for checking incoming messages/enqueueing etc
  # By default it implements logging functionalities but can be replaced with any more
  # sophisticated logging/monitoring system like Errbit, Airbrake, NewRelic
  # @note This class acts as a singleton because we are only permitted to have single monitor
  #   per running process (just as logger)
  # Keep in mind, that if you create your own monitor object, you will have to implement also
  # logging functionality (or just inherit, super and do whatever you want)
  class Monitor
    include Singleton

    # Logs (info level) who called the monitor, from which method and with what options
    # @param caller_class [Class] class of object that executed this call
    # @param options [Hash] hash with options that we passed to notice. It differs based
    #   on who and when is calling
    # @note The name of the calling method is not passed in - it is read from the call stack
    #   (see #caller_label)
    # @example Notice about consuming with controller_class
    #   Karafka.monitor.notice(self.class, controller_class: controller_class)
    # @example Notice about terminating with a signal
    #   Karafka.monitor.notice(self.class, signal: signal)
    def notice(caller_class, options = {})
      # caller_label must be invoked directly from this method, as it inspects the call stack
      logger.info("#{caller_class}##{caller_label} with #{options}")
    end

    # Logs an error using the level assigned to the class that reported it (info when the
    # class is not listed in #caller_exceptions_map)
    # @param caller_class [Class] class of object that executed this call
    # @param e [Exception] exception that was raised
    # @example Notify about error
    #   Karafka.monitor.notice_error(self.class, e)
    def notice_error(caller_class, e)
      level, = caller_exceptions_map.find { |_level, types| types.include?(caller_class) }

      logger.public_send(level || :info, e)
    end

    private

    # @return [Hash] hash mapping a log level to the classes whose reported errors should be
    #   logged with that level
    # @note Any class that is not listed here is logged with info
    # @note Those are not maps of exception classes but of classes that were callers of
    #   #notice_error
    def caller_exceptions_map
      @caller_exceptions_map ||= {
        error: [
          Karafka::Connection::Consumer,
          Karafka::Connection::Listener,
          Karafka::Params::Params
        ],
        fatal: [
          Karafka::Fetcher
        ]
      }
    end

    # @return [String] label of method that invoked #notice or #notice_error
    # @example Check label of method that invoked #notice
    #   caller_label #=> 'fetch'
    # @example Check label of method that invoked #notice in a block
    #   caller_label #=> 'block in fetch'
    # @example Check label of method that invoked #notice_error
    #   caller_label #=> 'rescue in target'
    # @see https://github.com/karafka/karafka/issues/128
    # @note It won't work if the method calling the monitor is defined using define_method
    def caller_label
      # Position of Karafka::Monitor among ancestors: 0 for the monitor itself, more when
      # someone inherits from this class (the caller chain is longer then)
      index = self.class.ancestors.index(Karafka::Monitor)
      # Two consecutive frames with the same label mean that we were reached via super from
      # a subclass method with the same name
      nearest, following = caller_locations(1, 2)
      super_execution = nearest.label == following.label

      scope = if super_execution
                1
              elsif index.positive?
                0
              else
                1
              end

      caller_locations(index + 1, 2)[scope].label
    end

    # @return [Logger] logger instance
    def logger
      Karafka.logger
    end
  end
end
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
module Karafka
  # Params namespace encapsulating all the logic that is directly related to params handling
  module Params
    # Hash with indifferent access extended with lazy parsing of the message value.
    # The raw value is kept untouched until #retrieve! is called, so messages that end up
    # not being used never go through the parser.
    class Params < HashWithIndifferentAccess
      # Kafka::FetchedMessage attributes that are copied into params
      KAFKA_MESSAGE_ATTRIBUTES = %i[
        value
        partition
        offset
        key
        create_time
      ].freeze

      # Params fields that are additionally exposed as reader methods, so a params object
      # offers the same method-based api for those fields as the Kafka message does
      PARAMS_METHOD_ATTRIBUTES = %i[
        topic
        partition
        offset
        key
        create_time
      ].freeze

      class << self
        # Instances are meant to be built only via this method
        #
        # @param message [Kafka::FetchedMessage, Hash] message that we get out of Kafka or
        #   a hash when we retrieve data that was already turned into a hash
        # @param parser [Class] parser class that will be used to parse the message value
        # @return [Karafka::Params::Params] params object on which the parser was not yet used
        # @example Build params instance from a hash
        #   Karafka::Params::Params.build({ key: 'value' }, parser) #=> params object
        # @example Build params instance from a Kafka::FetchedMessage object
        #   Karafka::Params::Params.build(message, parser) #=> params object
        def build(message, parser)
          # Hash case happens inside backends that interchange data
          return new(parser: parser).send(:merge!, message) if message.is_a?(Hash)

          # Raw Kafka message case
          instance = new(parser: parser, parsed: false, received_at: Time.now)

          KAFKA_MESSAGE_ATTRIBUTES.each do |attribute|
            instance[attribute] = message.send(attribute)
          end

          # Raw messages might carry a topic name that was modified by a topic mapper.
          # It is mapped back here, so the internal flow uses the non-modified name
          instance[:topic] = Karafka::App.config.topic_mapper.incoming(message.topic)
          instance
        end

        # Defines a reader method returning the value stored under the given key
        # @note Won't work for complex key names that contain spaces, etc
        # @param key [Symbol] name of a field that we want to retrieve with a method call
        # @example
        #   key_attr_reader :example
        #   params.example #=> 'my example value'
        def key_attr_reader(key)
          define_method(key) { self[key] }
        end
      end

      # Runs the parser on the raw value (unless it already happened) and merges the output
      # into this object
      # @return [Karafka::Params::Params] self - either untouched (when already parsed) or
      #   with the parsed data merged in
      def retrieve!
        return self if self[:parsed]

        raw_value = delete(:value)
        merge!(parse(raw_value))
      end

      PARAMS_METHOD_ATTRIBUTES.each { |attribute| key_attr_reader(attribute) }

      private

      # Overwritten merge! - for keys present on both sides the current value wins, so the
      # incoming hash never replaces anything that is already set
      # @param other_hash [Hash, HashWithIndifferentAccess] hash that we want to merge into current
      # @return [Karafka::Params::Params] our parameters hash with merged values
      # @example Merge with hash without same keys
      #   new(a: 1, b: 2).merge!(c: 3) #=> { a: 1, b: 2, c: 3 }
      # @example Merge with hash with same keys (symbol based)
      #   new(a: 1).merge!(a: 2) #=> { a: 1 }
      # @example Merge with hash with same keys (string based)
      #   new(a: 1).merge!('a' => 2) #=> { a: 1 }
      # @example Merge with hash with same keys (current string based)
      #   new('a' => 1).merge!(a: 2) #=> { a: 1 }
      def merge!(other_hash)
        super(other_hash) { |_key, current, _incoming| current }
      end

      # Parses the raw value with the parser stored under the :parser key
      # @param value [String] raw data that we want to parse
      # @return [Hash] parsed data
      # @raise [Karafka::Errors::ParserError] re-raised after the monitor was notified
      # @note The object is marked as parsed no matter if parsing succeeded or not
      def parse(value)
        self[:parser].parse(value)
      rescue ::Karafka::Errors::ParserError => e
        Karafka.monitor.notice_error(self.class, e)
        raise e
      ensure
        self[:parsed] = true
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
module Karafka
  module Params
    # Params batch represents a set of messages received from Kafka.
    # @note Params are built for every message up front, but retrieve! is called on them only
    #   when they are yielded by #each. That way parsing can be skipped for messages that are
    #   never iterated over, which helps when handling really heavy data (in terms of parsing).
    class ParamsBatch
      include Enumerable

      # Builds up a params batch based on raw kafka messages
      # @param messages_batch [Array<Kafka::FetchedMessage>] messages batch
      # @param topic_parser [Class] topic parser passed to each built params object
      def initialize(messages_batch, topic_parser)
        @params_batch = messages_batch.map do |message|
          Karafka::Params::Params.build(message, topic_parser)
        end
      end

      # @yieldparam [Karafka::Params::Params] each params instance, after calling retrieve!
      # @return [Enumerator] when no block is given (nothing is retrieved until the
      #   enumerator is consumed)
      # @return [Array<Karafka::Params::Params>] the underlying array when a block is given
      # @note Invocation of this method with a block will call retrieve! on each param after
      #   another. If you want to get access without that, please use #to_a
      def each
        # Follow the Enumerable convention instead of raising LocalJumpError without a block
        return enum_for(:each) { @params_batch.size } unless block_given?

        @params_batch.each { |param| yield(param.retrieve!) }
      end

      # @return [Array<Karafka::Params::Params>] all the params after retrieve! was called on
      #   each of them, so they can be used for batch insert, etc.
      def parsed
        each(&:itself)
      end

      # @return [Array<Karafka::Params::Params>] pure array with params (retrieve! not called)
      def to_a
        @params_batch
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
module Karafka
  # Module for all supported by default parsers for incoming/outgoing data
  module Parsers
    # Default Karafka Json parser for serializing and deserializing data
    class Json
      class << self
        # Loads the given content with MultiJson
        # @param content [String] content based on which we want to get our hash
        # @return [Hash] hash with parsed JSON data
        # @raise [Karafka::Errors::ParserError] raised when MultiJson reports a parse error
        # @example
        #   Json.parse("{\"a\":1}") #=> { 'a' => 1 }
        def parse(content)
          ::MultiJson.load(content)
        rescue ::MultiJson::ParseError => e
          raise ::Karafka::Errors::ParserError, e
        end

        # @param content [Object] any object that we want to convert to a json string
        # @return [String] json string containing serialized data
        # @raise [Karafka::Errors::ParserError] raised when content is not a string and does
        #   not respond to #to_json
        # @note A string is assumed to be json already and is returned as it is. This allows
        #   serializing data in a custom way before it is forwarded to the parser
        #
        # @example From an ActiveRecord object
        #   Json.generate(Repository.first) #=> "{\"repository\":{\"id\":\"04b504e0\"}}"
        # @example From a string (no changes)
        #   Json.generate("{\"a\":1}") #=> "{\"a\":1}"
        def generate(content)
          if content.is_a?(String)
            content
          elsif content.respond_to?(:to_json)
            content.to_json
          else
            raise Karafka::Errors::ParserError, content
          end
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
module Karafka
  # Namespace for patches of external gems/libraries
  module Patches
    # Patch that will allow to use proc based lazy evaluated settings with Dry Configurable
    # @see https://github.com/dry-rb/dry-configurable/blob/master/lib/dry/configurable.rb
    module DryConfigurable
      # We overwrite ::Dry::Configurable::Config to change on proc behaviour
      # Unfortunately it does not provide an on call proc evaluation, so
      # this feature had to be added here on demand
      # @param args Any arguments that DryConfigurable::Config accepts
      def initialize(*args)
        super

        # Wrap the accessor of every key that ended up in @config
        @config.each_key(&method(:rebuild))
      end

      private

      # Method that rebuilds a given accessor, so when it holds a proc value, the proc is
      # called and its result is returned instead of the proc itself
      # @param method_name [Symbol] name of an accessor that we want to rebuild
      def rebuild(method_name)
        define_singleton_method method_name do
          # Read the underlying setting exactly once, so the type check and the returned
          # value always refer to the same object
          value = super()
          value.is_a?(Proc) ? value.call : value
        end
      end
    end
  end
end