karafka 0.5.0.3 → 0.6.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. checksums.yaml +4 -4
  2. data/.console_irbrc +13 -0
  3. data/.github/ISSUE_TEMPLATE.md +2 -0
  4. data/.gitignore +1 -0
  5. data/CHANGELOG.md +59 -1
  6. data/CODE_OF_CONDUCT.md +46 -0
  7. data/CONTRIBUTING.md +67 -0
  8. data/Gemfile +2 -1
  9. data/Gemfile.lock +46 -147
  10. data/README.md +51 -952
  11. data/Rakefile +5 -14
  12. data/karafka.gemspec +19 -13
  13. data/lib/karafka.rb +7 -4
  14. data/lib/karafka/app.rb +10 -6
  15. data/lib/karafka/attributes_map.rb +67 -0
  16. data/lib/karafka/base_controller.rb +42 -52
  17. data/lib/karafka/base_responder.rb +30 -14
  18. data/lib/karafka/base_worker.rb +11 -26
  19. data/lib/karafka/cli.rb +2 -0
  20. data/lib/karafka/cli/base.rb +2 -0
  21. data/lib/karafka/cli/console.rb +7 -1
  22. data/lib/karafka/cli/flow.rb +13 -13
  23. data/lib/karafka/cli/info.rb +7 -4
  24. data/lib/karafka/cli/install.rb +4 -3
  25. data/lib/karafka/cli/server.rb +3 -1
  26. data/lib/karafka/cli/worker.rb +2 -0
  27. data/lib/karafka/connection/config_adapter.rb +103 -0
  28. data/lib/karafka/connection/listener.rb +16 -12
  29. data/lib/karafka/connection/messages_consumer.rb +86 -0
  30. data/lib/karafka/connection/messages_processor.rb +74 -0
  31. data/lib/karafka/errors.rb +15 -29
  32. data/lib/karafka/fetcher.rb +10 -8
  33. data/lib/karafka/helpers/class_matcher.rb +2 -0
  34. data/lib/karafka/helpers/config_retriever.rb +46 -0
  35. data/lib/karafka/helpers/multi_delegator.rb +2 -0
  36. data/lib/karafka/loader.rb +4 -2
  37. data/lib/karafka/logger.rb +37 -36
  38. data/lib/karafka/monitor.rb +3 -1
  39. data/lib/karafka/params/interchanger.rb +2 -0
  40. data/lib/karafka/params/params.rb +34 -41
  41. data/lib/karafka/params/params_batch.rb +46 -0
  42. data/lib/karafka/parsers/json.rb +4 -2
  43. data/lib/karafka/patches/dry_configurable.rb +2 -0
  44. data/lib/karafka/process.rb +4 -2
  45. data/lib/karafka/responders/builder.rb +2 -0
  46. data/lib/karafka/responders/topic.rb +14 -6
  47. data/lib/karafka/routing/builder.rb +22 -59
  48. data/lib/karafka/routing/consumer_group.rb +54 -0
  49. data/lib/karafka/routing/mapper.rb +2 -0
  50. data/lib/karafka/routing/proxy.rb +37 -0
  51. data/lib/karafka/routing/router.rb +18 -16
  52. data/lib/karafka/routing/topic.rb +78 -0
  53. data/lib/karafka/schemas/config.rb +36 -0
  54. data/lib/karafka/schemas/consumer_group.rb +56 -0
  55. data/lib/karafka/schemas/responder_usage.rb +38 -0
  56. data/lib/karafka/server.rb +5 -3
  57. data/lib/karafka/setup/config.rb +79 -32
  58. data/lib/karafka/setup/configurators/base.rb +2 -0
  59. data/lib/karafka/setup/configurators/celluloid.rb +2 -0
  60. data/lib/karafka/setup/configurators/sidekiq.rb +2 -0
  61. data/lib/karafka/setup/configurators/water_drop.rb +15 -3
  62. data/lib/karafka/status.rb +2 -0
  63. data/lib/karafka/templates/app.rb.example +15 -5
  64. data/lib/karafka/templates/application_worker.rb.example +0 -6
  65. data/lib/karafka/version.rb +2 -1
  66. data/lib/karafka/workers/builder.rb +2 -0
  67. metadata +109 -60
  68. data/lib/karafka/cli/routes.rb +0 -36
  69. data/lib/karafka/connection/consumer.rb +0 -33
  70. data/lib/karafka/connection/message.rb +0 -17
  71. data/lib/karafka/connection/topic_consumer.rb +0 -94
  72. data/lib/karafka/responders/usage_validator.rb +0 -60
  73. data/lib/karafka/routing/route.rb +0 -113
  74. data/lib/karafka/setup/config_schema.rb +0 -44
  75. data/lib/karafka/setup/configurators/worker_glass.rb +0 -13
  76. data/lib/karafka/templates/config.ru.example +0 -13
@@ -1,13 +1,15 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Namespace used to encapsulate all the internal errors of Karafka
3
5
  module Errors
4
6
  # Base class for all the Karafka internal errors
5
- class BaseError < StandardError; end
7
+ BaseError = Class.new(StandardError)
6
8
 
7
9
  # Should be raised when we attempt to parse incoming params but parsing fails
8
10
  # If this error (or its descendant) is detected, we will pass the raw message
9
11
  # into params and proceed further
10
- class ParserError < BaseError; end
12
+ ParserError = Class.new(BaseError)
11
13
 
12
14
  # Raised when router receives topic name which does not correspond with any routes
13
15
  # This can only happen in a case when:
@@ -19,41 +21,25 @@ module Karafka
19
21
  # In case this happens, you will have to create a temporary route that will allow
20
22
  # you to "eat" everything from the Sidekiq queue.
21
23
  # @see https://github.com/karafka/karafka/issues/135
22
- class NonMatchingRouteError < BaseError; end
23
-
24
- # Raised when we have few controllers(inherited from Karafka::BaseController)
25
- # with the same group name
26
- class DuplicatedGroupError < BaseError; end
27
-
28
- # Raised when we have few controllers(inherited from Karafka::BaseController)
29
- # with the same topic name
30
- class DuplicatedTopicError < BaseError; end
31
-
32
- # Raised when we want to use topic name that has unsupported characters
33
- class InvalidTopicName < BaseError; end
34
-
35
- # Raised when we want to use group name that has unsupported characters
36
- class InvalidGroupName < BaseError; end
24
+ NonMatchingRouteError = Class.new(BaseError)
37
25
 
38
26
  # Raised when application does not have ApplicationWorker or other class that directly
39
27
  # inherits from Karafka::BaseWorker
40
- class BaseWorkerDescentantMissing < BaseError; end
28
+ BaseWorkerDescentantMissing = Class.new(BaseError)
41
29
 
42
30
  # Raised when we want to use #respond_with in controllers but we didn't define
43
31
  # (and we couldn't find) any appropriate responder for a given controller
44
- class ResponderMissing < BaseError; end
45
-
46
- # Raised when we want to use #respond_to in responders with a topic that we didn't register
47
- class UnregisteredTopic < BaseError; end
32
+ ResponderMissing = Class.new(BaseError)
48
33
 
49
- # Raised when we send more than one message to a single topic but we didn't allow that when
50
- # we were registering topic in a responder
51
- class TopicMultipleUsage < BaseError; end
52
-
53
- # Raised when we didn't use a topic that was defined as non-optional (required)
54
- class UnusedResponderRequiredTopic < BaseError; end
34
+ # Raised when we don't use or use responder not in the way it expected to based on the
35
+ # topics usage definitions
36
+ InvalidResponderUsage = Class.new(BaseError)
55
37
 
56
38
  # Raised when configuration doesn't match with validation schema
57
- class InvalidConfiguration < BaseError; end
39
+ InvalidConfiguration = Class.new(BaseError)
40
+
41
+ # Raised when processing messages in batches but still want to use #params instead of
42
+ # #params_batch
43
+ ParamsMethodUnavailable = Class.new(BaseError)
58
44
  end
59
45
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Class used to run the Karafka consumer and handle shutting down, restarting etc
3
5
  # @note Creating multiple fetchers will result in having multiple connections to the same
@@ -8,7 +10,7 @@ module Karafka
8
10
  # so we don't have to terminate them
9
11
  def fetch_loop
10
12
  futures = listeners.map do |listener|
11
- listener.future.public_send(:fetch_loop, consumer)
13
+ listener.future.public_send(:fetch_loop, processor)
12
14
  end
13
15
 
14
16
  futures.map(&:value)
@@ -24,16 +26,16 @@ module Karafka
24
26
 
25
27
  # @return [Array<Karafka::Connection::Listener>] listeners that will consume messages
26
28
  def listeners
27
- @listeners ||= App.routes.map do |route|
28
- Karafka::Connection::Listener.new(route)
29
+ @listeners ||= App.consumer_groups.map do |consumer_group|
30
+ Karafka::Connection::Listener.new(consumer_group)
29
31
  end
30
32
  end
31
33
 
32
- # @return [Proc] proc that should be processed when a message arrives
33
- # @yieldparam message [Kafka::FetchedMessage] message from kafka (raw one)
34
- def consumer
35
- lambda do |message|
36
- Karafka::Connection::Consumer.new.consume(message)
34
+ # @return [Proc] proc that should be processed when messages arrive
35
+ # @yieldparam messages [Array<Kafka::FetchedMessage>] messages from kafka (raw)
36
+ def processor
37
+ lambda do |consumer_group_id, messages|
38
+ Karafka::Connection::MessagesProcessor.process(consumer_group_id, messages)
37
39
  end
38
40
  end
39
41
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  module Helpers
3
5
  # Class used to autodetect corresponding classes that are internally inside Karafka framework
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Helpers
5
+ # A helper method that allows us to build methods that try to get a given
6
+ # attribute from its instance value and if it fails, will fallback to
7
+ # the default config or config.kafka value for a given attribute.
8
+ # It is used to simplify the checks.
9
+ # @note Worth noticing, that the value might be equal to false, so even
10
+ # then we need to return it. That's why we check for nil?
11
+ # @example Define config retriever attribute for start_from_beginning
12
+ # class Test
13
+ # extend Karafka::Helpers::ConfigRetriever
14
+ # config_retriever_for :start_from_beginning
15
+ # end
16
+ #
17
+ # Test.new.start_from_beginning #=> false
18
+ # test_instance = Test.new
19
+ # test_instance.start_from_beginning = true
20
+ # test_instance.start_from_beginning #=> true
21
+ module ConfigRetriever
22
+ # Builds proper methods for setting and retrieving (with fallback) given attribute value
23
+ # @param attribute [Symbol] attribute name based on which we will build
24
+ # accessor with fallback
25
+ def config_retriever_for(attribute)
26
+ attr_writer attribute unless method_defined? :"#{attribute}="
27
+
28
+ # Don't redefine if we already have accessor for a given element
29
+ return if method_defined? attribute
30
+
31
+ define_method attribute do
32
+ current_value = instance_variable_get(:"@#{attribute}")
33
+ return current_value unless current_value.nil?
34
+
35
+ value = if Karafka::App.config.respond_to?(attribute)
36
+ Karafka::App.config.public_send(attribute)
37
+ else
38
+ Karafka::App.config.kafka.public_send(attribute)
39
+ end
40
+
41
+ instance_variable_set(:"@#{attribute}", value)
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Module containing classes and methods that provide some additional functionalities
3
5
  module Helpers
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Loader for requiring all the files in a proper order
3
5
  # Some files needs to be required before other, so it will
@@ -5,7 +7,7 @@ module Karafka
5
7
  # any other.
6
8
  class Loader
7
9
  # Order in which we want to load app files
8
- DIRS = %w(
10
+ DIRS = %w[
9
11
  config/initializers
10
12
  lib
11
13
  app/helpers
@@ -20,7 +22,7 @@ module Karafka
20
22
  app/controllers
21
23
  app/aspects
22
24
  app
23
- ).freeze
25
+ ].freeze
24
26
 
25
27
  # Will load files in a proper order (based on DIRS)
26
28
  # @param [String] root path from which we want to start
@@ -1,7 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Default logger for Event Delegator
3
5
  # @note It uses ::Logger features - providing basic logging
4
6
  class Logger < ::Logger
7
+ include Singleton
8
+
5
9
  # Map containing information about log level for given environment
6
10
  ENV_MAP = {
7
11
  'production' => ::Logger::ERROR,
@@ -11,42 +15,39 @@ module Karafka
11
15
  default: ::Logger::INFO
12
16
  }.freeze
13
17
 
14
- class << self
15
- # Returns a logger instance with appropriate settings, log level and environment
16
- def instance
17
- ensure_dir_exists
18
- instance = new(target)
19
- instance.level = ENV_MAP[Karafka.env] || ENV_MAP[:default]
20
- instance
21
- end
22
-
23
- private
24
-
25
- # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
26
- # to which we will be writtng logs
27
- # We use this approach to log stuff to file and to the STDOUT at the same time
28
- def target
29
- Karafka::Helpers::MultiDelegator
30
- .delegate(:write, :close)
31
- .to(STDOUT, file)
32
- end
33
-
34
- # Makes sure the log directory exists
35
- def ensure_dir_exists
36
- dir = File.dirname(log_path)
37
- FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
38
- end
39
-
40
- # @return [Pathname] Path to a file to which we should log
41
- def log_path
42
- Karafka::App.root.join("log/#{Karafka.env}.log")
43
- end
44
-
45
- # @return [File] file to which we want to write our logs
46
- # @note File is being opened in append mode ('a')
47
- def file
48
- File.open(log_path, 'a')
49
- end
18
+ # Creates a new instance of logger ensuring that it has a place to write to
19
+ def initialize(*_args)
20
+ ensure_dir_exists
21
+ super(target)
22
+ self.level = ENV_MAP[Karafka.env] || ENV_MAP[:default]
23
+ end
24
+
25
+ private
26
+
27
+ # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
28
+ # to which we will be writing logs
29
+ # We use this approach to log stuff to file and to the STDOUT at the same time
30
+ def target
31
+ Karafka::Helpers::MultiDelegator
32
+ .delegate(:write, :close)
33
+ .to(STDOUT, file)
34
+ end
35
+
36
+ # Makes sure the log directory exists
37
+ def ensure_dir_exists
38
+ dir = File.dirname(log_path)
39
+ FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
40
+ end
41
+
42
+ # @return [Pathname] Path to a file to which we should log
43
+ def log_path
44
+ @log_path ||= Karafka::App.root.join("log/#{Karafka.env}.log")
45
+ end
46
+
47
+ # @return [File] file to which we want to write our logs
48
+ # @note File is being opened in append mode ('a')
49
+ def file
50
+ @file ||= File.open(log_path, 'a')
50
51
  end
51
52
  end
52
53
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Monitor is used to hookup external monitoring services to monitor how Karafka works
3
5
  # It provides a standarized API for checking incoming messages/enqueueing etc
@@ -53,7 +55,7 @@ module Karafka
53
55
  def caller_exceptions_map
54
56
  @caller_exceptions_map ||= {
55
57
  error: [
56
- Karafka::Connection::Consumer,
58
+ Karafka::Connection::MessagesProcessor,
57
59
  Karafka::Connection::Listener,
58
60
  Karafka::Params::Params
59
61
  ],
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  module Params
3
5
  # Interchangers allow us to format/encode/pack data that is being send to perform_async
@@ -1,59 +1,52 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Params namespace encapsulating all the logic that is directly related to params handling
3
5
  module Params
4
6
  # Class-wrapper for hash with indifferent access with additional lazy loading feature
5
7
  # It provides lazy loading not only until the first usage, but also allows us to skip
6
8
  # using parser until we execute our logic inside worker. That way we can operate with
7
- # heavy-parsing data without slowing down the whole application. If we won't use
8
- # params in before_enqueue (or if we don't us before_enqueue at all), it will make
9
- # Karafka faster, because it will pass data as it is directly to Sidekiq
9
+ # heavy-parsing data without slowing down the whole application.
10
10
  class Params < HashWithIndifferentAccess
11
+ # Kafka::FetchedMessage attributes that we want to use inside of params
12
+ KAFKA_MESSAGE_ATTRIBUTES = %i[
13
+ topic
14
+ value
15
+ partition
16
+ offset
17
+ key
18
+ ].freeze
19
+
11
20
  class << self
12
21
  # We allow building instances only via the #build method
13
- private_class_method :new
14
22
 
15
- # @param message [Karafka::Connection::Message, Hash] message that we get out of Kafka
23
+ # @param message [Kafka::FetchedMessage, Hash] message that we get out of Kafka
16
24
  # in case of building params inside main Karafka process in
17
- # Karafka::Connection::Consumer, or a hash when we retrieve data from Sidekiq
18
- # @param controller [Karafka::BaseController] Karafka's base controllers descendant
19
- # instance that wants to use params
25
+ # Karafka::Connection::Consumer, or a hash when we retrieve data that is already parsed
26
+ # @param parser [Class] parser class that we will use to unparse data
20
27
  # @return [Karafka::Params::Params] Karafka params object not yet used parser for
21
28
  # retrieving data that we've got from Kafka
22
29
  # @example Build params instance from a hash
23
- # Karafka::Params::Params.build({ key: 'value' }, DataController.new) #=> params object
24
- # @example Build params instance from a Karafka::Connection::Message object
25
- # Karafka::Params::Params.build(message, IncomingController.new) #=> params object
26
- def build(message, controller)
30
+ # Karafka::Params::Params.build({ key: 'value' }) #=> params object
31
+ # @example Build params instance from a Kafka::FetchedMessage object
32
+ # Karafka::Params::Params.build(message) #=> params object
33
+ def build(message, parser)
27
34
  # Hash case happens inside workers
28
35
  if message.is_a?(Hash)
29
- defaults(controller).merge!(message)
36
+ new(parser: parser).merge!(message)
30
37
  else
31
- # This happens inside Karafka::Connection::Consumer
32
- defaults(controller).merge!(
38
+ # This happens inside Kafka::FetchedMessagesProcessor
39
+ new(
40
+ parser: parser,
33
41
  parsed: false,
34
- received_at: Time.now,
35
- content: message.content
36
- )
42
+ received_at: Time.now
43
+ ).tap do |instance|
44
+ KAFKA_MESSAGE_ATTRIBUTES.each do |attribute|
45
+ instance[attribute] = message.send(attribute)
46
+ end
47
+ end
37
48
  end
38
49
  end
39
-
40
- private
41
-
42
- # @param controller [Karafka::BaseController] Karafka's base controllers descendant
43
- # instance that wants to use params
44
- # @return [Karafka::Params::Params] freshly initialized only with default values object
45
- # that can be populated with incoming data
46
- def defaults(controller)
47
- # We initialize some default values that will be used both in Karafka main process and
48
- # inside workers
49
- new(
50
- controller: controller.class,
51
- worker: controller.worker,
52
- parser: controller.parser,
53
- topic: controller.topic,
54
- responder: controller.responder
55
- )
56
- end
57
50
  end
58
51
 
59
52
  # @return [Karafka::Params::Params] this will trigger parser execution. If we decide to
@@ -63,7 +56,7 @@ module Karafka
63
56
  def retrieve
64
57
  return self if self[:parsed]
65
58
 
66
- merge!(parse(delete(:content)))
59
+ merge!(parse(delete(:value)))
67
60
  end
68
61
 
69
62
  # Overwritten merge! method - it behaves differently for keys that are the same in our hash
@@ -85,16 +78,16 @@ module Karafka
85
78
 
86
79
  private
87
80
 
88
- # @param content [String] Raw data that we want to parse using controller's parser
81
+ # @param value [String] Raw data that we want to parse using controller's parser
89
82
  # @note If something goes wrong, it will return raw data in a hash with a message key
90
83
  # @return [Hash] parsed data or a hash with message key containing raw data if something
91
84
  # went wrong during parsing
92
- def parse(content)
93
- self[:parser].parse(content)
85
+ def parse(value)
86
+ self[:parser].parse(value)
94
87
  # We catch both of them, because for default JSON - we use JSON parser directly
95
88
  rescue ::Karafka::Errors::ParserError => e
96
89
  Karafka.monitor.notice_error(self.class, e)
97
- return { message: content }
90
+ raise e
98
91
  ensure
99
92
  self[:parsed] = true
100
93
  end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Params
5
+ # Params batch represents a set of messages received from Kafka.
6
+ # @note Params internally are lazy loaded before first use. That way we can skip parsing
7
+ # process if we have before_enqueue that rejects some incoming messages without using params
8
+ # It can be also used when handling really heavy data (in terms of parsing). Without direct
9
+ # usage outside of worker scope, it will pass raw data into sidekiq, so we won't use Karafka
10
+ # working time to parse this data. It will happen only in the worker (where it can take time)
11
+ # that way Karafka will be able to process data really quickly. On the other hand, if we
12
+ # decide to use params somewhere before it hits worker logic, it won't parse it again in
13
+ # the worker - it will use already loaded data and pass it to Redis
14
+ class ParamsBatch
15
+ include Enumerable
16
+
17
+ # Builds up a params batch based on raw kafka messages
18
+ # @param messages_batch [Array<Kafka::FetchedMessage>] messages batch
19
+ # @param topic_parser [Class] topic parser for unparsing messages values
20
+ def initialize(messages_batch, topic_parser)
21
+ @params_batch = messages_batch.map do |message|
22
+ Karafka::Params::Params.build(message, topic_parser)
23
+ end
24
+ end
25
+
26
+ # @yieldparam [Karafka::Params::Params] each parsed and loaded params instance
27
+ # @note Invocation of this method will cause loading and parsing each param after another.
28
+ # If you want to get access without parsing, please access params_batch directly
29
+ def each
30
+ @params_batch.each { |param| yield(param.retrieve) }
31
+ end
32
+
33
+ # @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so they
34
+ # can be used for batch insert, etc. Without invoking all, up until first use, they won't
35
+ # be parsed
36
+ def parsed
37
+ each(&:itself)
38
+ end
39
+
40
+ # @return [Array<Karafka::Params::Params>] pure array with params (not parsed)
41
+ def to_a
42
+ @params_batch
43
+ end
44
+ end
45
+ end
46
+ end