karafka 0.5.0.3 → 0.6.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/.console_irbrc +13 -0
  3. data/.github/ISSUE_TEMPLATE.md +2 -0
  4. data/.gitignore +1 -0
  5. data/CHANGELOG.md +59 -1
  6. data/CODE_OF_CONDUCT.md +46 -0
  7. data/CONTRIBUTING.md +67 -0
  8. data/Gemfile +2 -1
  9. data/Gemfile.lock +46 -147
  10. data/README.md +51 -952
  11. data/Rakefile +5 -14
  12. data/karafka.gemspec +19 -13
  13. data/lib/karafka.rb +7 -4
  14. data/lib/karafka/app.rb +10 -6
  15. data/lib/karafka/attributes_map.rb +67 -0
  16. data/lib/karafka/base_controller.rb +42 -52
  17. data/lib/karafka/base_responder.rb +30 -14
  18. data/lib/karafka/base_worker.rb +11 -26
  19. data/lib/karafka/cli.rb +2 -0
  20. data/lib/karafka/cli/base.rb +2 -0
  21. data/lib/karafka/cli/console.rb +7 -1
  22. data/lib/karafka/cli/flow.rb +13 -13
  23. data/lib/karafka/cli/info.rb +7 -4
  24. data/lib/karafka/cli/install.rb +4 -3
  25. data/lib/karafka/cli/server.rb +3 -1
  26. data/lib/karafka/cli/worker.rb +2 -0
  27. data/lib/karafka/connection/config_adapter.rb +103 -0
  28. data/lib/karafka/connection/listener.rb +16 -12
  29. data/lib/karafka/connection/messages_consumer.rb +86 -0
  30. data/lib/karafka/connection/messages_processor.rb +74 -0
  31. data/lib/karafka/errors.rb +15 -29
  32. data/lib/karafka/fetcher.rb +10 -8
  33. data/lib/karafka/helpers/class_matcher.rb +2 -0
  34. data/lib/karafka/helpers/config_retriever.rb +46 -0
  35. data/lib/karafka/helpers/multi_delegator.rb +2 -0
  36. data/lib/karafka/loader.rb +4 -2
  37. data/lib/karafka/logger.rb +37 -36
  38. data/lib/karafka/monitor.rb +3 -1
  39. data/lib/karafka/params/interchanger.rb +2 -0
  40. data/lib/karafka/params/params.rb +34 -41
  41. data/lib/karafka/params/params_batch.rb +46 -0
  42. data/lib/karafka/parsers/json.rb +4 -2
  43. data/lib/karafka/patches/dry_configurable.rb +2 -0
  44. data/lib/karafka/process.rb +4 -2
  45. data/lib/karafka/responders/builder.rb +2 -0
  46. data/lib/karafka/responders/topic.rb +14 -6
  47. data/lib/karafka/routing/builder.rb +22 -59
  48. data/lib/karafka/routing/consumer_group.rb +54 -0
  49. data/lib/karafka/routing/mapper.rb +2 -0
  50. data/lib/karafka/routing/proxy.rb +37 -0
  51. data/lib/karafka/routing/router.rb +18 -16
  52. data/lib/karafka/routing/topic.rb +78 -0
  53. data/lib/karafka/schemas/config.rb +36 -0
  54. data/lib/karafka/schemas/consumer_group.rb +56 -0
  55. data/lib/karafka/schemas/responder_usage.rb +38 -0
  56. data/lib/karafka/server.rb +5 -3
  57. data/lib/karafka/setup/config.rb +79 -32
  58. data/lib/karafka/setup/configurators/base.rb +2 -0
  59. data/lib/karafka/setup/configurators/celluloid.rb +2 -0
  60. data/lib/karafka/setup/configurators/sidekiq.rb +2 -0
  61. data/lib/karafka/setup/configurators/water_drop.rb +15 -3
  62. data/lib/karafka/status.rb +2 -0
  63. data/lib/karafka/templates/app.rb.example +15 -5
  64. data/lib/karafka/templates/application_worker.rb.example +0 -6
  65. data/lib/karafka/version.rb +2 -1
  66. data/lib/karafka/workers/builder.rb +2 -0
  67. metadata +109 -60
  68. data/lib/karafka/cli/routes.rb +0 -36
  69. data/lib/karafka/connection/consumer.rb +0 -33
  70. data/lib/karafka/connection/message.rb +0 -17
  71. data/lib/karafka/connection/topic_consumer.rb +0 -94
  72. data/lib/karafka/responders/usage_validator.rb +0 -60
  73. data/lib/karafka/routing/route.rb +0 -113
  74. data/lib/karafka/setup/config_schema.rb +0 -44
  75. data/lib/karafka/setup/configurators/worker_glass.rb +0 -13
  76. data/lib/karafka/templates/config.ru.example +0 -13
@@ -1,13 +1,15 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Namespace used to encapsulate all the internal errors of Karafka
3
5
  module Errors
4
6
  # Base class for all the Karafka internal errors
5
- class BaseError < StandardError; end
7
+ BaseError = Class.new(StandardError)
6
8
 
7
9
  # Should be raised when we attempt to parse incoming params but parsing fails
8
10
  # If this error (or its descendant) is detected, we will pass the raw message
9
11
  # into params and proceed further
10
- class ParserError < BaseError; end
12
+ ParserError = Class.new(BaseError)
11
13
 
12
14
  # Raised when router receives topic name which does not correspond with any routes
13
15
  # This can only happen in a case when:
@@ -19,41 +21,25 @@ module Karafka
19
21
  # In case this happens, you will have to create a temporary route that will allow
20
22
  # you to "eat" everything from the Sidekiq queue.
21
23
  # @see https://github.com/karafka/karafka/issues/135
22
- class NonMatchingRouteError < BaseError; end
23
-
24
- # Raised when we have few controllers(inherited from Karafka::BaseController)
25
- # with the same group name
26
- class DuplicatedGroupError < BaseError; end
27
-
28
- # Raised when we have few controllers(inherited from Karafka::BaseController)
29
- # with the same topic name
30
- class DuplicatedTopicError < BaseError; end
31
-
32
- # Raised when we want to use topic name that has unsupported characters
33
- class InvalidTopicName < BaseError; end
34
-
35
- # Raised when we want to use group name that has unsupported characters
36
- class InvalidGroupName < BaseError; end
24
+ NonMatchingRouteError = Class.new(BaseError)
37
25
 
38
26
  # Raised when application does not have ApplicationWorker or other class that directly
39
27
  # inherits from Karafka::BaseWorker
40
- class BaseWorkerDescentantMissing < BaseError; end
28
+ BaseWorkerDescentantMissing = Class.new(BaseError)
41
29
 
42
30
  # Raised when we want to use #respond_with in controllers but we didn't define
43
31
  # (and we couldn't find) any appropriate responder for a given controller
44
- class ResponderMissing < BaseError; end
45
-
46
- # Raised when we want to use #respond_to in responders with a topic that we didn't register
47
- class UnregisteredTopic < BaseError; end
32
+ ResponderMissing = Class.new(BaseError)
48
33
 
49
- # Raised when we send more than one message to a single topic but we didn't allow that when
50
- # we were registering topic in a responder
51
- class TopicMultipleUsage < BaseError; end
52
-
53
- # Raised when we didn't use a topic that was defined as non-optional (required)
54
- class UnusedResponderRequiredTopic < BaseError; end
34
+ # Raised when we don't use or use responder not in the way it expected to based on the
35
+ # topics usage definitions
36
+ InvalidResponderUsage = Class.new(BaseError)
55
37
 
56
38
  # Raised when configuration doesn't match with validation schema
57
- class InvalidConfiguration < BaseError; end
39
+ InvalidConfiguration = Class.new(BaseError)
40
+
41
+ # Raised when processing messages in batches but still want to use #params instead of
42
+ # #params_batch
43
+ ParamsMethodUnavailable = Class.new(BaseError)
58
44
  end
59
45
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Class used to run the Karafka consumer and handle shutting down, restarting etc
3
5
  # @note Creating multiple fetchers will result in having multiple connections to the same
@@ -8,7 +10,7 @@ module Karafka
8
10
  # so we don't have to terminate them
9
11
  def fetch_loop
10
12
  futures = listeners.map do |listener|
11
- listener.future.public_send(:fetch_loop, consumer)
13
+ listener.future.public_send(:fetch_loop, processor)
12
14
  end
13
15
 
14
16
  futures.map(&:value)
@@ -24,16 +26,16 @@ module Karafka
24
26
 
25
27
  # @return [Array<Karafka::Connection::Listener>] listeners that will consume messages
26
28
  def listeners
27
- @listeners ||= App.routes.map do |route|
28
- Karafka::Connection::Listener.new(route)
29
+ @listeners ||= App.consumer_groups.map do |consumer_group|
30
+ Karafka::Connection::Listener.new(consumer_group)
29
31
  end
30
32
  end
31
33
 
32
- # @return [Proc] proc that should be processed when a message arrives
33
- # @yieldparam message [Kafka::FetchedMessage] message from kafka (raw one)
34
- def consumer
35
- lambda do |message|
36
- Karafka::Connection::Consumer.new.consume(message)
34
 + # @return [Proc] proc that should be processed when messages arrive
35
+ # @yieldparam messages [Array<Kafka::FetchedMessage>] messages from kafka (raw)
36
+ def processor
37
+ lambda do |consumer_group_id, messages|
38
+ Karafka::Connection::MessagesProcessor.process(consumer_group_id, messages)
37
39
  end
38
40
  end
39
41
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  module Helpers
3
5
  # Class used to autodetect corresponding classes that are internally inside Karafka framework
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Helpers
5
+ # A helper method that allows us to build methods that try to get a given
6
+ # attribute from its instance value and if it fails, will fallback to
7
+ # the default config or config.kafka value for a given attribute.
8
 + # It is used to simplify the checks.
9
+ # @note Worth noticing, that the value might be equal to false, so even
10
+ # then we need to return it. That's why we check for nil?
11
+ # @example Define config retried attribute for start_from_beginning
12
+ # class Test
13
+ # extend Karafka::Helpers::ConfigRetriever
14
+ # config_retriever_for :start_from_beginning
15
+ # end
16
+ #
17
+ # Test.new.start_from_beginning #=> false
18
+ # test_instance = Test.new
19
+ # test_instance.start_from_beginning = true
20
+ # test_instance.start_from_beginning #=> true
21
+ module ConfigRetriever
22
+ # Builds proper methods for setting and retrieving (with fallback) given attribute value
23
+ # @param attribute [Symbol] attribute name based on which we will build
24
+ # accessor with fallback
25
+ def config_retriever_for(attribute)
26
+ attr_writer attribute unless method_defined? :"#{attribute}="
27
+
28
+ # Don't redefine if we already have accessor for a given element
29
+ return if method_defined? attribute
30
+
31
+ define_method attribute do
32
+ current_value = instance_variable_get(:"@#{attribute}")
33
+ return current_value unless current_value.nil?
34
+
35
+ value = if Karafka::App.config.respond_to?(attribute)
36
+ Karafka::App.config.public_send(attribute)
37
+ else
38
+ Karafka::App.config.kafka.public_send(attribute)
39
+ end
40
+
41
+ instance_variable_set(:"@#{attribute}", value)
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Module containing classes and methods that provide some additional functionalities
3
5
  module Helpers
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Loader for requiring all the files in a proper order
3
5
  # Some files needs to be required before other, so it will
@@ -5,7 +7,7 @@ module Karafka
5
7
  # any other.
6
8
  class Loader
7
9
  # Order in which we want to load app files
8
- DIRS = %w(
10
+ DIRS = %w[
9
11
  config/initializers
10
12
  lib
11
13
  app/helpers
@@ -20,7 +22,7 @@ module Karafka
20
22
  app/controllers
21
23
  app/aspects
22
24
  app
23
- ).freeze
25
+ ].freeze
24
26
 
25
27
  # Will load files in a proper order (based on DIRS)
26
28
  # @param [String] root path from which we want to start
@@ -1,7 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Default logger for Event Delegator
3
5
  # @note It uses ::Logger features - providing basic logging
4
6
  class Logger < ::Logger
7
+ include Singleton
8
+
5
9
  # Map containing information about log level for a given environment
6
10
  ENV_MAP = {
7
11
  'production' => ::Logger::ERROR,
@@ -11,42 +15,39 @@ module Karafka
11
15
  default: ::Logger::INFO
12
16
  }.freeze
13
17
 
14
- class << self
15
- # Returns a logger instance with appropriate settings, log level and environment
16
- def instance
17
- ensure_dir_exists
18
- instance = new(target)
19
- instance.level = ENV_MAP[Karafka.env] || ENV_MAP[:default]
20
- instance
21
- end
22
-
23
- private
24
-
25
- # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
26
 - # to which we will be writing logs
27
- # We use this approach to log stuff to file and to the STDOUT at the same time
28
- def target
29
- Karafka::Helpers::MultiDelegator
30
- .delegate(:write, :close)
31
- .to(STDOUT, file)
32
- end
33
-
34
- # Makes sure the log directory exists
35
- def ensure_dir_exists
36
- dir = File.dirname(log_path)
37
- FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
38
- end
39
-
40
- # @return [Pathname] Path to a file to which we should log
41
- def log_path
42
- Karafka::App.root.join("log/#{Karafka.env}.log")
43
- end
44
-
45
- # @return [File] file to which we want to write our logs
46
- # @note File is being opened in append mode ('a')
47
- def file
48
- File.open(log_path, 'a')
49
- end
18
+ # Creates a new instance of logger ensuring that it has a place to write to
19
+ def initialize(*_args)
20
+ ensure_dir_exists
21
+ super(target)
22
+ self.level = ENV_MAP[Karafka.env] || ENV_MAP[:default]
23
+ end
24
+
25
+ private
26
+
27
+ # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
28
 + # to which we will be writing logs
29
+ # We use this approach to log stuff to file and to the STDOUT at the same time
30
+ def target
31
+ Karafka::Helpers::MultiDelegator
32
+ .delegate(:write, :close)
33
+ .to(STDOUT, file)
34
+ end
35
+
36
+ # Makes sure the log directory exists
37
+ def ensure_dir_exists
38
+ dir = File.dirname(log_path)
39
+ FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
40
+ end
41
+
42
+ # @return [Pathname] Path to a file to which we should log
43
+ def log_path
44
+ @log_path ||= Karafka::App.root.join("log/#{Karafka.env}.log")
45
+ end
46
+
47
+ # @return [File] file to which we want to write our logs
48
+ # @note File is being opened in append mode ('a')
49
+ def file
50
+ @file ||= File.open(log_path, 'a')
50
51
  end
51
52
  end
52
53
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Monitor is used to hookup external monitoring services to monitor how Karafka works
3
5
  # It provides a standarized API for checking incoming messages/enqueueing etc
@@ -53,7 +55,7 @@ module Karafka
53
55
  def caller_exceptions_map
54
56
  @caller_exceptions_map ||= {
55
57
  error: [
56
- Karafka::Connection::Consumer,
58
+ Karafka::Connection::MessagesProcessor,
57
59
  Karafka::Connection::Listener,
58
60
  Karafka::Params::Params
59
61
  ],
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  module Params
3
5
  # Interchangers allow us to format/encode/pack data that is being sent to perform_async
@@ -1,59 +1,52 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Karafka
2
4
  # Params namespace encapsulating all the logic that is directly related to params handling
3
5
  module Params
4
6
  # Class-wrapper for hash with indifferent access with additional lazy loading feature
5
7
  # It provides lazy loading not only until the first usage, but also allows us to skip
6
8
  # using parser until we execute our logic inside worker. That way we can operate with
7
- # heavy-parsing data without slowing down the whole application. If we won't use
8
- # params in before_enqueue (or if we don't us before_enqueue at all), it will make
9
- # Karafka faster, because it will pass data as it is directly to Sidekiq
9
+ # heavy-parsing data without slowing down the whole application.
10
10
  class Params < HashWithIndifferentAccess
11
+ # Kafka::FetchedMessage attributes that we want to use inside of params
12
+ KAFKA_MESSAGE_ATTRIBUTES = %i[
13
+ topic
14
+ value
15
+ partition
16
+ offset
17
+ key
18
+ ].freeze
19
+
11
20
  class << self
12
21
  # We allow building instances only via the #build method
13
- private_class_method :new
14
22
 
15
- # @param message [Karafka::Connection::Message, Hash] message that we get out of Kafka
23
+ # @param message [Kafka::FetchedMessage, Hash] message that we get out of Kafka
16
24
  # in case of building params inside main Karafka process in
17
- # Karafka::Connection::Consumer, or a hash when we retrieve data from Sidekiq
18
- # @param controller [Karafka::BaseController] Karafka's base controllers descendant
19
- # instance that wants to use params
25
+ # Karafka::Connection::Consumer, or a hash when we retrieve data that is already parsed
26
+ # @param parser [Class] parser class that we will use to unparse data
20
27
  # @return [Karafka::Params::Params] Karafka params object not yet used parser for
21
28
  # retrieving data that we've got from Kafka
22
29
  # @example Build params instance from a hash
23
- # Karafka::Params::Params.build({ key: 'value' }, DataController.new) #=> params object
24
- # @example Build params instance from a Karafka::Connection::Message object
25
- # Karafka::Params::Params.build(message, IncomingController.new) #=> params object
26
- def build(message, controller)
30
+ # Karafka::Params::Params.build({ key: 'value' }) #=> params object
31
+ # @example Build params instance from a Kafka::FetchedMessage object
32
+ # Karafka::Params::Params.build(message) #=> params object
33
+ def build(message, parser)
27
34
  # Hash case happens inside workers
28
35
  if message.is_a?(Hash)
29
- defaults(controller).merge!(message)
36
+ new(parser: parser).merge!(message)
30
37
  else
31
- # This happens inside Karafka::Connection::Consumer
32
- defaults(controller).merge!(
38
+ # This happens inside Kafka::FetchedMessagesProcessor
39
+ new(
40
+ parser: parser,
33
41
  parsed: false,
34
- received_at: Time.now,
35
- content: message.content
36
- )
42
+ received_at: Time.now
43
+ ).tap do |instance|
44
+ KAFKA_MESSAGE_ATTRIBUTES.each do |attribute|
45
+ instance[attribute] = message.send(attribute)
46
+ end
47
+ end
37
48
  end
38
49
  end
39
-
40
- private
41
-
42
- # @param controller [Karafka::BaseController] Karafka's base controllers descendant
43
- # instance that wants to use params
44
- # @return [Karafka::Params::Params] freshly initialized only with default values object
45
- # that can be populated with incoming data
46
- def defaults(controller)
47
- # We initialize some default values that will be used both in Karafka main process and
48
- # inside workers
49
- new(
50
- controller: controller.class,
51
- worker: controller.worker,
52
- parser: controller.parser,
53
- topic: controller.topic,
54
- responder: controller.responder
55
- )
56
- end
57
50
  end
58
51
 
59
52
  # @return [Karafka::Params::Params] this will trigger parser execution. If we decide to
@@ -63,7 +56,7 @@ module Karafka
63
56
  def retrieve
64
57
  return self if self[:parsed]
65
58
 
66
- merge!(parse(delete(:content)))
59
+ merge!(parse(delete(:value)))
67
60
  end
68
61
 
69
62
  # Overwritten merge! method - it behaves differently for keys that are the same in our hash
@@ -85,16 +78,16 @@ module Karafka
85
78
 
86
79
  private
87
80
 
88
- # @param content [String] Raw data that we want to parse using controller's parser
81
+ # @param value [String] Raw data that we want to parse using controller's parser
89
82
  # @note If something goes wrong, it will return raw data in a hash with a message key
90
83
  # @return [Hash] parsed data or a hash with message key containing raw data if something
91
84
  # went wrong during parsing
92
- def parse(content)
93
- self[:parser].parse(content)
85
+ def parse(value)
86
+ self[:parser].parse(value)
94
87
  # We catch both of them, because for default JSON - we use JSON parser directly
95
88
  rescue ::Karafka::Errors::ParserError => e
96
89
  Karafka.monitor.notice_error(self.class, e)
97
- return { message: content }
90
+ raise e
98
91
  ensure
99
92
  self[:parsed] = true
100
93
  end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Params
5
+ # Params batch represents a set of messages received from Kafka.
6
+ # @note Params internally are lazy loaded before first use. That way we can skip parsing
7
+ # process if we have before_enqueue that rejects some incoming messages without using params
8
+ # It can be also used when handling really heavy data (in terms of parsing). Without direct
9
+ # usage outside of worker scope, it will pass raw data into sidekiq, so we won't use Karafka
10
+ # working time to parse this data. It will happen only in the worker (where it can take time)
11
+ # that way Karafka will be able to process data really quickly. On the other hand, if we
12
+ # decide to use params somewhere before it hits worker logic, it won't parse it again in
13
+ # the worker - it will use already loaded data and pass it to Redis
14
+ class ParamsBatch
15
+ include Enumerable
16
+
17
+ # Builds up a params batch based on raw kafka messages
18
+ # @param messages_batch [Array<Kafka::FetchedMessage>] messages batch
19
+ # @param topic_parser [Class] topic parser for unparsing messages values
20
+ def initialize(messages_batch, topic_parser)
21
+ @params_batch = messages_batch.map do |message|
22
+ Karafka::Params::Params.build(message, topic_parser)
23
+ end
24
+ end
25
+
26
+ # @yieldparam [Karafka::Params::Params] each parsed and loaded params instance
27
+ # @note Invocation of this method will cause loading and parsing each param after another.
28
+ # If you want to get access without parsing, please access params_batch directly
29
+ def each
30
+ @params_batch.each { |param| yield(param.retrieve) }
31
+ end
32
+
33
+ # @return [Array<Karafka::Params::Params>] returns all the params in a loaded state, so they
34
+ # can be used for batch insert, etc. Without invoking all, up until first use, they won't
35
+ # be parsed
36
+ def parsed
37
+ each(&:itself)
38
+ end
39
+
40
+ # @return [Array<Karafka::Params::Params>] pure array with params (not parsed)
41
+ def to_a
42
+ @params_batch
43
+ end
44
+ end
45
+ end
46
+ end