karafka 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +7 -0
  2. data/.console_irbrc +13 -0
  3. data/.github/ISSUE_TEMPLATE.md +2 -0
  4. data/.gitignore +68 -0
  5. data/.rspec +1 -0
  6. data/.ruby-gemset +1 -0
  7. data/.ruby-version +1 -0
  8. data/.travis.yml +17 -0
  9. data/CHANGELOG.md +371 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +42 -0
  12. data/Gemfile +12 -0
  13. data/Gemfile.lock +111 -0
  14. data/MIT-LICENCE +18 -0
  15. data/README.md +95 -0
  16. data/bin/karafka +19 -0
  17. data/config/errors.yml +6 -0
  18. data/karafka.gemspec +35 -0
  19. data/lib/karafka.rb +68 -0
  20. data/lib/karafka/app.rb +52 -0
  21. data/lib/karafka/attributes_map.rb +67 -0
  22. data/lib/karafka/backends/inline.rb +17 -0
  23. data/lib/karafka/base_controller.rb +60 -0
  24. data/lib/karafka/base_responder.rb +185 -0
  25. data/lib/karafka/cli.rb +54 -0
  26. data/lib/karafka/cli/base.rb +78 -0
  27. data/lib/karafka/cli/console.rb +29 -0
  28. data/lib/karafka/cli/flow.rb +46 -0
  29. data/lib/karafka/cli/info.rb +29 -0
  30. data/lib/karafka/cli/install.rb +43 -0
  31. data/lib/karafka/cli/server.rb +67 -0
  32. data/lib/karafka/connection/config_adapter.rb +112 -0
  33. data/lib/karafka/connection/consumer.rb +121 -0
  34. data/lib/karafka/connection/listener.rb +51 -0
  35. data/lib/karafka/connection/processor.rb +61 -0
  36. data/lib/karafka/controllers/callbacks.rb +54 -0
  37. data/lib/karafka/controllers/includer.rb +51 -0
  38. data/lib/karafka/controllers/responders.rb +19 -0
  39. data/lib/karafka/controllers/single_params.rb +15 -0
  40. data/lib/karafka/errors.rb +43 -0
  41. data/lib/karafka/fetcher.rb +48 -0
  42. data/lib/karafka/helpers/class_matcher.rb +78 -0
  43. data/lib/karafka/helpers/config_retriever.rb +46 -0
  44. data/lib/karafka/helpers/multi_delegator.rb +33 -0
  45. data/lib/karafka/loader.rb +29 -0
  46. data/lib/karafka/logger.rb +53 -0
  47. data/lib/karafka/monitor.rb +98 -0
  48. data/lib/karafka/params/params.rb +128 -0
  49. data/lib/karafka/params/params_batch.rb +41 -0
  50. data/lib/karafka/parsers/json.rb +38 -0
  51. data/lib/karafka/patches/dry_configurable.rb +31 -0
  52. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  53. data/lib/karafka/persistence/consumer.rb +25 -0
  54. data/lib/karafka/persistence/controller.rb +38 -0
  55. data/lib/karafka/process.rb +63 -0
  56. data/lib/karafka/responders/builder.rb +35 -0
  57. data/lib/karafka/responders/topic.rb +57 -0
  58. data/lib/karafka/routing/builder.rb +61 -0
  59. data/lib/karafka/routing/consumer_group.rb +61 -0
  60. data/lib/karafka/routing/consumer_mapper.rb +33 -0
  61. data/lib/karafka/routing/proxy.rb +37 -0
  62. data/lib/karafka/routing/router.rb +29 -0
  63. data/lib/karafka/routing/topic.rb +66 -0
  64. data/lib/karafka/routing/topic_mapper.rb +55 -0
  65. data/lib/karafka/schemas/config.rb +21 -0
  66. data/lib/karafka/schemas/consumer_group.rb +65 -0
  67. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  68. data/lib/karafka/schemas/responder_usage.rb +39 -0
  69. data/lib/karafka/schemas/server_cli_options.rb +43 -0
  70. data/lib/karafka/server.rb +62 -0
  71. data/lib/karafka/setup/config.rb +163 -0
  72. data/lib/karafka/setup/configurators/base.rb +35 -0
  73. data/lib/karafka/setup/configurators/water_drop.rb +29 -0
  74. data/lib/karafka/status.rb +25 -0
  75. data/lib/karafka/templates/application_controller.rb.example +7 -0
  76. data/lib/karafka/templates/application_responder.rb.example +11 -0
  77. data/lib/karafka/templates/karafka.rb.example +41 -0
  78. data/lib/karafka/version.rb +7 -0
  79. data/log/.gitkeep +0 -0
  80. metadata +267 -0
data/lib/karafka/cli/console.rb
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Karafka framework Cli
+   class Cli < Thor
+     # Console Karafka Cli action
+     class Console < Base
+       desc 'Start the Karafka console (short-cut alias: "c")'
+       option aliases: 'c'
+
+       # @return [String] Console executing command
+       # @example
+       #   Karafka::Cli::Console.command #=> 'KARAFKA_CONSOLE=true bundle exec irb...'
+       def self.command
+         envs = [
+           "IRBRC='#{Karafka.gem_root}/.console_irbrc'",
+           'KARAFKA_CONSOLE=true'
+         ]
+         "#{envs.join(' ')} bundle exec irb"
+       end
+
+       # Start the Karafka console
+       def call
+         cli.info
+         exec self.class.command
+       end
+     end
+   end
+ end
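For context, `Console.command` simply prefixes `bundle exec irb` with the two environment variables built above, so calling it resolves to something like the following (the gem path is illustrative and depends on where the gem is installed):

    # Illustrative only - the IRBRC path varies per installation
    Karafka::Cli::Console.command
    #=> "IRBRC='/gems/karafka-1.1.0/.console_irbrc' KARAFKA_CONSOLE=true bundle exec irb"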
data/lib/karafka/cli/flow.rb
@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Karafka framework Cli
+   class Cli < Thor
+     # Description of topics flow (incoming/outgoing)
+     class Flow < Base
+       desc 'Print application data flow (incoming => outgoing)'
+
+       # Print out all defined routes in alphabetical order
+       def call
+         topics.each do |topic|
+           any_topics = !topic.responder&.topics.nil?
+
+           if any_topics
+             puts "#{topic.name} =>"
+
+             topic.responder.topics.each_value do |responder_topic|
+               features = []
+               features << (responder_topic.required? ? 'always' : 'conditionally')
+               features << (responder_topic.multiple_usage? ? 'one or more' : 'exactly once')
+
+               print responder_topic.name, "(#{features.join(', ')})"
+             end
+           else
+             puts "#{topic.name} => (nothing)"
+           end
+         end
+       end
+
+       private
+
+       # @return [Array<Karafka::Routing::Topic>] all topics sorted in alphabetical order
+       def topics
+         Karafka::App.consumer_groups.map(&:topics).flatten.sort_by(&:name)
+       end
+
+       # Prints a given value with label in a nice way
+       # @param label [String] label describing value
+       # @param value [String] value that should be printed
+       def print(label, value)
+         printf "%-25s %s\n", " - #{label}:", value
+       end
+     end
+   end
+ end
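Given the `puts`/`printf` calls above, a topic whose responder registers one required, single-use topic renders roughly as follows (topic names are made up for illustration):

    # Hypothetical routing: "users_events" has a responder registering
    # "users_notifications" (required, single use); "audit_logs" has none.
    #
    # $ bundle exec karafka flow
    # users_events =>
    #  - users_notifications:  (always, exactly once)
    # audit_logs => (nothing)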
data/lib/karafka/cli/info.rb
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Karafka framework Cli
+   class Cli < Thor
+     # Info Karafka Cli action
+     class Info < Base
+       desc 'Print configuration details and other options of your application'
+
+       # Print configuration details and other options of your application
+       def call
+         config = Karafka::App.config
+
+         info = [
+           "Karafka framework version: #{Karafka::VERSION}",
+           "Application client id: #{config.client_id}",
+           "Backend: #{config.backend}",
+           "Batch fetching: #{config.batch_fetching}",
+           "Batch consuming: #{config.batch_consuming}",
+           "Boot file: #{Karafka.boot_file}",
+           "Environment: #{Karafka.env}",
+           "Kafka seed brokers: #{config.kafka.seed_brokers}"
+         ]
+
+         puts(info.join("\n"))
+       end
+     end
+   end
+ end
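Running the command prints one line per setting; with a typical setup the output looks roughly like this (all values are illustrative):

    # $ bundle exec karafka info
    # Karafka framework version: 1.1.0
    # Application client id: example_app
    # Backend: inline
    # Batch fetching: true
    # Batch consuming: false
    # Boot file: /app/karafka.rb
    # Environment: development
    # Kafka seed brokers: ["kafka://localhost:9092"]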
data/lib/karafka/cli/install.rb
@@ -0,0 +1,43 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Karafka framework Cli
+   class Cli < Thor
+     # Install Karafka Cli action
+     class Install < Base
+       desc 'Install all required things for Karafka application in current directory'
+
+       # Directories created by default
+       INSTALL_DIRS = %w[
+         app/models
+         app/controllers
+         app/responders
+         config
+         log
+         tmp/pids
+       ].freeze
+
+       # Where should we map proper files from templates
+       INSTALL_FILES_MAP = {
+         'karafka.rb.example' => Karafka.boot_file.basename,
+         'application_controller.rb.example' => 'app/controllers/application_controller.rb',
+         'application_responder.rb.example' => 'app/responders/application_responder.rb'
+       }.freeze
+
+       # Install all required things for Karafka application in current directory
+       def call
+         INSTALL_DIRS.each do |dir|
+           FileUtils.mkdir_p Karafka.root.join(dir)
+         end
+
+         INSTALL_FILES_MAP.each do |source, target|
+           target = Karafka.root.join(target)
+           next if File.exist?(target)
+
+           source = Karafka.core_root.join("templates/#{source}")
+           FileUtils.cp_r(source, target)
+         end
+       end
+     end
+   end
+ end
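Because of the `File.exist?` guard, the generator never overwrites an existing boot file, controller or responder; re-running it only recreates what is missing. A fresh run in an empty directory would leave roughly this layout (assuming the default `karafka.rb` boot file name):

    # $ bundle exec karafka install
    # app/controllers/application_controller.rb
    # app/models/
    # app/responders/application_responder.rb
    # config/
    # karafka.rb
    # log/
    # tmp/pids/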
data/lib/karafka/cli/server.rb
@@ -0,0 +1,67 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Karafka framework Cli
+   class Cli < Thor
+     # Server Karafka Cli action
+     class Server < Base
+       desc 'Start the Karafka server (short-cut alias: "s")'
+       option aliases: 's'
+       option :daemon, default: false, type: :boolean, aliases: :d
+       option :pid, default: 'tmp/pids/karafka', type: :string, aliases: :p
+       option :consumer_groups, type: :array, default: nil, aliases: :g
+
+       # Start the Karafka server
+       def call
+         validate!
+
+         puts 'Starting Karafka server'
+         cli.info
+
+         if cli.options[:daemon]
+           FileUtils.mkdir_p File.dirname(cli.options[:pid])
+           daemonize
+         end
+
+         # We assign active topics on a server level, as only server is expected to listen on
+         # part of the topics
+         Karafka::Server.consumer_groups = cli.options[:consumer_groups]
+
+         # Remove pidfile on stop, just before the server instance is going to be GCed
+         # We want to delay the moment in which the pidfile is removed as much as we can,
+         # so instead of removing it after the server stops running, we rely on the gc moment
+         # when this object gets removed (it is a bit later), so it is closer to the actual
+         # system process end. We do that, so monitoring and deployment tools that rely on pids
+         # won't alarm or start new system process up until the current one is finished
+         ObjectSpace.define_finalizer(self, proc { send(:clean) })
+
+         # After we fork, we can boot celluloid again
+         Karafka::Server.run
+       end
+
+       private
+
+       # Checks the server cli configuration
+       # options validations in terms of app setup (topics, pid existence, etc)
+       def validate!
+         result = Schemas::ServerCliOptions.call(cli.options)
+         return if result.success?
+         raise Errors::InvalidConfiguration, result.errors
+       end
+
+       # Detaches current process into background and writes its pidfile
+       def daemonize
+         ::Process.daemon(true)
+         File.open(
+           cli.options[:pid],
+           'w'
+         ) { |file| file.write(::Process.pid) }
+       end
+
+       # Removes a pidfile (if it exists)
+       def clean
+         FileUtils.rm_f(cli.options[:pid]) if cli.options[:pid]
+       end
+     end
+   end
+ end
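The declared Thor options translate into the usual CLI flags; for example (group names are illustrative):

    # Foreground server consuming all configured consumer groups:
    # $ bundle exec karafka server
    #
    # Daemonized server with a custom pidfile, limited to two consumer groups:
    # $ bundle exec karafka s -d -p tmp/pids/karafka.pid -g group_one group_two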
data/lib/karafka/connection/config_adapter.rb
@@ -0,0 +1,112 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   # Namespace for all the things related to Kafka connection
+   module Connection
+     # Mapper used to convert our internal settings into ruby-kafka settings
+     # Since ruby-kafka has more and more options and there are few "levels" on which
+     # we have to apply them (despite the fact, that in Karafka you configure all of it
+     # in one place), we have to remap it into what ruby-kafka driver requires
+     # @note The good thing about Kafka.new method is that it ignores all options that
+     #   do nothing. So we don't have to worry about injecting our internal settings
+     #   into the client and breaking stuff
+     module ConfigAdapter
+       class << self
+         # Builds all the configuration settings for Kafka.new method
+         # @param _consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+         # @return [Hash] hash with all the settings required by Kafka.new method
+         def client(_consumer_group)
+           # This one is a default that takes all the settings except special
+           # cases defined in the map
+           settings = {
+             logger: ::Karafka.logger,
+             client_id: ::Karafka::App.config.client_id
+           }
+
+           kafka_configs.each do |setting_name, setting_value|
+             # All options for config adapter should be ignored as we're just interested
+             # in what is left, as we want to pass all the options that are "typical"
+             # and not listed in the config_adapter special cases mapping. All the values
+             # from the config_adapter mapping go somewhere else, not to the client directly
+             next if AttributesMap.config_adapter.values.flatten.include?(setting_name)
+
+             settings[setting_name] = setting_value
+           end
+
+           sanitize(settings)
+         end
+
+         # Builds all the configuration settings for kafka#consumer method
+         # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+         # @return [Hash] hash with all the settings required by Kafka#consumer method
+         def consumer(consumer_group)
+           settings = { group_id: consumer_group.id }
+           settings = fetch_for(:consumer, consumer_group, settings)
+           sanitize(settings)
+         end
+
+         # Builds all the configuration settings for kafka consumer consume_each_batch and
+         # consume_each_message methods
+         # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+         # @return [Hash] hash with all the settings required by
+         #   Kafka::Consumer#consume_each_message and Kafka::Consumer#consume_each_batch method
+         def consuming(consumer_group)
+           settings = {
+             automatically_mark_as_processed: consumer_group.automatically_mark_as_consumed
+           }
+           sanitize(fetch_for(:consuming, consumer_group, settings))
+         end
+
+         # Builds all the configuration settings for kafka consumer#subscribe method
+         # @param topic [Karafka::Routing::Topic] topic that holds details for a given subscription
+         # @return [Hash] hash with all the settings required by kafka consumer#subscribe method
+         def subscription(topic)
+           settings = fetch_for(:subscription, topic)
+           [Karafka::App.config.topic_mapper.outgoing(topic.name), sanitize(settings)]
+         end
+
+         # Builds all the configuration settings required by kafka consumer#pause method
+         # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+         # @return [Hash] hash with all the settings required to pause kafka consumer
+         def pausing(consumer_group)
+           { timeout: consumer_group.pause_timeout }
+         end
+
+         private
+
+         # Fetches proper settings for a given map namespace
+         # @param namespace_key [Symbol] namespace from attributes map config adapter hash
+         # @param route_layer [Object] route topic or consumer group
+         # @param preexisting_settings [Hash] hash with some preexisting settings that might have
+         #   been loaded in a different way
+         def fetch_for(namespace_key, route_layer, preexisting_settings = {})
+           kafka_configs.each_key do |setting_name|
+             # Ignore settings that are not related to our namespace
+             next unless AttributesMap.config_adapter[namespace_key].include?(setting_name)
+             # Ignore settings that are already initialized
+             # In case they are in preexisting settings fetched differently
+             next if preexisting_settings.keys.include?(setting_name)
+             # Fetch all the settings from a given layer object. Objects can handle the fallback
+             # to the kafka settings, so we don't need to worry about that here
+             preexisting_settings[setting_name] = route_layer.send(setting_name)
+           end
+
+           preexisting_settings
+         end
+
+         # Removes nil containing keys from the final settings so it can use Kafka's driver
+         # defaults for those
+         # @param settings [Hash] settings that may contain nil values
+         # @return [Hash] settings without nil-valued keys (none of the karafka options should be nil)
+         def sanitize(settings)
+           settings.reject { |_key, value| value.nil? }
+         end
+
+         # @return [Hash] Kafka config details as a hash
+         def kafka_configs
+           ::Karafka::App.config.kafka.to_h
+         end
+       end
+     end
+   end
+ end
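A minimal sketch of the resulting split, assuming (hypothetically) that `session_timeout` is listed under the `:consumer` namespace of `AttributesMap.config_adapter` while `seed_brokers` is not mapped anywhere, so it falls through to the client settings; all concrete values are made up:

    # Hypothetical values for illustration:
    ConfigAdapter.client(consumer_group)
    #=> { logger: <Karafka::Logger>, client_id: "example_app",
    #     seed_brokers: ["kafka://localhost:9092"] }
    ConfigAdapter.consumer(consumer_group)
    #=> { group_id: "example_app_example_group", session_timeout: 30 }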
data/lib/karafka/connection/consumer.rb
@@ -0,0 +1,121 @@
+ # frozen_string_literal: true
+
+ module Karafka
+   module Connection
+     # Class used as a wrapper around Ruby-Kafka to simplify additional
+     # features that we provide/might provide in future and to hide the internal implementation
+     class Consumer
+       # Creates a queue consumer that will pull the data from Kafka
+       # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
+       #   we create a client
+       # @return [Karafka::Connection::Consumer] group consumer that can subscribe to
+       #   multiple topics
+       def initialize(consumer_group)
+         @consumer_group = consumer_group
+         Persistence::Consumer.write(self)
+       end
+
+       # Opens connection, gets messages and calls a block for each of the incoming messages
+       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
+       # @note This will yield with raw messages - no preprocessing or reformatting.
+       def fetch_loop
+         send(
+           consumer_group.batch_fetching ? :consume_each_batch : :consume_each_message
+         ) { |messages| yield(messages) }
+       rescue Kafka::ProcessingError => e
+         # If there was an error during consumption, we have to log it, pause current partition
+         # and process other things
+         Karafka.monitor.notice_error(self.class, e.cause)
+         pause(e.topic, e.partition)
+         retry
+         # This is on purpose - see the notes for this method
+         # rubocop:disable RescueException
+       rescue Exception => e
+         # rubocop:enable RescueException
+         Karafka.monitor.notice_error(self.class, e)
+         retry
+       end
+
+       # Gracefully stops topic consumption
+       # @note Stopping running consumers without a really important reason is not recommended
+       #   as until all the consumers are stopped, the server will keep running serving only
+       #   part of the messages
+       def stop
+         @kafka_consumer&.stop
+         @kafka_consumer = nil
+       end
+
+       # Pauses fetching and consumption of a given topic partition
+       # @param topic [String] topic that we want to pause
+       # @param partition [Integer] number partition that we want to pause
+       def pause(topic, partition)
+         settings = ConfigAdapter.pausing(consumer_group)
+         timeout = settings[:timeout]
+         raise(Errors::InvalidPauseTimeout, timeout) unless timeout.positive?
+         kafka_consumer.pause(topic, partition, settings)
+       end
+
+       # Marks a given message as consumed and commits the offsets
+       # @note In contrast to ruby-kafka, we commit the offset for each manual marking to be sure
+       #   that the offset commit happens asap in case of a crash
+       # @param [Karafka::Params::Params] params message that we want to mark as processed
+       def mark_as_consumed(params)
+         kafka_consumer.mark_message_as_processed(params)
+         # Trigger an immediate, blocking offset commit in order to minimize the risk of crashing
+         # before the automatic triggers have kicked in.
+         kafka_consumer.commit_offsets
+       end
+
+       private
+
+       attr_reader :consumer_group
+
+       # Consumes messages from Kafka in batches
+       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
+       def consume_each_batch
+         kafka_consumer.each_batch(
+           ConfigAdapter.consuming(consumer_group)
+         ) do |batch|
+           yield(batch.messages)
+         end
+       end
+
+       # Consumes messages from Kafka one by one
+       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
+       def consume_each_message
+         kafka_consumer.each_message(
+           ConfigAdapter.consuming(consumer_group)
+         ) do |message|
+           # always yield an array of messages, so we have consistent API (always a batch)
+           yield([message])
+         end
+       end
+
+       # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
+       #   that is set up to consume from topics of a given consumer group
+       def kafka_consumer
+         @kafka_consumer ||= kafka.consumer(
+           ConfigAdapter.consumer(consumer_group)
+         ).tap do |consumer|
+           consumer_group.topics.each do |topic|
+             consumer.subscribe(*ConfigAdapter.subscription(topic))
+           end
+         end
+       rescue Kafka::ConnectionError
+         # If we would not wait it would totally spam log file with failed
+         # attempts if Kafka is down
+         sleep(consumer_group.reconnect_timeout)
+         # We don't log and just reraise - this will be logged
+         # down the road
+         raise
+       end
+
+       # @return [Kafka] returns a Kafka client
+       # @note We don't cache it internally because we cache kafka_consumer that uses kafka
+       #   object instance
+       def kafka
+         Kafka.new(ConfigAdapter.client(consumer_group))
+       end
+     end
+   end
+ end
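A minimal usage sketch of this wrapper (assuming `consumer_group` is an already-built `Karafka::Routing::ConsumerGroup`); note that the block always receives an array, even when `batch_fetching` is off:

    consumer = Karafka::Connection::Consumer.new(consumer_group)
    consumer.fetch_loop do |messages|
      # messages is an Array<Kafka::FetchedMessage> regardless of the
      # batch_fetching setting
      messages.each { |message| puts message.value }
    end
    consumer.stop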