karafka 1.1.0

Files changed (80)
  1. checksums.yaml +7 -0
  2. data/.console_irbrc +13 -0
  3. data/.github/ISSUE_TEMPLATE.md +2 -0
  4. data/.gitignore +68 -0
  5. data/.rspec +1 -0
  6. data/.ruby-gemset +1 -0
  7. data/.ruby-version +1 -0
  8. data/.travis.yml +17 -0
  9. data/CHANGELOG.md +371 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +42 -0
  12. data/Gemfile +12 -0
  13. data/Gemfile.lock +111 -0
  14. data/MIT-LICENCE +18 -0
  15. data/README.md +95 -0
  16. data/bin/karafka +19 -0
  17. data/config/errors.yml +6 -0
  18. data/karafka.gemspec +35 -0
  19. data/lib/karafka.rb +68 -0
  20. data/lib/karafka/app.rb +52 -0
  21. data/lib/karafka/attributes_map.rb +67 -0
  22. data/lib/karafka/backends/inline.rb +17 -0
  23. data/lib/karafka/base_controller.rb +60 -0
  24. data/lib/karafka/base_responder.rb +185 -0
  25. data/lib/karafka/cli.rb +54 -0
  26. data/lib/karafka/cli/base.rb +78 -0
  27. data/lib/karafka/cli/console.rb +29 -0
  28. data/lib/karafka/cli/flow.rb +46 -0
  29. data/lib/karafka/cli/info.rb +29 -0
  30. data/lib/karafka/cli/install.rb +43 -0
  31. data/lib/karafka/cli/server.rb +67 -0
  32. data/lib/karafka/connection/config_adapter.rb +112 -0
  33. data/lib/karafka/connection/consumer.rb +121 -0
  34. data/lib/karafka/connection/listener.rb +51 -0
  35. data/lib/karafka/connection/processor.rb +61 -0
  36. data/lib/karafka/controllers/callbacks.rb +54 -0
  37. data/lib/karafka/controllers/includer.rb +51 -0
  38. data/lib/karafka/controllers/responders.rb +19 -0
  39. data/lib/karafka/controllers/single_params.rb +15 -0
  40. data/lib/karafka/errors.rb +43 -0
  41. data/lib/karafka/fetcher.rb +48 -0
  42. data/lib/karafka/helpers/class_matcher.rb +78 -0
  43. data/lib/karafka/helpers/config_retriever.rb +46 -0
  44. data/lib/karafka/helpers/multi_delegator.rb +33 -0
  45. data/lib/karafka/loader.rb +29 -0
  46. data/lib/karafka/logger.rb +53 -0
  47. data/lib/karafka/monitor.rb +98 -0
  48. data/lib/karafka/params/params.rb +128 -0
  49. data/lib/karafka/params/params_batch.rb +41 -0
  50. data/lib/karafka/parsers/json.rb +38 -0
  51. data/lib/karafka/patches/dry_configurable.rb +31 -0
  52. data/lib/karafka/patches/ruby_kafka.rb +34 -0
  53. data/lib/karafka/persistence/consumer.rb +25 -0
  54. data/lib/karafka/persistence/controller.rb +38 -0
  55. data/lib/karafka/process.rb +63 -0
  56. data/lib/karafka/responders/builder.rb +35 -0
  57. data/lib/karafka/responders/topic.rb +57 -0
  58. data/lib/karafka/routing/builder.rb +61 -0
  59. data/lib/karafka/routing/consumer_group.rb +61 -0
  60. data/lib/karafka/routing/consumer_mapper.rb +33 -0
  61. data/lib/karafka/routing/proxy.rb +37 -0
  62. data/lib/karafka/routing/router.rb +29 -0
  63. data/lib/karafka/routing/topic.rb +66 -0
  64. data/lib/karafka/routing/topic_mapper.rb +55 -0
  65. data/lib/karafka/schemas/config.rb +21 -0
  66. data/lib/karafka/schemas/consumer_group.rb +65 -0
  67. data/lib/karafka/schemas/consumer_group_topic.rb +18 -0
  68. data/lib/karafka/schemas/responder_usage.rb +39 -0
  69. data/lib/karafka/schemas/server_cli_options.rb +43 -0
  70. data/lib/karafka/server.rb +62 -0
  71. data/lib/karafka/setup/config.rb +163 -0
  72. data/lib/karafka/setup/configurators/base.rb +35 -0
  73. data/lib/karafka/setup/configurators/water_drop.rb +29 -0
  74. data/lib/karafka/status.rb +25 -0
  75. data/lib/karafka/templates/application_controller.rb.example +7 -0
  76. data/lib/karafka/templates/application_responder.rb.example +11 -0
  77. data/lib/karafka/templates/karafka.rb.example +41 -0
  78. data/lib/karafka/version.rb +7 -0
  79. data/log/.gitkeep +0 -0
  80. metadata +267 -0
@@ -0,0 +1,29 @@ data/lib/karafka/cli/console.rb
+ # frozen_string_literal: true
+
+ module Karafka
+   # Karafka framework Cli
+   class Cli < Thor
+     # Console Karafka Cli action
+     class Console < Base
+       desc 'Start the Karafka console (short-cut alias: "c")'
+       option aliases: 'c'
+
+       # @return [String] Console executing command
+       # @example
+       #   Karafka::Cli::Console.command #=> 'KARAFKA_CONSOLE=true bundle exec irb...'
+       def self.command
+         envs = [
+           "IRBRC='#{Karafka.gem_root}/.console_irbrc'",
+           'KARAFKA_CONSOLE=true'
+         ]
+         "#{envs.join(' ')} bundle exec irb"
+       end
+
+       # Start the Karafka console
+       def call
+         cli.info
+         exec self.class.command
+       end
+     end
+   end
+ end
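
For reference, the command that the Console action composes can be inspected directly; a minimal sketch follows (the IRBRC path in the output is illustrative and depends on where the gem is installed):

```ruby
require 'karafka'

Karafka::Cli::Console.command
#=> "IRBRC='/gems/karafka-1.1.0/.console_irbrc' KARAFKA_CONSOLE=true bundle exec irb"

# The same action is reachable from a terminal via:
#   bundle exec karafka console
#   bundle exec karafka c
```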
@@ -0,0 +1,46 @@ data/lib/karafka/cli/flow.rb
+ # frozen_string_literal: true
+
+ module Karafka
+   # Karafka framework Cli
+   class Cli < Thor
+     # Description of topics flow (incoming/outgoing)
+     class Flow < Base
+       desc 'Print application data flow (incoming => outgoing)'
+
+       # Print out all defined routes in alphabetical order
+       def call
+         topics.each do |topic|
+           any_topics = !topic.responder&.topics.nil?
+
+           if any_topics
+             puts "#{topic.name} =>"
+
+             topic.responder.topics.each_value do |responder_topic|
+               features = []
+               features << (responder_topic.required? ? 'always' : 'conditionally')
+               features << (responder_topic.multiple_usage? ? 'one or more' : 'exactly once')
+
+               print responder_topic.name, "(#{features.join(', ')})"
+             end
+           else
+             puts "#{topic.name} => (nothing)"
+           end
+         end
+       end
+
+       private
+
+       # @return [Array<Karafka::Routing::Topic>] all topics sorted in alphabetical order
+       def topics
+         Karafka::App.consumer_groups.map(&:topics).flatten.sort_by(&:name)
+       end
+
+       # Prints a given value with label in a nice way
+       # @param label [String] label describing value
+       # @param value [String] value that should be printed
+       def print(label, value)
+         printf "%-25s %s\n", " - #{label}:", value
+       end
+     end
+   end
+ end
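
To see what the `print` helper's format string produces, here is a minimal sketch with made-up topic names (not from the gem):

```ruby
# The flow output format used above, exercised in isolation:
# the label is left-justified into a 25-character column.
puts 'videos_created =>'
printf "%-25s %s\n", ' - videos_processed:', '(always, exactly once)'
# videos_created =>
#  - videos_processed:      (always, exactly once)
```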
@@ -0,0 +1,29 @@ data/lib/karafka/cli/info.rb
+ # frozen_string_literal: true
+
+ module Karafka
+   # Karafka framework Cli
+   class Cli < Thor
+     # Info Karafka Cli action
+     class Info < Base
+       desc 'Print configuration details and other options of your application'
+
+       # Print configuration details and other options of your application
+       def call
+         config = Karafka::App.config
+
+         info = [
+           "Karafka framework version: #{Karafka::VERSION}",
+           "Application client id: #{config.client_id}",
+           "Backend: #{config.backend}",
+           "Batch fetching: #{config.batch_fetching}",
+           "Batch consuming: #{config.batch_consuming}",
+           "Boot file: #{Karafka.boot_file}",
+           "Environment: #{Karafka.env}",
+           "Kafka seed brokers: #{config.kafka.seed_brokers}"
+         ]
+
+         puts(info.join("\n"))
+       end
+     end
+   end
+ end
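
Running `bundle exec karafka info` prints one line per entry above; illustrative output with made-up values:

```
Karafka framework version: 1.1.0
Application client id: example_app
Backend: inline
Batch fetching: true
Batch consuming: false
Boot file: /app/karafka.rb
Environment: development
Kafka seed brokers: ["kafka://localhost:9092"]
```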
@@ -0,0 +1,43 @@ data/lib/karafka/cli/install.rb
+ # frozen_string_literal: true
+
+ module Karafka
+   # Karafka framework Cli
+   class Cli < Thor
+     # Install Karafka Cli action
+     class Install < Base
+       desc 'Install all required things for Karafka application in current directory'
+
+       # Directories created by default
+       INSTALL_DIRS = %w[
+         app/models
+         app/controllers
+         app/responders
+         config
+         log
+         tmp/pids
+       ].freeze
+
+       # Where should we map proper files from templates
+       INSTALL_FILES_MAP = {
+         'karafka.rb.example' => Karafka.boot_file.basename,
+         'application_controller.rb.example' => 'app/controllers/application_controller.rb',
+         'application_responder.rb.example' => 'app/responders/application_responder.rb'
+       }.freeze
+
+       # Install all required things for Karafka application in current directory
+       def call
+         INSTALL_DIRS.each do |dir|
+           FileUtils.mkdir_p Karafka.root.join(dir)
+         end
+
+         INSTALL_FILES_MAP.each do |source, target|
+           target = Karafka.root.join(target)
+           next if File.exist?(target)
+
+           source = Karafka.core_root.join("templates/#{source}")
+           FileUtils.cp_r(source, target)
+         end
+       end
+     end
+   end
+ end
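
Given `INSTALL_DIRS` and `INSTALL_FILES_MAP` above, running the install action in an empty project directory yields roughly this layout (the boot file name comes from `Karafka.boot_file.basename`, `karafka.rb` unless overridden):

```
.
├── app
│   ├── controllers
│   │   └── application_controller.rb
│   ├── models
│   └── responders
│       └── application_responder.rb
├── config
├── karafka.rb
├── log
└── tmp
    └── pids
```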
@@ -0,0 +1,67 @@ data/lib/karafka/cli/server.rb
+ # frozen_string_literal: true
+
+ module Karafka
+   # Karafka framework Cli
+   class Cli < Thor
+     # Server Karafka Cli action
+     class Server < Base
+       desc 'Start the Karafka server (short-cut alias: "s")'
+       option aliases: 's'
+       option :daemon, default: false, type: :boolean, aliases: :d
+       option :pid, default: 'tmp/pids/karafka', type: :string, aliases: :p
+       option :consumer_groups, type: :array, default: nil, aliases: :g
+
+       # Start the Karafka server
+       def call
+         validate!
+
+         puts 'Starting Karafka server'
+         cli.info
+
+         if cli.options[:daemon]
+           FileUtils.mkdir_p File.dirname(cli.options[:pid])
+           daemonize
+         end
+
+         # We assign active topics on a server level, as only the server is expected to
+         # listen on a subset of the topics
+         Karafka::Server.consumer_groups = cli.options[:consumer_groups]
+
+         # Remove the pidfile on stop, just before the server instance is going to be GCed.
+         # We want to delay the moment in which the pidfile is removed as much as we can,
+         # so instead of removing it right after the server stops running, we rely on the
+         # GC moment when this object gets collected (a bit later), so it is closer to the
+         # actual end of the system process. We do that so monitoring and deployment tools
+         # that rely on pids won't alarm or start a new process until the current one ends
+         ObjectSpace.define_finalizer(self, proc { send(:clean) })
+
+         # Once everything is set up, we can start the server
+         Karafka::Server.run
+       end
+
+       private
+
+       # Checks the server cli configuration
+       # options validations in terms of app setup (topics, pid existence, etc)
+       def validate!
+         result = Schemas::ServerCliOptions.call(cli.options)
+         return if result.success?
+         raise Errors::InvalidConfiguration, result.errors
+       end
+
+       # Detaches the current process into the background and writes its pidfile
+       def daemonize
+         ::Process.daemon(true)
+         File.open(
+           cli.options[:pid],
+           'w'
+         ) { |file| file.write(::Process.pid) }
+       end
+
+       # Removes the pidfile (if it exists)
+       def clean
+         FileUtils.rm_f(cli.options[:pid]) if cli.options[:pid]
+       end
+     end
+   end
+ end
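
Given the options defined above, typical invocations look like this (the consumer group names are illustrative):

```
bundle exec karafka server                      # foreground, all consumer groups
bundle exec karafka s -d -p tmp/pids/karafka    # daemonized, with a pidfile
bundle exec karafka server -g group_a group_b   # limit to the listed consumer groups
```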
@@ -0,0 +1,112 @@ data/lib/karafka/connection/config_adapter.rb
+ # frozen_string_literal: true
+
+ module Karafka
+   # Namespace for all the things related to Kafka connection
+   module Connection
+     # Mapper used to convert our internal settings into ruby-kafka settings
+     # Since ruby-kafka has more and more options and there are a few "levels" on which
+     # we have to apply them (despite the fact that in Karafka you configure all of it
+     # in one place), we have to remap them into what the ruby-kafka driver requires
+     # @note The good thing about the Kafka.new method is that it ignores all options that
+     #   do nothing, so we don't have to worry about injecting our internal settings
+     #   into the client and breaking things
+     module ConfigAdapter
+       class << self
+         # Builds all the configuration settings for the Kafka.new method
+         # @param _consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+         # @return [Hash] hash with all the settings required by the Kafka.new method
+         def client(_consumer_group)
+           # This one is a default that takes all the settings except the special
+           # cases defined in the map
+           settings = {
+             logger: ::Karafka.logger,
+             client_id: ::Karafka::App.config.client_id
+           }
+
+           kafka_configs.each do |setting_name, setting_value|
+             # All options for the config adapter should be ignored as we're just interested
+             # in what is left, as we want to pass all the options that are "typical"
+             # and not listed in the config_adapter special cases mapping. All the values
+             # from the config_adapter mapping go somewhere else, not to the client directly
+             next if AttributesMap.config_adapter.values.flatten.include?(setting_name)
+
+             settings[setting_name] = setting_value
+           end
+
+           sanitize(settings)
+         end
+
+         # Builds all the configuration settings for the kafka#consumer method
+         # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+         # @return [Hash] hash with all the settings required by the Kafka#consumer method
+         def consumer(consumer_group)
+           settings = { group_id: consumer_group.id }
+           settings = fetch_for(:consumer, consumer_group, settings)
+           sanitize(settings)
+         end
+
+         # Builds all the configuration settings for the kafka consumer consume_each_batch
+         # and consume_each_message methods
+         # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+         # @return [Hash] hash with all the settings required by the
+         #   Kafka::Consumer#consume_each_message and Kafka::Consumer#consume_each_batch methods
+         def consuming(consumer_group)
+           settings = {
+             automatically_mark_as_processed: consumer_group.automatically_mark_as_consumed
+           }
+           sanitize(fetch_for(:consuming, consumer_group, settings))
+         end
+
+         # Builds all the configuration settings for the kafka consumer#subscribe method
+         # @param topic [Karafka::Routing::Topic] topic that holds details for a given subscription
+         # @return [Hash] hash with all the settings required by the kafka consumer#subscribe method
+         def subscription(topic)
+           settings = fetch_for(:subscription, topic)
+           [Karafka::App.config.topic_mapper.outgoing(topic.name), sanitize(settings)]
+         end
+
+         # Builds all the configuration settings required by the kafka consumer#pause method
+         # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+         # @return [Hash] hash with all the settings required to pause the kafka consumer
+         def pausing(consumer_group)
+           { timeout: consumer_group.pause_timeout }
+         end
+
+         private
+
+         # Fetches proper settings for a given map namespace
+         # @param namespace_key [Symbol] namespace from the attributes map config adapter hash
+         # @param route_layer [Object] route topic or consumer group
+         # @param preexisting_settings [Hash] hash with some preexisting settings that might
+         #   have been loaded in a different way
+         def fetch_for(namespace_key, route_layer, preexisting_settings = {})
+           kafka_configs.each_key do |setting_name|
+             # Ignore settings that are not related to our namespace
+             next unless AttributesMap.config_adapter[namespace_key].include?(setting_name)
+             # Ignore settings that are already initialized
+             # In case they are in preexisting settings fetched differently
+             next if preexisting_settings.keys.include?(setting_name)
+             # Fetch all the settings from a given layer object. Objects can handle the
+             # fallback to the kafka settings, so we don't have to worry about that here
+             preexisting_settings[setting_name] = route_layer.send(setting_name)
+           end
+
+           preexisting_settings
+         end
+
+         # Removes keys that contain nil values from the final settings, so the ruby-kafka
+         # driver defaults will be used for those
+         # @param settings [Hash] settings that may contain nil values
+         # @return [Hash] settings without nil-valued keys (none of the Karafka options should be nil)
+         def sanitize(settings)
+           settings.reject { |_key, value| value.nil? }
+         end
+
+         # @return [Hash] Kafka config details as a hash
+         def kafka_configs
+           ::Karafka::App.config.kafka.to_h
+         end
+       end
+     end
+   end
+ end
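
The `sanitize` step is what lets unset Karafka options fall through to the ruby-kafka defaults; a minimal sketch of its behavior (the keys here are illustrative):

```ruby
# nil-valued keys are dropped so Kafka.new falls back to its own defaults.
settings = { client_id: 'example_app', ssl_ca_cert: nil, logger: nil }
settings.reject { |_key, value| value.nil? }
#=> { client_id: "example_app" }
```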
@@ -0,0 +1,121 @@ data/lib/karafka/connection/consumer.rb
+ # frozen_string_literal: true
+
+ module Karafka
+   module Connection
+     # Class used as a wrapper around Ruby-Kafka to simplify additional features
+     # that we provide or might provide in the future, and to hide the internal implementation
+     class Consumer
+       # Creates a queue consumer that will pull the data from Kafka
+       # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
+       #   we create a client
+       # @return [Karafka::Connection::Consumer] group consumer that can subscribe to
+       #   multiple topics
+       def initialize(consumer_group)
+         @consumer_group = consumer_group
+         Persistence::Consumer.write(self)
+       end
+
+       # Opens a connection, gets messages and calls a block for each of the incoming messages
+       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
+       # @note This will yield with raw messages - no preprocessing or reformatting.
+       def fetch_loop
+         send(
+           consumer_group.batch_fetching ? :consume_each_batch : :consume_each_message
+         ) { |messages| yield(messages) }
+       rescue Kafka::ProcessingError => e
+         # If there was an error during consumption, we have to log it, pause the current
+         # partition and process other things
+         Karafka.monitor.notice_error(self.class, e.cause)
+         pause(e.topic, e.partition)
+         retry
+       # This is on purpose - see the notes for this method
+       # rubocop:disable Lint/RescueException
+       rescue Exception => e
+         # rubocop:enable Lint/RescueException
+         Karafka.monitor.notice_error(self.class, e)
+         retry
+       end
+
+       # Gracefully stops topic consumption
+       # @note Stopping running consumers without a really important reason is not recommended
+       #   as until all the consumers are stopped, the server will keep running, serving only
+       #   part of the messages
+       def stop
+         @kafka_consumer&.stop
+         @kafka_consumer = nil
+       end
+
+       # Pauses fetching and consumption of a given topic partition
+       # @param topic [String] topic that we want to pause
+       # @param partition [Integer] number of the partition that we want to pause
+       def pause(topic, partition)
+         settings = ConfigAdapter.pausing(consumer_group)
+         timeout = settings[:timeout]
+         raise(Errors::InvalidPauseTimeout, timeout) unless timeout.positive?
+         kafka_consumer.pause(topic, partition, settings)
+       end
+
+       # Marks a given message as consumed and commits the offsets
+       # @note Unlike ruby-kafka, we commit the offset for each manual marking to be sure
+       #   that the offset commit happens ASAP in case of a crash
+       # @param [Karafka::Params::Params] params message that we want to mark as processed
+       def mark_as_consumed(params)
+         kafka_consumer.mark_message_as_processed(params)
+         # Trigger an immediate, blocking offset commit in order to minimize the risk of
+         # crashing before the automatic triggers have kicked in.
+         kafka_consumer.commit_offsets
+       end
+
+       private
+
+       attr_reader :consumer_group
+
+       # Consumes messages from Kafka in batches
+       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
+       def consume_each_batch
+         kafka_consumer.each_batch(
+           ConfigAdapter.consuming(consumer_group)
+         ) do |batch|
+           yield(batch.messages)
+         end
+       end
+
+       # Consumes messages from Kafka one by one
+       # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
+       def consume_each_message
+         kafka_consumer.each_message(
+           ConfigAdapter.consuming(consumer_group)
+         ) do |message|
+           # Always yield an array of messages, so we have a consistent API (always a batch)
+           yield([message])
+         end
+       end
+
+       # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
+       #   that is set up to consume from topics of a given consumer group
+       def kafka_consumer
+         @kafka_consumer ||= kafka.consumer(
+           ConfigAdapter.consumer(consumer_group)
+         ).tap do |consumer|
+           consumer_group.topics.each do |topic|
+             consumer.subscribe(*ConfigAdapter.subscription(topic))
+           end
+         end
+       rescue Kafka::ConnectionError
+         # If we didn't wait, it would totally spam the log file with failed
+         # attempts if Kafka is down
+         sleep(consumer_group.reconnect_timeout)
+         # We don't log and just reraise - this will be logged
+         # down the road
+         raise
+       end
+
+       # @return [Kafka] returns a Kafka client instance
+       # @note We don't cache it internally because we cache kafka_consumer, which uses the
+       #   kafka object instance
+       def kafka
+         Kafka.new(ConfigAdapter.client(consumer_group))
+       end
+     end
+   end
+ end
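
A minimal usage sketch, assuming a consumer group has already been defined in the routing (the message handling is illustrative):

```ruby
# fetch_loop blocks and keeps yielding arrays of raw Kafka::FetchedMessage
# objects - a single-element array per yield when batch_fetching is disabled.
consumer_group = Karafka::App.consumer_groups.first
consumer = Karafka::Connection::Consumer.new(consumer_group)

consumer.fetch_loop do |messages|
  messages.each { |message| puts message.value }
end
```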