karafka 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +68 -0
  3. data/.ruby-gemset +1 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +6 -0
  6. data/CHANGELOG.md +202 -0
  7. data/Gemfile +8 -0
  8. data/Gemfile.lock +216 -0
  9. data/MIT-LICENCE +18 -0
  10. data/README.md +831 -0
  11. data/Rakefile +17 -0
  12. data/bin/karafka +7 -0
  13. data/karafka.gemspec +34 -0
  14. data/lib/karafka.rb +73 -0
  15. data/lib/karafka/app.rb +45 -0
  16. data/lib/karafka/base_controller.rb +162 -0
  17. data/lib/karafka/base_responder.rb +118 -0
  18. data/lib/karafka/base_worker.rb +41 -0
  19. data/lib/karafka/capistrano.rb +2 -0
  20. data/lib/karafka/capistrano/karafka.cap +84 -0
  21. data/lib/karafka/cli.rb +52 -0
  22. data/lib/karafka/cli/base.rb +74 -0
  23. data/lib/karafka/cli/console.rb +23 -0
  24. data/lib/karafka/cli/flow.rb +46 -0
  25. data/lib/karafka/cli/info.rb +26 -0
  26. data/lib/karafka/cli/install.rb +45 -0
  27. data/lib/karafka/cli/routes.rb +39 -0
  28. data/lib/karafka/cli/server.rb +59 -0
  29. data/lib/karafka/cli/worker.rb +26 -0
  30. data/lib/karafka/connection/consumer.rb +29 -0
  31. data/lib/karafka/connection/listener.rb +54 -0
  32. data/lib/karafka/connection/message.rb +17 -0
  33. data/lib/karafka/connection/topic_consumer.rb +48 -0
  34. data/lib/karafka/errors.rb +50 -0
  35. data/lib/karafka/fetcher.rb +40 -0
  36. data/lib/karafka/helpers/class_matcher.rb +77 -0
  37. data/lib/karafka/helpers/multi_delegator.rb +31 -0
  38. data/lib/karafka/loader.rb +77 -0
  39. data/lib/karafka/logger.rb +52 -0
  40. data/lib/karafka/monitor.rb +82 -0
  41. data/lib/karafka/params/interchanger.rb +33 -0
  42. data/lib/karafka/params/params.rb +102 -0
  43. data/lib/karafka/patches/dry/configurable/config.rb +37 -0
  44. data/lib/karafka/process.rb +61 -0
  45. data/lib/karafka/responders/builder.rb +33 -0
  46. data/lib/karafka/responders/topic.rb +43 -0
  47. data/lib/karafka/responders/usage_validator.rb +59 -0
  48. data/lib/karafka/routing/builder.rb +89 -0
  49. data/lib/karafka/routing/route.rb +80 -0
  50. data/lib/karafka/routing/router.rb +38 -0
  51. data/lib/karafka/server.rb +53 -0
  52. data/lib/karafka/setup/config.rb +57 -0
  53. data/lib/karafka/setup/configurators/base.rb +33 -0
  54. data/lib/karafka/setup/configurators/celluloid.rb +20 -0
  55. data/lib/karafka/setup/configurators/sidekiq.rb +34 -0
  56. data/lib/karafka/setup/configurators/water_drop.rb +19 -0
  57. data/lib/karafka/setup/configurators/worker_glass.rb +13 -0
  58. data/lib/karafka/status.rb +23 -0
  59. data/lib/karafka/templates/app.rb.example +26 -0
  60. data/lib/karafka/templates/application_controller.rb.example +5 -0
  61. data/lib/karafka/templates/application_responder.rb.example +9 -0
  62. data/lib/karafka/templates/application_worker.rb.example +12 -0
  63. data/lib/karafka/templates/config.ru.example +13 -0
  64. data/lib/karafka/templates/sidekiq.yml.example +26 -0
  65. data/lib/karafka/version.rb +6 -0
  66. data/lib/karafka/workers/builder.rb +49 -0
  67. data/log/.gitkeep +0 -0
  68. metadata +267 -0
@@ -0,0 +1,17 @@
require 'bundler'
require 'rake'
require 'polishgeeks-dev-tools'

# Dev tools configuration: the brakeman and haml_lint checks are switched off
PolishGeeks::DevTools.setup do |dev_tools|
  dev_tools.brakeman = false
  dev_tools.haml_lint = false
end

desc 'Self check using polishgeeks-dev-tools'
task :check do
  # Build the runner first and hand it a fresh dev-tools logger to report with
  runner = PolishGeeks::DevTools::Runner.new
  runner.execute(PolishGeeks::DevTools::Logger.new)
end

# Plain `rake` runs the self check
task default: :check
@@ -0,0 +1,7 @@
#!/usr/bin/env ruby

require 'karafka'

# Load the application boot file only when it is actually present on disk
boot_file_path = Karafka.boot_file.to_s
require boot_file_path if File.exist?(boot_file_path)

# Prepare the CLI first and then hand control over to it
Karafka::Cli.prepare
Karafka::Cli.start
@@ -0,0 +1,34 @@
# Put the gem's lib directory on the load path so the version file can be required
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'karafka/version'

Gem::Specification.new do |spec|
  spec.name        = 'karafka'
  spec.version     = ::Karafka::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ['Maciej Mensfeld', 'Pavlo Vavruk']
  spec.email       = %w( maciej@mensfeld.pl pavlo.vavruk@gmail.com )
  spec.homepage    = 'https://github.com/karafka/karafka'
  # Plain string literals are used so that no leading/trailing whitespace ends up in the
  # published gem metadata
  spec.summary     = 'Ruby based Microframework for handling Apache Kafka incoming messages'
  spec.description = 'Microframework used to simplify Kafka based Ruby applications'
  spec.license     = 'MIT'

  spec.add_development_dependency 'bundler', '~> 1.2'

  spec.add_dependency 'ruby-kafka', '= 0.3.15'
  spec.add_dependency 'sidekiq', '~> 4.2'
  spec.add_dependency 'worker-glass', '~> 0.2'
  spec.add_dependency 'celluloid', '~> 0.17'
  spec.add_dependency 'envlogic', '~> 1.0'
  spec.add_dependency 'waterdrop', '~> 0.3'
  spec.add_dependency 'rake', '~> 11.3'
  spec.add_dependency 'thor', '~> 0.19'
  spec.add_dependency 'activesupport', '~> 5.0'
  spec.add_dependency 'dry-configurable', '~> 0.1.7'
  spec.required_ruby_version = '>= 2.3.0'

  # Package everything tracked by git except for the spec directory
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.start_with?('spec/') }
  # Every file under bin/ is exposed as an executable
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.require_paths = %w( lib )
end
@@ -0,0 +1,73 @@
%w(
  rake
  ostruct
  rubygems
  bundler
  English
  celluloid/current
  waterdrop
  pathname
  timeout
  logger
  kafka
  sidekiq
  worker_glass
  envlogic
  thor
  fileutils
  dry-configurable
  active_support/callbacks
  active_support/core_ext/class/subclasses
  active_support/core_ext/hash/indifferent_access
  active_support/descendants_tracker
  active_support/inflector
  karafka/loader
  karafka/status
).each { |lib| require lib }

# Karafka library
module Karafka
  extend Envlogic

  class << self
    # @return [Logger] logger that we want to use. It is taken from the app config and
    #   memoized on first use
    def logger
      @logger ||= App.config.logger
    end

    # @return [::Karafka::Monitor] monitor that we want to use. It is taken from the app config
    #   and memoized on first use
    def monitor
      @monitor ||= App.config.monitor
    end

    # @return [Pathname] root path of this gem
    def gem_root
      Pathname.new(File.expand_path('../..', __FILE__))
    end

    # @return [Pathname] Karafka app root path (user application path)
    # @note It is the directory of the Gemfile pointed to by BUNDLE_GEMFILE. When that variable
    #   is not set (process started without Bundler being set up), we fall back to the current
    #   working directory instead of failing on a nil path
    def root
      gemfile = ENV['BUNDLE_GEMFILE']
      Pathname.new(gemfile ? File.dirname(gemfile) : Dir.pwd)
    end

    # @return [Pathname] path to Karafka gem root core
    def core_root
      Pathname.new(File.expand_path('../karafka', __FILE__))
    end

    # @return [Pathname] path to a default file that contains booting procedure etc
    # @note By default it is a file called 'app.rb' but it can be specified as you wish if you
    #   have Karafka that is merged into a Sinatra/Rails app and app.rb is taken.
    #   It will be used for console/workers/etc
    # @example Standard only-Karafka case
    #   Karafka.boot_file #=> '/home/app_path/app.rb'
    # @example Non standard case
    #   KARAFKA_BOOT_FILE='/home/app_path/karafka.rb'
    #   Karafka.boot_file #=> '/home/app_path/karafka.rb'
    def boot_file
      Pathname.new(ENV['KARAFKA_BOOT_FILE'] || File.join(Karafka.root, 'app.rb'))
    end
  end
end

# Load the rest of the framework files from the gem core directory
Karafka::Loader.new.load!(Karafka.core_root)
@@ -0,0 +1,45 @@
module Karafka
  # Application facade: entry point for configuring and booting a Karafka app, with shortcuts
  # to the config, the routes, the status object and the Karafka module helpers
  class App
    class << self
      # Runs the configuration and then calls initialize!
      # @param [Block] block configuration block that is passed to Setup::Config.setup
      # @note initialize! is not defined explicitly in this class - presumably it is one of
      #   the Status methods delegated below (verify against Karafka::Status)
      def setup(&block)
        Setup::Config.setup(&block)
        initialize!
      end

      # Sets up all the internal components and bootstraps the whole app
      # Components setup needs to know details about the routes, which is why it is a separate
      # step and not a part of the standard setup
      def boot!
        Setup::Config.setup_components
      end

      # @return [Karafka::Config] config instance
      def config
        Setup::Config.config
      end

      # @return [Karafka::Routing::Builder] routes builder instance
      def routes
        Routing::Builder.instance
      end

      # Every method defined directly on Status gets a class level counterpart here, that
      # forwards the call to the Status instance
      Status.instance_methods(false).each do |status_method|
        define_method(status_method) { Status.instance.public_send(status_method) }
      end

      # Methods that should be delegated to Karafka module
      %i( root env logger monitor ).each do |karafka_method|
        define_method(karafka_method) { Karafka.public_send(karafka_method) }
      end
    end
  end
end
@@ -0,0 +1,162 @@
# Karafka module namespace
module Karafka
  # Base controller from which all Karafka controllers should inherit
  # Similar to Rails controllers we can define before_enqueue callbacks
  # that will be executed
  #
  # Note that if before_enqueue returns false, the chain will be stopped and
  # the perform method won't be executed in sidekiq (won't perform_async it)
  #
  # @example Create simple controller
  #   class ExamplesController < Karafka::BaseController
  #     def perform
  #       # some logic here
  #     end
  #   end
  #
  # @example Create a controller with a block before_enqueue
  #   class ExampleController < Karafka::BaseController
  #     before_enqueue do
  #       # Here we should have some checking logic
  #       # If false is returned, won't schedule a perform action
  #     end
  #
  #     def perform
  #       # some logic here
  #     end
  #   end
  #
  # @example Create a controller with a method before_enqueue
  #   class ExampleController < Karafka::BaseController
  #     before_enqueue :before_method
  #
  #     def perform
  #       # some logic here
  #     end
  #
  #     private
  #
  #     def before_method
  #       # Here we should have some checking logic
  #       # If false is returned, won't schedule a perform action
  #     end
  #   end
  #
  # @example Create a controller with an after_failure action
  #   class ExampleController < Karafka::BaseController
  #     def perform
  #       # some logic here
  #     end
  #
  #     def after_failure
  #       # action taken in case perform fails
  #     end
  #   end
  class BaseController
    extend ActiveSupport::DescendantsTracker
    include ActiveSupport::Callbacks

    # The schedule method is wrapped with a set of callbacks
    # We won't run perform at the backend if any of the callbacks
    # returns false
    # NOTE(review): the chain is defined without a custom terminator, so whether a `false`
    #   return value really halts it depends on the default terminator of the ActiveSupport
    #   version in use - confirm against the ActiveSupport release this gem depends on
    # @see http://api.rubyonrails.org/classes/ActiveSupport/Callbacks/ClassMethods.html#method-i-get_callbacks
    define_callbacks :schedule

    # This will be set based on routing settings
    # From 0.4 a single controller can handle multiple topics jobs
    attr_accessor :group, :topic, :worker, :parser, :interchanger, :responder

    class << self
      # Creates a callback that will be executed before scheduling to Sidekiq
      # @param method_name [Symbol, String] method name or nil if we plan to provide a block
      # @yield A block with a code that should be executed before scheduling
      # @note If value returned is false, will halt the chain and not schedule to Sidekiq
      # @example Define a block before_enqueue callback
      #   before_enqueue do
      #     # logic here
      #   end
      #
      # @example Define a method name before_enqueue callback
      #   before_enqueue :method_name
      def before_enqueue(method_name = nil, &block)
        # The method name takes precedence, the block is used only when no name was given
        set_callback :schedule, :before, method_name || block
      end
    end

    # Creates lazy loaded params object
    # @note Until first params usage, it won't parse data at all
    # @param message [Karafka::Connection::Message, Hash] message with raw content or a hash
    #   from Sidekiq that allows us to build params.
    def params=(message)
      @params = Karafka::Params::Params.build(message, self)
    end

    # Executes the default controller flow, runs callbacks and if not halted
    # will schedule a perform task in sidekiq
    def schedule
      run_callbacks :schedule do
        perform_async
      end
    end

    # @return [Hash] hash with all controller details - it works similar to #params method however
    #   it won't parse data so it will return unparsed details about controller and its parameters
    # @example Get data about ctrl
    #   ctrl.to_h #=> { "worker"=>WorkerClass, "parsed"=>false, "content"=>"{}" }
    def to_h
      @params
    end

    # Method that will perform business logic on data received from Kafka
    # @note This method needs to be implemented in a subclass. We stub it here as a failover if
    #   someone forgets about it or makes one with a typo
    # @raise [NotImplementedError] always, unless overridden in a subclass
    def perform
      raise NotImplementedError, 'Implement this in a subclass'
    end

    private

    # @return [Karafka::Params::Params] Karafka params that is a hash with indifferent access
    # @note Params internally are lazy loaded before first use. That way we can skip parsing
    #   process if we have before_enqueue that rejects some incoming messages without using params
    #   It can be also used when handling really heavy data (in terms of parsing). Without direct
    #   usage outside of worker scope, it will pass raw data into sidekiq, so we won't use Karafka
    #   working time to parse this data. It will happen only in the worker (where it can take time)
    #   that way Karafka will be able to process data really quickly. On the other hand, if we
    #   decide to use params somewhere before it hits worker logic, it won't parse it again in
    #   the worker - it will use already loaded data and pass it to Redis
    # @note Invocation of this method will cause all the data to be loaded into the params
    #   object. If you want to get access without parsing, please access @params directly
    def params
      @params.retrieve
    end

    # Responds with given data using given responder. This allows us to have a similar way of
    # defining flows like synchronous protocols
    # @param data Anything we want to pass to responder based on which we want to trigger further
    #   Kafka responding
    # @raise [Karafka::Errors::ResponderMissing] raised when we don't have a responder defined,
    #   but we still try to use this method
    def respond_with(*data)
      raise(Errors::ResponderMissing, self.class) unless responder

      Karafka.monitor.notice(self.class, data: data)
      responder.new.call(*data)
    end

    # Enqueues the execution of perform method into a worker.
    # @note Each worker needs to have a class #perform_async method that will allow us to pass
    #   parameters into it. We always pass topic as a first argument and this request params
    #   as a second one (we pass topic to be able to build back the controller in the worker)
    def perform_async
      Karafka.monitor.notice(self.class, to_h)

      # We use @params directly (instead of #params) because of lazy loading logic that is behind
      # it. See Karafka::Params::Params class for more details about that
      worker.perform_async(
        topic,
        interchanger.load(@params)
      )
    end
  end
end
@@ -0,0 +1,118 @@
module Karafka
  # Base responder from which all Karafka responders should inherit
  # Similar to Rails responders concept. It allows us to design flow from one app to another
  # by isolating what responses should be sent (and where) based on a given action
  # It differs from Rails responders in the way it works: in std http request we can have one
  # response, here we can have unlimited number of them
  #
  # It has a simple API for defining where should we respond (and if it is required)
  #
  # @example Basic usage (each registered topic is required to be used by default)
  #   class Responder < BaseResponder
  #     topic :new_action
  #
  #     def respond(data)
  #       respond_to :new_action, data
  #     end
  #   end
  #
  # @example Marking topic as optional (we won't have to use it)
  #   class Responder < BaseResponder
  #     topic :required_topic
  #     topic :new_action, optional: true
  #
  #     def respond(data)
  #       respond_to :required_topic, data
  #     end
  #   end
  #
  # @example Multiple times used topic
  #   class Responder < BaseResponder
  #     topic :required_topic, multiple_usage: true
  #
  #     def respond(data)
  #       data.each do |subset|
  #         respond_to :required_topic, subset
  #       end
  #     end
  #   end
  #
  # @example Accept multiple arguments to a respond method
  #   class Responder < BaseResponder
  #     topic :users_actions
  #     topic :articles_viewed
  #
  #     def respond(user, article)
  #       respond_to :users_actions, user
  #       respond_to :articles_viewed, article
  #     end
  #   end
  class BaseResponder
    # Definitions of all topics that we want to be able to use in this responder should go here
    class_attribute :topics

    class << self
      # Registers a topic as one to which we will be able to respond
      # @param topic_name [Symbol, String] name of topic to which we want to respond
      # @param options [Hash] hash with optional configuration details
      # @return [Karafka::Responders::Topic] registered topic object
      def topic(topic_name, options = {})
        topic_obj = Responders::Topic.new(topic_name, options)
        # We assign a new hash instead of mutating the current one in place. The value of a
        # class attribute is inherited, so an in place change made from a subclass would also
        # register the topic in its parent class and in all the sibling responders
        self.topics = (topics || {}).merge(topic_obj.name => topic_obj)
        topic_obj
      end
    end

    # Creates a responder object
    # @return [Karafka::BaseResponder] base responder descendant responder
    def initialize
      @used_topics = []
    end

    # Performs respond and validates that all the response requirements were met
    # @param data Anything that we want to respond with
    # @note We know that validators should be executed also before sending data to topics, however
    #   the implementation gets way more complicated then, that's why we check after everything
    #   was sent using responder
    def call(*data)
      respond(*data)
      validate!
    end

    private

    # Method that needs to be implemented in a subclass. It should handle responding
    # on registered topics
    # @raise [NotImplementedError] This method needs to be implemented in a subclass
    def respond(*_data)
      raise NotImplementedError, 'Implement this in a subclass'
    end

    # This method allows us to respond to a single topic with a given data. It can be used
    # as many times as we need. Especially when we have 1:n flow
    # @param topic [Symbol, String] topic to which we want to respond
    # @param data [String, Object] string or object that we want to send
    # @note Note that if we pass object here (not a string), this method will invoke a #to_json
    #   on it.
    # @note Respond to does not accept multiple data arguments.
    def respond_to(topic, data)
      Karafka.monitor.notice(self.class, topic: topic, data: data)

      # Topic usage is tracked by its string name, so it can be validated after responding
      topic = topic.to_s
      @used_topics << topic

      ::WaterDrop::Message.new(
        topic,
        data.is_a?(String) ? data : data.to_json
      ).send!
    end

    # Checks if we met all the topics requirements. It will fail if we didn't send a message to
    # a registered required topic, etc.
    def validate!
      Responders::UsageValidator.new(
        self.class.topics || {},
        @used_topics
      ).validate!
    end
  end
end