karafka 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +7 -0
  2. data/.gitignore +68 -0
  3. data/.ruby-gemset +1 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +6 -0
  6. data/CHANGELOG.md +202 -0
  7. data/Gemfile +8 -0
  8. data/Gemfile.lock +216 -0
  9. data/MIT-LICENCE +18 -0
  10. data/README.md +831 -0
  11. data/Rakefile +17 -0
  12. data/bin/karafka +7 -0
  13. data/karafka.gemspec +34 -0
  14. data/lib/karafka.rb +73 -0
  15. data/lib/karafka/app.rb +45 -0
  16. data/lib/karafka/base_controller.rb +162 -0
  17. data/lib/karafka/base_responder.rb +118 -0
  18. data/lib/karafka/base_worker.rb +41 -0
  19. data/lib/karafka/capistrano.rb +2 -0
  20. data/lib/karafka/capistrano/karafka.cap +84 -0
  21. data/lib/karafka/cli.rb +52 -0
  22. data/lib/karafka/cli/base.rb +74 -0
  23. data/lib/karafka/cli/console.rb +23 -0
  24. data/lib/karafka/cli/flow.rb +46 -0
  25. data/lib/karafka/cli/info.rb +26 -0
  26. data/lib/karafka/cli/install.rb +45 -0
  27. data/lib/karafka/cli/routes.rb +39 -0
  28. data/lib/karafka/cli/server.rb +59 -0
  29. data/lib/karafka/cli/worker.rb +26 -0
  30. data/lib/karafka/connection/consumer.rb +29 -0
  31. data/lib/karafka/connection/listener.rb +54 -0
  32. data/lib/karafka/connection/message.rb +17 -0
  33. data/lib/karafka/connection/topic_consumer.rb +48 -0
  34. data/lib/karafka/errors.rb +50 -0
  35. data/lib/karafka/fetcher.rb +40 -0
  36. data/lib/karafka/helpers/class_matcher.rb +77 -0
  37. data/lib/karafka/helpers/multi_delegator.rb +31 -0
  38. data/lib/karafka/loader.rb +77 -0
  39. data/lib/karafka/logger.rb +52 -0
  40. data/lib/karafka/monitor.rb +82 -0
  41. data/lib/karafka/params/interchanger.rb +33 -0
  42. data/lib/karafka/params/params.rb +102 -0
  43. data/lib/karafka/patches/dry/configurable/config.rb +37 -0
  44. data/lib/karafka/process.rb +61 -0
  45. data/lib/karafka/responders/builder.rb +33 -0
  46. data/lib/karafka/responders/topic.rb +43 -0
  47. data/lib/karafka/responders/usage_validator.rb +59 -0
  48. data/lib/karafka/routing/builder.rb +89 -0
  49. data/lib/karafka/routing/route.rb +80 -0
  50. data/lib/karafka/routing/router.rb +38 -0
  51. data/lib/karafka/server.rb +53 -0
  52. data/lib/karafka/setup/config.rb +57 -0
  53. data/lib/karafka/setup/configurators/base.rb +33 -0
  54. data/lib/karafka/setup/configurators/celluloid.rb +20 -0
  55. data/lib/karafka/setup/configurators/sidekiq.rb +34 -0
  56. data/lib/karafka/setup/configurators/water_drop.rb +19 -0
  57. data/lib/karafka/setup/configurators/worker_glass.rb +13 -0
  58. data/lib/karafka/status.rb +23 -0
  59. data/lib/karafka/templates/app.rb.example +26 -0
  60. data/lib/karafka/templates/application_controller.rb.example +5 -0
  61. data/lib/karafka/templates/application_responder.rb.example +9 -0
  62. data/lib/karafka/templates/application_worker.rb.example +12 -0
  63. data/lib/karafka/templates/config.ru.example +13 -0
  64. data/lib/karafka/templates/sidekiq.yml.example +26 -0
  65. data/lib/karafka/version.rb +6 -0
  66. data/lib/karafka/workers/builder.rb +49 -0
  67. data/log/.gitkeep +0 -0
  68. metadata +267 -0
data/Rakefile
@@ -0,0 +1,17 @@
+ require 'bundler'
+ require 'rake'
+ require 'polishgeeks-dev-tools'
+
+ PolishGeeks::DevTools.setup do |config|
+   config.brakeman = false
+   config.haml_lint = false
+ end
+
+ desc 'Self check using polishgeeks-dev-tools'
+ task :check do
+   PolishGeeks::DevTools::Runner.new.execute(
+     PolishGeeks::DevTools::Logger.new
+   )
+ end
+
+ task default: :check
data/bin/karafka
@@ -0,0 +1,7 @@
+ #!/usr/bin/env ruby
+
+ require 'karafka'
+ require Karafka.boot_file.to_s if File.exist?(Karafka.boot_file.to_s)
+
+ Karafka::Cli.prepare
+ Karafka::Cli.start
data/karafka.gemspec
@@ -0,0 +1,34 @@
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+
+ require 'karafka/version'
+
+ Gem::Specification.new do |spec|
+   spec.name = 'karafka'
+   spec.version = ::Karafka::VERSION
+   spec.platform = Gem::Platform::RUBY
+   spec.authors = ['Maciej Mensfeld', 'Pavlo Vavruk']
+   spec.email = %w( maciej@mensfeld.pl pavlo.vavruk@gmail.com )
+   spec.homepage = 'https://github.com/karafka/karafka'
+   spec.summary = %q{ Ruby based Microframework for handling Apache Kafka incoming messages }
+   spec.description = %q{ Microframework used to simplify Kafka based Ruby applications }
+   spec.license = 'MIT'
+
+   spec.add_development_dependency 'bundler', '~> 1.2'
+
+   spec.add_dependency 'ruby-kafka', '= 0.3.15'
+   spec.add_dependency 'sidekiq', '~> 4.2'
+   spec.add_dependency 'worker-glass', '~> 0.2'
+   spec.add_dependency 'celluloid', '~> 0.17'
+   spec.add_dependency 'envlogic', '~> 1.0'
+   spec.add_dependency 'waterdrop', '~> 0.3'
+   spec.add_dependency 'rake', '~> 11.3'
+   spec.add_dependency 'thor', '~> 0.19'
+   spec.add_dependency 'activesupport', '~> 5.0'
+   spec.add_dependency 'dry-configurable', '~> 0.1.7'
+   spec.required_ruby_version = '>= 2.3.0'
+
+   spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec)/}) }
+   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.require_paths = %w( lib )
+ end
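
For orientation, an application consuming this release would normally pull it in through Bundler against the gemspec above (which also requires Ruby >= 2.3.0). A minimal, illustrative Gemfile sketch follows; the source URL and the exact pinning style are assumptions, not part of this package:

# Gemfile of a hypothetical application using this release (illustrative sketch)
source 'https://rubygems.org'

gem 'karafka', '0.5.0'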
data/lib/karafka.rb
@@ -0,0 +1,73 @@
+ %w(
+   rake
+   ostruct
+   rubygems
+   bundler
+   English
+   celluloid/current
+   waterdrop
+   pathname
+   timeout
+   logger
+   kafka
+   sidekiq
+   worker_glass
+   envlogic
+   thor
+   fileutils
+   dry-configurable
+   active_support/callbacks
+   active_support/core_ext/class/subclasses
+   active_support/core_ext/hash/indifferent_access
+   active_support/descendants_tracker
+   active_support/inflector
+   karafka/loader
+   karafka/status
+ ).each { |lib| require lib }
+
+ # Karafka library
+ module Karafka
+   extend Envlogic
+
+   class << self
+     # @return [Logger] logger that we want to use. Will use ::Karafka::Logger by default
+     def logger
+       @logger ||= App.config.logger
+     end
+
+     # @return [::Karafka::Monitor] monitor that we want to use. Will use dummy monitor by default
+     def monitor
+       @monitor ||= App.config.monitor
+     end
+
+     # @return [String] root path of this gem
+     def gem_root
+       Pathname.new(File.expand_path('../..', __FILE__))
+     end
+
+     # @return [String] Karafka app root path (user application path)
+     def root
+       Pathname.new(File.dirname(ENV['BUNDLE_GEMFILE']))
+     end
+
+     # @return [String] path to Karafka gem root core
+     def core_root
+       Pathname.new(File.expand_path('../karafka', __FILE__))
+     end
+
+     # @return [String] path to a default file that contains booting procedure etc
+     # @note By default it is a file called 'app.rb' but it can be specified as you wish if you
+     #   have Karafka that is merged into a Sinatra/Rails app and app.rb is taken.
+     #   It will be used for console/workers/etc
+     # @example Standard only-Karafka case
+     #   Karafka.boot_file #=> '/home/app_path/app.rb'
+     # @example Non standard case
+     #   KARAFKA_BOOT_FILE='/home/app_path/karafka.rb'
+     #   Karafka.boot_file #=> '/home/app_path/karafka.rb'
+     def boot_file
+       Pathname.new(ENV['KARAFKA_BOOT_FILE'] || File.join(Karafka.root, 'app.rb'))
+     end
+   end
+ end
+
+ Karafka::Loader.new.load!(Karafka.core_root)
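
A quick, hedged sketch of how the path helpers above resolve at runtime; the printed values depend on BUNDLE_GEMFILE and KARAFKA_BOOT_FILE in the environment, and the example paths simply echo the @example comments in the source:

# Illustrative check of the path helpers defined above
require 'karafka'

puts Karafka.root      # e.g. /home/app_path (directory containing the Gemfile)
puts Karafka.boot_file # e.g. /home/app_path/app.rb, or the KARAFKA_BOOT_FILE override
                       # (bin/karafka requires this file when it exists)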
data/lib/karafka/app.rb
@@ -0,0 +1,45 @@
+ module Karafka
+   # App class
+   class App
+     class << self
+       # Sets up the whole configuration
+       # @param [Block] block configuration block
+       def setup(&block)
+         Setup::Config.setup(&block)
+         initialize!
+       end
+
+       # Sets up all the internal components and bootstrap whole app
+       # We need to know details about routes in order to setup components,
+       # that's why we don't setup them after std setup is done
+       def boot!
+         Setup::Config.setup_components
+       end
+
+       # @return [Karafka::Config] config instance
+       def config
+         Setup::Config.config
+       end
+
+       # @return [Karafka::Routing::Builder] routes builder instance
+       def routes
+         Routing::Builder.instance
+       end
+
+       Status.instance_methods(false).each do |delegated|
+         define_method(delegated) do
+           Status.instance.public_send(delegated)
+         end
+       end
+
+       # Methods that should be delegated to Karafka module
+       %i(
+         root env logger monitor
+       ).each do |delegated|
+         define_method(delegated) do
+           Karafka.public_send(delegated)
+         end
+       end
+     end
+   end
+ end
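
For context, a consumer application subclasses this class and calls setup with a configuration block, mirroring the bundled app.rb.example template (not shown in this excerpt). A hedged sketch: ExampleApp is a hypothetical name, and config.logger is the only setting confirmed by this excerpt (Karafka.logger reads it back); the remaining options live in data/lib/karafka/setup/config.rb.

# Hedged sketch of an application class wired through App.setup above
require 'karafka'

class ExampleApp < Karafka::App
  setup do |config|
    config.logger = Logger.new(STDOUT) # plain Ruby logger, purely illustrative
  end
end

# boot! finalizes internal components once routes and the remaining settings
# are defined (see the comment on boot! above)
ExampleApp.boot!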
data/lib/karafka/base_controller.rb
@@ -0,0 +1,162 @@
+ # Karafka module namespace
+ module Karafka
+   # Base controller from which all Karafka controllers should inherit
+   # Similar to Rails controllers we can define before_enqueue callbacks
+   # that will be executed
+   #
+   # Note that if before_enqueue returns false, the chain will be stopped and
+   # the perform method won't be executed in sidekiq (won't perform_async it)
+   #
+   # @example Create simple controller
+   #   class ExamplesController < Karafka::BaseController
+   #     def perform
+   #       # some logic here
+   #     end
+   #   end
+   #
+   # @example Create a controller with a block before_enqueue
+   #   class ExampleController < Karafka::BaseController
+   #     before_enqueue do
+   #       # Here we should have some checking logic
+   #       # If false is returned, won't schedule a perform action
+   #     end
+   #
+   #     def perform
+   #       # some logic here
+   #     end
+   #   end
+   #
+   # @example Create a controller with a method before_enqueue
+   #   class ExampleController < Karafka::BaseController
+   #     before_enqueue :before_method
+   #
+   #     def perform
+   #       # some logic here
+   #     end
+   #
+   #     private
+   #
+   #     def before_method
+   #       # Here we should have some checking logic
+   #       # If false is returned, won't schedule a perform action
+   #     end
+   #   end
+   #
+   # @example Create a controller with an after_failure action
+   #   class ExampleController < Karafka::BaseController
+   #     def perform
+   #       # some logic here
+   #     end
+   #
+   #     def after_failure
+   #       # action taken in case perform fails
+   #     end
+   #   end
+   class BaseController
+     extend ActiveSupport::DescendantsTracker
+     include ActiveSupport::Callbacks
+
+     # The schedule method is wrapped with a set of callbacks
+     # We won't run perform at the backend if any of the callbacks
+     # returns false
+     # @see http://api.rubyonrails.org/classes/ActiveSupport/Callbacks/ClassMethods.html#method-i-get_callbacks
+     define_callbacks :schedule
+
+     # This will be set based on routing settings
+     # From 0.4 a single controller can handle multiple topics jobs
+     attr_accessor :group, :topic, :worker, :parser, :interchanger, :responder
+
+     class << self
+       # Creates a callback that will be executed before scheduling to Sidekiq
+       # @param method_name [Symbol, String] method name or nil if we plan to provide a block
+       # @yield A block with a code that should be executed before scheduling
+       # @note If the value returned is false, it will halt the chain and not schedule to Sidekiq
+       # @example Define a block before_enqueue callback
+       #   before_enqueue do
+       #     # logic here
+       #   end
+       #
+       # @example Define a class name before_enqueue callback
+       #   before_enqueue :method_name
+       def before_enqueue(method_name = nil, &block)
+         set_callback :schedule, :before, method_name ? method_name : block
+       end
+     end
+
+     # Creates lazy loaded params object
+     # @note Until first params usage, it won't parse data at all
+     # @param message [Karafka::Connection::Message, Hash] message with raw content or a hash
+     #   from Sidekiq that allows us to build params.
+     def params=(message)
+       @params = Karafka::Params::Params.build(message, self)
+     end
+
+     # Executes the default controller flow, runs callbacks and if not halted
+     # will schedule a perform task in sidekiq
+     def schedule
+       run_callbacks :schedule do
+         perform_async
+       end
+     end
+
+     # @return [Hash] hash with all controller details - it works similar to #params method however
+     #   it won't parse data so it will return unparsed details about controller and its parameters
+     # @example Get data about ctrl
+     #   ctrl.to_h #=> { "worker"=>WorkerClass, "parsed"=>false, "content"=>"{}" }
+     def to_h
+       @params
+     end
+
+     # Method that will perform business logic on data received from Kafka
+     # @note This method needs to be implemented in a subclass. We stub it here as a failover if
+     #   someone forgets about it or makes one with a typo
+     def perform
+       raise NotImplementedError, 'Implement this in a subclass'
+     end
+
+     private
+
+     # @return [Karafka::Params::Params] Karafka params that is a hash with indifferent access
+     # @note Params internally are lazy loaded before first use. That way we can skip parsing
+     #   process if we have before_enqueue that rejects some incoming messages without using params
+     #   It can be also used when handling really heavy data (in terms of parsing). Without direct
+     #   usage outside of worker scope, it will pass raw data into sidekiq, so we won't use Karafka
+     #   working time to parse this data. It will happen only in the worker (where it can take time)
+     #   that way Karafka will be able to process data really quickly. On the other hand, if we
+     #   decide to use params somewhere before it hits worker logic, it won't parse it again in
+     #   the worker - it will use already loaded data and pass it to Redis
+     # @note Invocation of this method will load all the data into the params object. If you want
+     #   to get access without parsing, please access @params directly
+     def params
+       @params.retrieve
+     end
+
+     # Responds with given data using given responder. This allows us to have a similar way of
+     # defining flows like synchronous protocols
+     # @param data Anything we want to pass to responder based on which we want to trigger further
+     #   Kafka responding
+     # @raise [Karafka::Errors::ResponderMissing] raised when we don't have a responder defined,
+     #   but we still try to use this method
+     def respond_with(*data)
+       raise(Errors::ResponderMissing, self.class) unless responder
+
+       Karafka.monitor.notice(self.class, data: data)
+       responder.new.call(*data)
+     end
+
+     # Enqueues the execution of perform method into a worker.
+     # @note Each worker needs to have a class #perform_async method that will allow us to pass
+     #   parameters into it. We always pass topic as a first argument and this request params
+     #   as a second one (we pass topic to be able to build back the controller in the worker)
+     def perform_async
+       Karafka.monitor.notice(self.class, to_h)
+
+       # We use @params directly (instead of #params) because of lazy loading logic that is behind
+       # it. See Karafka::Params::Params class for more details about that
+       worker.perform_async(
+         topic,
+         interchanger.load(@params)
+       )
+     end
+   end
+ end
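
Tying the documented hooks together, here is a hedged sketch of a concrete controller; the class and topic names are hypothetical, and worker, interchanger and responder would be assigned through routing, which sits outside this excerpt:

# Hypothetical controller built on the base class above
class VisitsController < Karafka::BaseController
  # Per the class documentation, returning false here halts the :schedule
  # chain, so the message is never enqueued to Sidekiq
  before_enqueue do
    @params # inspecting @params directly avoids eager parsing (see the #params note)
  end

  def perform
    # #params triggers the lazy parsing described above
    respond_with params # requires a responder assigned via routing
  end
end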
data/lib/karafka/base_responder.rb
@@ -0,0 +1,118 @@
+ module Karafka
+   # Base responder from which all Karafka responders should inherit
+   # Similar to Rails responders concept. It allows us to design flow from one app to another
+   # by isolating what responses should be sent (and where) based on a given action
+   # It differs from Rails responders in the way it works: in std http request we can have one
+   # response, here we can have unlimited number of them
+   #
+   # It has a simple API for defining where should we respond (and if it is required)
+   #
+   # @example Basic usage (each registered topic is required to be used by default)
+   #   class Responder < BaseResponder
+   #     topic :new_action
+   #
+   #     def respond(data)
+   #       respond_to :new_action, data
+   #     end
+   #   end
+   #
+   # @example Marking topic as optional (we won't have to use it)
+   #   class Responder < BaseResponder
+   #     topic :required_topic
+   #     topic :new_action, optional: true
+   #
+   #     def respond(data)
+   #       respond_to :required_topic, data
+   #     end
+   #   end
+   #
+   # @example Multiple times used topic
+   #   class Responder < BaseResponder
+   #     topic :required_topic, multiple_usage: true
+   #
+   #     def respond(data)
+   #       data.each do |subset|
+   #         respond_to :required_topic, subset
+   #       end
+   #     end
+   #   end
+   #
+   # @example Accept multiple arguments to a respond method
+   #   class Responder < BaseResponder
+   #     topic :users_actions
+   #     topic :articles_viewed
+   #
+   #     def respond(user, article)
+   #       respond_to :users_actions, user
+   #       respond_to :articles_viewed, article
+   #     end
+   #   end
+   class BaseResponder
+     # Definitions of all topics that we want to be able to use in this responder should go here
+     class_attribute :topics
+
+     class << self
+       # Registers a topic as one to which we will be able to respond
+       # @param topic_name [Symbol, String] name of topic to which we want to respond
+       # @param options [Hash] hash with optional configuration details
+       def topic(topic_name, options = {})
+         self.topics ||= {}
+         topic_obj = Responders::Topic.new(topic_name, options)
+         self.topics[topic_obj.name] = topic_obj
+       end
+     end
+
+     # Creates a responder object
+     # @return [Karafka::BaseResponder] base responder descendant responder
+     def initialize
+       @used_topics = []
+     end
+
+     # Performs respond and validates that all the response requirements were met
+     # @param data Anything that we want to respond with
+     # @note We know that validators should be executed also before sending data to topics, however
+     #   the implementation gets way more complicated then, that's why we check after everything
+     #   was sent using responder
+     def call(*data)
+       respond(*data)
+       validate!
+     end
+
+     private
+
+     # Method that needs to be implemented in a subclass. It should handle responding
+     # on registered topics
+     # @raise [NotImplementedError] This method needs to be implemented in a subclass
+     def respond(*_data)
+       raise NotImplementedError, 'Implement this in a subclass'
+     end
+
+     # This method allows us to respond to a single topic with a given data. It can be used
+     # as many times as we need. Especially when we have 1:n flow
+     # @param topic [Symbol, String] topic to which we want to respond
+     # @param data [String, Object] string or object that we want to send
+     # @note Note that if we pass object here (not a string), this method will invoke a #to_json
+     #   on it.
+     # @note Respond to does not accept multiple data arguments.
+     def respond_to(topic, data)
+       Karafka.monitor.notice(self.class, topic: topic, data: data)
+
+       topic = topic.to_s
+       @used_topics << topic
+
+       ::WaterDrop::Message.new(
+         topic,
+         data.is_a?(String) ? data : data.to_json
+       ).send!
+     end
+
+     # Checks if we met all the topics requirements. It will fail if we didn't send a message to
+     # a registered required topic, etc.
+     def validate!
+       Responders::UsageValidator.new(
+         self.class.topics || {},
+         @used_topics
+       ).validate!
+     end
+   end
+ end
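
And a matching hedged sketch of a responder following the registration rules documented above; the topic names are illustrative:

# Hypothetical responder paired with the controller sketch above
class VisitsResponder < Karafka::BaseResponder
  topic :visits_processed                 # required by default
  topic :visits_rejected, optional: true  # may be left unused

  def respond(visit)
    # Non-string payloads are serialized with #to_json inside respond_to
    respond_to :visits_processed, visit
  end
end

# Usage: VisitsResponder.new.call(visit_hash)
# validate! (run by #call) raises if a required topic was not used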