distribot 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +3 -0
  5. data/.travis.yml +10 -0
  6. data/Dockerfile +9 -0
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +153 -0
  9. data/LICENSE +201 -0
  10. data/README.md +107 -0
  11. data/Rakefile +16 -0
  12. data/bin/distribot.flow-created +6 -0
  13. data/bin/distribot.flow-finished +6 -0
  14. data/bin/distribot.handler-finished +6 -0
  15. data/bin/distribot.phase-finished +6 -0
  16. data/bin/distribot.phase-started +6 -0
  17. data/bin/distribot.task-finished +6 -0
  18. data/distribot.gemspec +35 -0
  19. data/docker-compose.yml +29 -0
  20. data/examples/controller +168 -0
  21. data/examples/distribot.eye +49 -0
  22. data/examples/status +38 -0
  23. data/examples/worker +135 -0
  24. data/lib/distribot/connector.rb +162 -0
  25. data/lib/distribot/flow.rb +200 -0
  26. data/lib/distribot/flow_created_handler.rb +12 -0
  27. data/lib/distribot/flow_finished_handler.rb +12 -0
  28. data/lib/distribot/handler.rb +40 -0
  29. data/lib/distribot/handler_finished_handler.rb +29 -0
  30. data/lib/distribot/phase.rb +46 -0
  31. data/lib/distribot/phase_finished_handler.rb +19 -0
  32. data/lib/distribot/phase_handler.rb +15 -0
  33. data/lib/distribot/phase_started_handler.rb +69 -0
  34. data/lib/distribot/task_finished_handler.rb +37 -0
  35. data/lib/distribot/worker.rb +148 -0
  36. data/lib/distribot.rb +108 -0
  37. data/provision/nodes.sh +80 -0
  38. data/provision/templates/fluentd.conf +27 -0
  39. data/spec/distribot/bunny_connector_spec.rb +196 -0
  40. data/spec/distribot/connection_sharer_spec.rb +34 -0
  41. data/spec/distribot/connector_spec.rb +63 -0
  42. data/spec/distribot/flow_created_handler_spec.rb +32 -0
  43. data/spec/distribot/flow_finished_handler_spec.rb +32 -0
  44. data/spec/distribot/flow_spec.rb +661 -0
  45. data/spec/distribot/handler_finished_handler_spec.rb +112 -0
  46. data/spec/distribot/handler_spec.rb +32 -0
  47. data/spec/distribot/module_spec.rb +163 -0
  48. data/spec/distribot/multi_subscription_spec.rb +37 -0
  49. data/spec/distribot/phase_finished_handler_spec.rb +61 -0
  50. data/spec/distribot/phase_started_handler_spec.rb +150 -0
  51. data/spec/distribot/subscription_spec.rb +40 -0
  52. data/spec/distribot/task_finished_handler_spec.rb +71 -0
  53. data/spec/distribot/worker_spec.rb +281 -0
  54. data/spec/fixtures/simple_flow.json +49 -0
  55. data/spec/spec_helper.rb +74 -0
  56. metadata +371 -0
@@ -0,0 +1,168 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'distribot'
5
+ require 'byebug'
6
+ require 'pp'
7
+ require 'active_support/core_ext/object'
8
+ require 'active_support/core_ext/array'
9
+ require 'active_support/json'
10
+ require 'dotenv'
11
+
12
# Load DISTRIBOT_* settings from the local .env file (via dotenv), then hand
# the Redis and RabbitMQ endpoints to Distribot.
Dotenv.load

Distribot.configure do |settings|
  settings.redis_url = ENV['DISTRIBOT_REDIS_URL']
  settings.rabbitmq_url = ENV['DISTRIBOT_RABBITMQ_URL']
end
18
+
19
# Example flow definitions plus a helper that launches them.
module Example
  # Builds the list of candidate phase groups; +make_flow+ picks one of them
  # at random. Only the news pipeline is currently enabled.
  #
  # @return [Array<Array<Hash>>] one inner array of phase definitions per flow shape
  def self.phase_groups
    news_pipeline = [
      { name: 'start', is_initial: true, transitions_to: 'get-latest-stories' },
      {
        name: 'get-latest-stories',
        transitions_to: 'download-news',
        handlers: [{ name: 'News::ArticleLister', version: '~> 1.0' }]
      },
      {
        name: 'download-news',
        transitions_to: 'finish',
        handlers: [{ name: 'News::ArticleDownloaderWorker', version: '1.0.0' }]
      },
      { name: 'finish', is_final: true }
    ]

    # Disabled alternatives, kept for reference. To re-enable one, define it
    # and add it to the returned array.
    #
    # five_part_flow = [
    #   { name: 'start', is_initial: true, transitions_to: 'part1' },
    #   { name: 'part1', handlers: [{ name: 'HardWorker', version: '~> 1.0' }], transitions_to: 'part2' },
    #   { name: 'part2', handlers: %w(GoodWorker FastWorker), transitions_to: 'part3' },
    #   { name: 'part3', handlers: %w(CheapWorker ForeignWorker), transitions_to: 'part4' },
    #   { name: 'part4', handlers: %w(SlowWorker), transitions_to: 'finish' },
    #   { name: 'finish', is_final: true }
    # ]
    #
    # search_flow = [
    #   { name: 'start', is_initial: true, transitions_to: 'searching' },
    #   { name: 'searching', handlers: %w(GoodWorker FastWorker CheapWorker), transitions_to: 'finish' },
    #   { name: 'finish', is_final: true }
    # ]

    [news_pipeline]
  end

  # Creates and saves a flow named "<name>#1" using a randomly chosen phase
  # group. The block given to +save!+ bumps the shared 'finished' counter and
  # prints the time between the flow's earliest and latest transitions
  # (presumably it is invoked once the flow has finished — confirm in Flow#save!).
  #
  # @param name [String] base name for the flow
  # @return [Distribot::Flow] the flow that was saved
  def self.make_flow(name)
    flow_name = name + '#1'
    @flow = Distribot::Flow.new(name: flow_name, phases: phase_groups.sample)

    @flow.save! do |info|
      total_finished = Distribot.redis.incr('finished')
      stamps = Distribot::Flow.find(info[:flow_id]).transitions.map(&:timestamp)
      elapsed = stamps.max - stamps.min
      puts "WORKFLOW FINISHED: after #{elapsed} seconds -- #{info}: #{total_finished} total"
    end

    @flow
  end
end
138
+
139
# Controller entry point.
#
# Usage: controller [MAX_FLOWS]
#
# Resets the shared Redis counters, launches a first flow, then every two
# seconds prints throughput figures and launches (max - running) new flows.

Distribot.redis.set('finished', 0)

# The first CLI argument is the target number of flows; anything that is not
# a positive integer falls back to 1.
max = ARGV.shift.to_i
max = 1 if max <= 0
puts '<ENTER> when ready:'
gets

start_time = Time.now.to_f
# This key must match the one the example worker reads ('difficulty') to
# decide how long each task sleeps.
Distribot.redis.set('difficulty', 3)
Distribot.redis.set('distribot.flows.running', 0)
Distribot.redis.set('distribot.flows.max', max)
Example.make_flow 'Flow[1]'

loop do
  sleep 2
  # Re-read the limit on every pass so it can be changed in Redis while running.
  max = Distribot.redis.get('distribot.flows.max').to_i
  current = Distribot.redis.get('distribot.flows.running').to_i
  running_time = Time.now.to_f - start_time
  finished = Distribot.redis.get('finished').to_i
  rate = (finished / running_time).round(2)
  puts "\tRUNNING: at #{Time.now} -- #{current} / #{max} (#{finished} finished in #{running_time.round(2)}sec = #{rate}/sec)"

  # Integer#times is a no-op for zero or negative counts, so nothing is
  # spawned once the running count has reached the limit.
  diff = max - current
  diff.times do |num|
    new_index = Distribot.redis.get('finished')
    puts "+++++++++ SPAWN #{num + 1}/#{diff} +++++++++++"
    Example.make_flow("Flow: #{new_index} -> ")
  end
end
167
+
168
+
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env ruby
2
+ #^syntax detection
3
+
4
require 'fileutils'

# Project root: the parent of the directory that contains this config file.
ROOT = File.expand_path('../../', __FILE__)

# Create the log/ and tmp/ directories used below for log and pid files.
# FileUtils.mkdir_p is a no-op when the directory already exists and, unlike
# a backtick `mkdir -p`, does not pass the path through a shell.
%w[log tmp].each do |dir|
  FileUtils.mkdir_p(File.join(ROOT, dir))
end

Eye.config do
  logger "#{ROOT}/log/eye.log"
end
16
+
17
# Declares one supervised process per Distribot handler executable.
Eye.application :distribot do
  working_dir ROOT
  trigger :flapping, times: 10, within: 1.minute

  # Usage:
  #   eye load eye/distribot.eye
  #   eye start all
  #   eye stop all
  #   eye restart distribot:phase-started
  #   tail -f log/*
  # See https://github.com/kostya/eye for details.

  # Each name maps to bin/distribot.<name> and gets its own pid and log file
  # (paths are relative to working_dir).
  %w[
    phase-started
    phase-finished
    task-finished
    handler-finished
    flow-finished
    flow-created
  ].each do |handler|
    process handler do
      daemonize true
      pid_file "tmp/#{handler}.pid"
      stdall "log/#{handler}.log"
      start_command "dotenv bundle exec bin/distribot.#{handler}"
      stop_signals [:TERM, 5.seconds, :KILL]
      restart_command 'kill -USR2 {PID}'
      restart_grace 10.seconds
    end
  end
end
data/examples/status ADDED
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'distribot'
4
+ require 'byebug'
5
+
6
# Status monitor: twice a second, prints one progress line for every active
# flow that has not yet reached its final phase, newest flow first.
redis = Distribot.redis

loop do
  puts "\n"
  active_flows = redis.smembers('distribot.flows.active').map { |id| Distribot::Flow.find(id) }

  active_flows.sort_by(&:created_at).reverse_each do |flow|
    # Cover the odd case of fetching a flow that hasn't had its first transition yet.
    latest_transition = flow.transitions.last
    next unless latest_transition

    latest_phase = flow.phase(latest_transition.to) || Distribot::Phase.new(name: 'UNKNOWN')
    # Flows already in their final phase are skipped; only in-progress flows are listed.
    next if latest_phase.is_final

    # Sample the clock once so both durations refer to the same instant.
    now = Time.now.to_f
    time_in_phase = now - latest_transition.timestamp
    total_age = now - flow.created_at

    handler_counts = latest_phase.handlers.sort_by(&:name).map do |handler|
      # NOTE(review): the key interpolates the handler object itself, which
      # presumably stringifies to its name — confirm against Handler#to_s.
      # NOTE(review): the value stored under "...finished" is treated as the
      # number of tasks still remaining — confirm against the code that writes it.
      counter_key = "distribot.flow.#{flow.id}.#{latest_phase.name}.#{handler}.finished"
      remaining_tasks = redis.get(counter_key).to_i
      total_tasks = redis.get("#{counter_key}.total").to_i
      finished_tasks = total_tasks - remaining_tasks
      # Avoid 0/0 (NaN) when no tasks have been counted for this handler yet.
      percent_finished = total_tasks.zero? ? 0.0 : (finished_tasks * 100.0) / total_tasks
      # Clamp at zero: String#* raises on a negative count.
      bar = ('=' * [finished_tasks, 0].max) + ('-' * [remaining_tasks, 0].max)
      "#{handler}:\t#{bar} #{finished_tasks}/#{total_tasks} (#{percent_finished.round(2)}%)"
    end

    age = format('%05.02f', total_age)
    puts "Flow #{flow.id} (age:#{age} sec)\tphase:#{latest_phase.name}\t(#{time_in_phase.round(2)} seconds)\t#{handler_counts.join("\t")}"
  end

  puts "-------------------------------------------------------------------------------------------------------------------------------------------------------"
  sleep 0.5
end
data/examples/worker ADDED
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'distribot'
5
+ require 'byebug'
6
+ require 'pp'
7
+ require 'active_support/core_ext/object'
8
+ require 'active_support/core_ext/array'
9
+ require 'active_support/json'
10
+ require 'dotenv'
11
+
12
# Load DISTRIBOT_* settings from the local .env file (via dotenv), then hand
# the Redis and RabbitMQ endpoints to Distribot.
Dotenv.load

Distribot.configure do |settings|
  settings.redis_url = ENV['DISTRIBOT_REDIS_URL']
  settings.rabbitmq_url = ENV['DISTRIBOT_RABBITMQ_URL']
end
18
+
19
# Baseline example worker: enumerates ten dummy jobs and "processes" each one
# by sleeping, failing at random roughly one time in ten.
class SimpleWorker
  include Distribot::Worker
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process

  # Builds two chunks of five jobs each and passes the flat list to the callback.
  #
  # @param _context unused
  # @yieldparam jobs [Array<Hash>] hashes of the form { id: <uuid>, args: [chunk, n] }
  def enumerate(_context, &callback)
    per_chunk = 5
    jobs = 2.times.flat_map do |chunk|
      (1..per_chunk).map { |n| { id: SecureRandom.uuid, args: [chunk, n] } }
    end
    callback.call(jobs)
  end

  # Logs the job, randomly raises to simulate a failure, then sleeps for the
  # number of seconds stored under the 'difficulty' Redis key (3 seconds when
  # the key is missing or not positive).
  #
  # @param _context unused
  # @param job [Hash] a job produced by #enumerate
  def process(_context, job)
    logger.info job.to_s
    raise 'Test Error!' if rand >= 0.9

    pause = Distribot.redis.get('difficulty').to_f
    sleep(pause > 0 ? pause : 3)
  end
end
41
+
42
# Named variants of SimpleWorker. They add no behaviour of their own; each
# simply re-declares its version and the enumerate/process entry points.
# These are the handler names used by the alternative (currently disabled)
# flows in the example controller.

class HardWorker < SimpleWorker
  version '1.1.0'
  enumerate_with :enumerate
  process_tasks_with :process
end

class GoodWorker < SimpleWorker
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process
end

class FastWorker < SimpleWorker
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process
end

class CheapWorker < SimpleWorker
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process
end

class ForeignWorker < SimpleWorker
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process
end

class SlowWorker < SimpleWorker
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process
end
72
+
73
+ module News
74
# Lists twenty placeholder story ids per enumeration and, for each one, stores
# a small JSON record in Redis for a later phase to pick up.
class ArticleLister
  include Distribot::Worker
  version '1.0.1'
  enumerate_with :enumerate
  process_tasks_with :process

  # Produces one task per story id; ids are prefixed with the number of
  # enumerations this instance has already performed.
  #
  # NOTE(review): unlike SimpleWorker#enumerate, this returns the task list
  # instead of passing it to the callback — confirm Distribot::Worker
  # accepts both styles.
  #
  # @return [Array<Hash>] tasks of the form { story_id: "<run>.<id>" }
  def enumerate(context, &callback)
    @enumerations ||= 0
    run = @enumerations
    @enumerations += 1
    (1..20).map { |id| { story_id: "#{run}.#{id}" } }
  end

  # Stores the story's JSON under "flow.<flow_id>.stories:<story_id>" and
  # records that key in the "flow.<flow_id>.stories" set, both inside a single
  # Redis MULTI block.
  #
  # @param context object responding to #flow_id
  # @param task [Hash] a task produced by #enumerate
  def process(context, task)
    json = { url: "https://infra/?story_id=#{task[:story_id]}" }.to_json
    data_key = "flow.#{context.flow_id}.stories"

    parsed =
      begin
        JSON.parse(json, symbolize_names: true)
      rescue StandardError
        {}
      end
    return unless parsed.key?(:url)

    Distribot.redis.multi do |redis|
      place = [data_key, task[:story_id]].join(':')
      redis.set place, json
      redis.sadd data_key, place
    end
  end
end
99
+
100
# Consumes the story records stored under "flow.<flow_id>.stories": one task
# per stored record, each of which is printed and then removed from Redis.
class ArticleDownloaderWorker
  include Distribot::Worker
  require 'wrest'
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process

  # Turns every member of the flow's story set into a task and then deletes
  # the set itself.
  #
  # @param context object responding to #flow_id
  # @return [Array<Hash>] tasks of the form { data_locator: <redis key> }
  def enumerate(context)
    stories_key = "flow.#{context.flow_id}.stories"
    locators = Distribot.redis.smembers(stories_key)
    tasks = locators.map { |location| { data_locator: location } }
    Distribot.redis.del stories_key
    tasks
  end

  # Reads the JSON stored at the task's locator, prints it, then deletes the key.
  #
  # @param context unused
  # @param task [Hash] a task produced by #enumerate
  def process(context, task)
    locator = task[:data_locator]
    raw = Distribot.redis.get(locator)
    data = JSON.parse(raw, symbolize_names: true)
    puts data
    # data[:url].to_uri(verify_mode: OpenSSL::SSL::VERIFY_NONE).get
    Distribot.redis.del(locator)
  end
end
121
+ end
122
+
123
# Start both news workers, printing whatever each #run call returns.
[News::ArticleLister, News::ArticleDownloaderWorker].each do |worker_class|
  puts worker_class.new.run
end

# Distribot.logger.info HardWorker.new.run
# Distribot.logger.info GoodWorker.new.run
# Distribot.logger.info FastWorker.new.run
# Distribot.logger.info CheapWorker.new.run
# Distribot.logger.info ForeignWorker.new.run
# Distribot.logger.info SlowWorker.new.run

puts 'Worker up and running!'

# Sleep with no argument blocks forever, keeping the process alive
# (presumably the workers consume messages on background threads).
sleep
@@ -0,0 +1,162 @@
1
+
2
+ require 'pp'
3
+ require 'securerandom'
4
+ require 'bunny'
5
+ require 'byebug'
6
+ require 'active_support/json'
7
+ require 'uri'
8
+ require 'wrest'
9
+
10
+ module Distribot
11
+ class Connector
12
+ attr_accessor :amqp_url, :bunny, :channel
13
+ def initialize(amqp_url = 'amqp://localhost:5672')
14
+ self.amqp_url = amqp_url
15
+ setup
16
+ end
17
+
18
+ def queues
19
+ uri = URI.parse(amqp_url)
20
+ uri.scheme = 'http'
21
+ uri.port = '15672'.to_i
22
+ uri.path = '/api/queues'
23
+ uri.to_s.to_uri.get.deserialize.map { |x| x['name'] }
24
+ end
25
+
26
+ def logger
27
+ Distribot.logger
28
+ end
29
+
30
+ private
31
+
32
+ def setup
33
+ self.bunny = Bunny.new(amqp_url)
34
+ bunny.start
35
+ self.channel = bunny.create_channel
36
+ channel.prefetch(1)
37
+ end
38
+ end
39
+
40
# Wraps an existing Bunny session and creates its own channel on that session
# the first time one is needed.
class ConnectionSharer
  attr_accessor :bunny
  attr_writer :channel

  # @param bunny the session to open channels on
  def initialize(bunny)
    self.bunny = bunny
    @channel = nil
  end

  # Returns the memoized channel, creating it on first access.
  def channel
    @channel ||= bunny.create_channel
  end

  # @return the shared Distribot logger
  def logger
    Distribot.logger
  end
end
55
+
56
# Consumer on a durable named queue that uses manual acknowledgements: a
# message is acked after the handler returns, and rejected with requeue when
# parsing or handling raises.
class Subscription < ConnectionSharer
  attr_accessor :queue

  # Declares the queue and starts consuming from it.
  #
  # @param topic [String] queue name
  # @param options [Hash] extra subscribe options; manual_ack is always forced on
  # @yieldparam message [Hash] the JSON payload parsed with symbol keys
  # @return [Subscription] self
  def start(topic, options = {}, &block)
    self.queue = channel.queue(topic, auto_delete: false, durable: true)
    queue.subscribe(options.merge(manual_ack: true)) do |delivery_info, _properties, payload|
      handle(delivery_info, payload, block)
    end
    self
  end

  private

  # Parses and dispatches one delivery, then acks it; on any StandardError the
  # error is logged and the message is rejected with requeue.
  def handle(delivery_info, payload, handler)
    handler.call(JSON.parse(payload, symbolize_names: true))
    channel.acknowledge(delivery_info.delivery_tag, false)
  rescue StandardError => e
    logger.error "ERROR: #{e} -- #{e.backtrace.join("\n")}"
    channel.basic_reject(delivery_info.delivery_tag, true)
  end
end
74
+
75
# Fanout consumer: binds a private, server-named, auto-deleted queue to the
# fanout exchange named after the topic.
class MultiSubscription < ConnectionSharer
  attr_accessor :queue

  # Declares the exclusive queue, binds it to the fanout exchange and starts
  # consuming.
  #
  # @param topic [String] fanout exchange name
  # @param options [Hash] passed straight to the queue's subscribe call
  # @yieldparam message [Hash] the JSON payload parsed with symbol keys
  # @return [MultiSubscription] self
  def start(topic, options = {}, &block)
    self.queue = channel.queue('', exclusive: true, auto_delete: true)
    fanout = channel.fanout(topic)
    queue.bind(fanout).subscribe(options) do |_delivery_info, _properties, payload|
      deliver(payload, block)
    end
    self
  end

  private

  # Parses and dispatches one payload; failures are logged and otherwise ignored.
  def deliver(payload, handler)
    handler.call(JSON.parse(payload, symbolize_names: true))
  rescue StandardError => e
    logger.error "Error #{e} - #{payload} --- #{e.backtrace.join("\n")}"
  end
end
90
+
91
# Connector that publishes to, and subscribes on, RabbitMQ through Bunny.
# Publishing uses one lazily created channel; every subscription gets its own
# channel through Subscription / MultiSubscription.
class BunnyConnector < Connector
  attr_accessor :subscribers
  attr_writer :channel

  def initialize(*args)
    super
    self.subscribers = []
  end

  # Channel used for publishing, created on first use.
  def channel
    @channel ||= bunny.create_channel
  end

  # @param topic [String] queue name
  # @return whatever the Bunny session reports for that queue name
  def queue_exists?(topic)
    bunny.queue_exists?(topic)
  end

  # Subscribes to a durable work queue.
  #
  # @param topic [String] queue name
  # @param options [Hash] subscribe options. A truthy :solo opens a fresh
  #   connection first and is not forwarded to the subscription. The caller's
  #   hash is never modified.
  # @yieldparam message [Hash] the parsed message
  # @return [Array] the list of subscribers, including the new one
  def subscribe(topic, options = {}, &block)
    if options[:solo]
      # Strip :solo from a copy rather than deleting it from the caller's hash.
      options = options.reject { |key, _value| key == :solo }
      # NOTE(review): this replaces the connector's session for every later
      # subscription as well, while a channel memoized earlier still belongs
      # to the old session — confirm that is intended.
      setup
    end
    subscriber = Subscription.new(bunny)
    subscribers << subscriber.start(topic, options) do |message|
      logger.debug "received(#{topic} -> #{message})"
      block.call(message)
    end
  end

  # Subscribes to a fanout exchange; see MultiSubscription.
  #
  # @param topic [String] fanout exchange name
  # @param options [Hash] passed through to the subscription
  # @yieldparam message [Hash] the parsed message
  # @return [Array] the list of subscribers, including the new one
  def subscribe_multi(topic, options = {}, &block)
    subscriber = MultiSubscription.new(bunny)
    subscribers << subscriber.start(topic, options) do |message|
      logger.debug "received-multi(#{topic} -> #{message})"
      block.call(message)
    end
  end

  # Publishes a JSON-encoded message to the durable queue named +topic+ via
  # the default exchange, declaring the queue first.
  def publish(topic, message)
    queue = stubbornly :get_queue do
      channel.queue(topic, auto_delete: false, durable: true)
    end
    logger.debug "publish(#{topic} -> #{message})"
    channel.default_exchange.publish message.to_json, routing_key: queue.name
  end

  # Publishes a JSON-encoded message to the fanout exchange named +topic+.
  def broadcast(topic, message)
    exchange = channel.fanout(topic)
    logger.debug "broadcast(#{topic} -> #{message})"
    exchange.publish(message.to_json, routing_key: topic)
  end

  private

  # Opens and starts the Bunny session. Unlike Connector#setup, no channel is
  # created here; #channel creates it on demand.
  def setup
    self.bunny = Bunny.new(amqp_url)
    bunny.start
  end

  # Runs the block and returns its result, retrying once a second for as long
  # as it raises Timeout::Error. The retry is deliberately unbounded.
  #
  # @param task [Symbol] label used in the log message
  def stubbornly(task, &block)
    block.call
  rescue Timeout::Error
    logger.error "Connection timed out during '#{task}' :retrying in 1sec"
    sleep 1
    retry
  end
end
162
+ end