distribot 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +3 -0
  5. data/.travis.yml +10 -0
  6. data/Dockerfile +9 -0
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +153 -0
  9. data/LICENSE +201 -0
  10. data/README.md +107 -0
  11. data/Rakefile +16 -0
  12. data/bin/distribot.flow-created +6 -0
  13. data/bin/distribot.flow-finished +6 -0
  14. data/bin/distribot.handler-finished +6 -0
  15. data/bin/distribot.phase-finished +6 -0
  16. data/bin/distribot.phase-started +6 -0
  17. data/bin/distribot.task-finished +6 -0
  18. data/distribot.gemspec +35 -0
  19. data/docker-compose.yml +29 -0
  20. data/examples/controller +168 -0
  21. data/examples/distribot.eye +49 -0
  22. data/examples/status +38 -0
  23. data/examples/worker +135 -0
  24. data/lib/distribot/connector.rb +162 -0
  25. data/lib/distribot/flow.rb +200 -0
  26. data/lib/distribot/flow_created_handler.rb +12 -0
  27. data/lib/distribot/flow_finished_handler.rb +12 -0
  28. data/lib/distribot/handler.rb +40 -0
  29. data/lib/distribot/handler_finished_handler.rb +29 -0
  30. data/lib/distribot/phase.rb +46 -0
  31. data/lib/distribot/phase_finished_handler.rb +19 -0
  32. data/lib/distribot/phase_handler.rb +15 -0
  33. data/lib/distribot/phase_started_handler.rb +69 -0
  34. data/lib/distribot/task_finished_handler.rb +37 -0
  35. data/lib/distribot/worker.rb +148 -0
  36. data/lib/distribot.rb +108 -0
  37. data/provision/nodes.sh +80 -0
  38. data/provision/templates/fluentd.conf +27 -0
  39. data/spec/distribot/bunny_connector_spec.rb +196 -0
  40. data/spec/distribot/connection_sharer_spec.rb +34 -0
  41. data/spec/distribot/connector_spec.rb +63 -0
  42. data/spec/distribot/flow_created_handler_spec.rb +32 -0
  43. data/spec/distribot/flow_finished_handler_spec.rb +32 -0
  44. data/spec/distribot/flow_spec.rb +661 -0
  45. data/spec/distribot/handler_finished_handler_spec.rb +112 -0
  46. data/spec/distribot/handler_spec.rb +32 -0
  47. data/spec/distribot/module_spec.rb +163 -0
  48. data/spec/distribot/multi_subscription_spec.rb +37 -0
  49. data/spec/distribot/phase_finished_handler_spec.rb +61 -0
  50. data/spec/distribot/phase_started_handler_spec.rb +150 -0
  51. data/spec/distribot/subscription_spec.rb +40 -0
  52. data/spec/distribot/task_finished_handler_spec.rb +71 -0
  53. data/spec/distribot/worker_spec.rb +281 -0
  54. data/spec/fixtures/simple_flow.json +49 -0
  55. data/spec/spec_helper.rb +74 -0
  56. metadata +371 -0
@@ -0,0 +1,168 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'distribot'
5
+ require 'byebug'
6
+ require 'pp'
7
+ require 'active_support/core_ext/object'
8
+ require 'active_support/core_ext/array'
9
+ require 'active_support/json'
10
+ require 'dotenv'
11
+
12
+ Dotenv.load
13
+
14
# Point Distribot at the Redis and RabbitMQ endpoints named in the environment
# (Dotenv.load above may have populated these from a .env file).
Distribot.configure do |settings|
  settings.redis_url = ENV['DISTRIBOT_REDIS_URL']
  settings.rabbitmq_url = ENV['DISTRIBOT_RABBITMQ_URL']
end
18
+
19
# Flow definitions and a flow factory used by this example controller script.
module Example
  class << self
    # Returns the list of phase sets a new flow may be built from; make_flow
    # picks one of them at random. Only the news pipeline is enabled.
    #
    # @return [Array<Array<Hash>>] one inner array of phase hashes per flow shape
    def phase_groups
      news_pipeline = [
        { name: 'start', is_initial: true, transitions_to: 'get-latest-stories' },
        {
          name: 'get-latest-stories',
          transitions_to: 'download-news',
          handlers: [{ name: 'News::ArticleLister', version: '~> 1.0' }]
        },
        {
          name: 'download-news',
          transitions_to: 'finish',
          handlers: [{ name: 'News::ArticleDownloaderWorker', version: '1.0.0' }]
        },
        { name: 'finish', is_final: true }
      ]

      [
        news_pipeline
        # Disabled alternative phase sets, kept for reference. Add a comma after
        # the entry above when re-enabling any of them.
        # [
        #   { name: 'start', is_initial: true, transitions_to: 'part1' },
        #   { name: 'part1', handlers: [{ name: 'HardWorker', version: '~> 1.0' }], transitions_to: 'part2' },
        #   { name: 'part2', handlers: %w(GoodWorker FastWorker), transitions_to: 'part3' },
        #   { name: 'part3', handlers: %w(CheapWorker ForeignWorker), transitions_to: 'part4' },
        #   { name: 'part4', handlers: %w(SlowWorker), transitions_to: 'finish' },
        #   { name: 'finish', is_final: true }
        # ],
        # [
        #   { name: 'start', is_initial: true, transitions_to: 'searching' },
        #   { name: 'searching', handlers: %w(GoodWorker FastWorker CheapWorker), transitions_to: 'finish' },
        #   { name: 'finish', is_final: true }
        # ]
      ]
    end

    # Builds and saves a flow named "<name>#1" from a randomly chosen phase set.
    #
    # The block handed to save! bumps the shared 'finished' counter in Redis and
    # prints the time between the flow's earliest and latest transitions.
    # NOTE(review): presumably the block runs when the flow completes (it prints
    # "WORKFLOW FINISHED") -- confirm against Distribot::Flow#save!.
    #
    # @param name [String] base name for the flow
    # @return [Distribot::Flow] the flow that was just saved
    def make_flow(name)
      @flow = Distribot::Flow.new(name: "#{name}#1", phases: phase_groups.sample)
      @flow.save! do |info|
        total_finished = Distribot.redis.incr('finished')
        finished_flow = Distribot::Flow.find(info[:flow_id])
        ordered = finished_flow.transitions.sort_by(&:timestamp)
        elapsed = ordered.last.timestamp - ordered.first.timestamp
        puts "WORKFLOW FINISHED: after #{elapsed} seconds -- #{info}: #{total_finished} total"
      end
      @flow
    end
  end
end
138
+
139
# Reset the shared count of completed flows before starting.
Distribot.redis.set('finished', 0)

# Optional first CLI argument: how many flows to keep running at once (min. 1).
max = ARGV.shift.to_i
max = 1 if max <= 0
puts "<ENTER> when ready:"
# Read from $stdin explicitly: bare Kernel#gets would try to open any remaining
# command-line arguments as files instead of waiting for the keyboard.
$stdin.gets

start_time = Time.now.to_f
# 'difficulty' is the key examples/worker reads to decide how long each task
# sleeps; it was previously written under the misspelled key 'dificulty'.
Distribot.redis.set('difficulty', 3)
Distribot.redis.set('distribot.flows.running', 0)
Distribot.redis.set('distribot.flows.max', max)
Example.make_flow "Flow[1]"

# Every two seconds: report throughput, then start enough new flows to bring the
# running count back up to the configured maximum.
# NOTE(review): nothing in this script updates 'distribot.flows.running';
# presumably the library maintains it -- confirm.
loop do
  sleep 2
  max = Distribot.redis.get('distribot.flows.max').to_i
  current = Distribot.redis.get('distribot.flows.running').to_i
  running_time = Time.now.to_f - start_time
  finished = Distribot.redis.get('finished').to_i
  rate = (finished / running_time).round(2)
  puts "\tRUNNING: at #{Time.now} -- #{current} / #{max} (#{finished} finished in #{running_time.round(2)}sec = #{rate}/sec)"

  diff = max - current
  # Integer#times is a no-op for zero or negative counts.
  diff.times do |num|
    new_index = Distribot.redis.get('finished')
    puts "+++++++++ SPAWN #{num + 1}/#{diff} +++++++++++"
    Example.make_flow("Flow: #{new_index} -> ")
  end
end
167
+
168
+
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env ruby
2
+ #^syntax detection
3
+
4
require 'fileutils'

# Project root: the parent of the directory that contains this file.
ROOT = File.expand_path('../../', __FILE__)

# Make sure the log/ and tmp/ directories used below exist. FileUtils.mkdir_p is
# a no-op for existing directories and, unlike shelling out to `mkdir -p`, is
# safe for paths containing spaces or shell metacharacters.
%w[log tmp].each { |dir| FileUtils.mkdir_p(File.join(ROOT, dir)) }

Eye.config do
  logger "#{ROOT}/log/eye.log"
end

Eye.application :distribot do
  working_dir ROOT
  trigger :flapping, times: 10, within: 1.minute

  # Usage:
  #   eye load examples/distribot.eye
  #   eye start all
  #   eye stop all
  #   eye restart distribot:phase-started
  #   tail -f log/*
  # See https://github.com/kostya/eye for details.

  # One supervised process per bin/distribot.<name> executable.
  handler_names = %w[
    phase-started
    phase-finished
    task-finished
    handler-finished
    flow-finished
    flow-created
  ]

  handler_names.each do |handler_name|
    process handler_name do
      daemonize true
      pid_file "tmp/#{handler_name}.pid"
      stdall "log/#{handler_name}.log"
      start_command "dotenv bundle exec bin/distribot.#{handler_name}"
      # Ask politely first, then force-kill after five seconds.
      stop_signals [:TERM, 5.seconds, :KILL]
      restart_command "kill -USR2 {PID}"
      restart_grace 10.seconds
    end
  end
end
data/examples/status ADDED
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'distribot'
4
+ require 'byebug'
5
+
6
# Formats one handler's progress as "<label>:\t<bar> <done>/<total> (<pct>%)".
# The bar has one '=' per finished task and one '-' per remaining task.
#
# @param label [#to_s] text shown in front of the bar
# @param total_tasks [Integer] number of tasks recorded for the handler
# @param remaining_tasks [Integer] number of tasks still outstanding
# @return [String]
def handler_progress(label, total_tasks, remaining_tasks)
  finished_tasks = total_tasks - remaining_tasks
  # Guard the division: with no tasks recorded yet 0.0 / 0.0 would print "NaN%".
  percent_finished = total_tasks.zero? ? 0.0 : finished_tasks.to_f / total_tasks * 100
  bar = ('=' * [finished_tasks, 0].max) + ('-' * [remaining_tasks, 0].max)
  "#{label}:\t#{bar} #{finished_tasks}/#{total_tasks} (#{percent_finished.round(2)}%)"
end

redis = Distribot.redis

# Twice a second, print one status line per active, unfinished flow (newest first).
loop do
  puts "\n"
  flows = redis.smembers('distribot.flows.active').map { |id| Distribot::Flow.find(id) }

  flows.sort_by(&:created_at).reverse_each do |flow|
    # Cover the odd case of fetching a flow that hasn't had its first transition yet:
    latest_transition = flow.transitions.last
    next unless latest_transition

    latest_phase = flow.phase(latest_transition.to) || Distribot::Phase.new(name: "UNKNOWN")
    # Flows sitting in a final phase are not reported. (The original also had a
    # "Flow ... is finished." branch further down, but this guard made it
    # unreachable, so it has been removed.)
    next if latest_phase.is_final

    now = Time.now.to_f
    time_in_phase = now - latest_transition.timestamp
    total_age = now - flow.created_at

    handler_counts = latest_phase.handlers.sort_by(&:name).map do |handler|
      counter_key = "distribot.flow.#{flow.id}.#{latest_phase.name}.#{handler}.finished"
      remaining_tasks = redis.get(counter_key).to_i
      total_tasks = redis.get("#{counter_key}.total").to_i
      handler_progress(handler, total_tasks, remaining_tasks)
    end

    puts "Flow #{flow.id} (age:#{format('%05.02f', total_age)} sec)\tphase:#{latest_phase.name}\t(#{time_in_phase.round(2)} seconds)\t#{handler_counts.join("\t")}"
  end

  puts "-------------------------------------------------------------------------------------------------------------------------------------------------------"
  sleep 0.5
end
data/examples/worker ADDED
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'distribot'
5
+ require 'byebug'
6
+ require 'pp'
7
+ require 'active_support/core_ext/object'
8
+ require 'active_support/core_ext/array'
9
+ require 'active_support/json'
10
+ require 'dotenv'
11
+
12
+ Dotenv.load
13
+
14
# Point Distribot at the Redis and RabbitMQ endpoints named in the environment
# (Dotenv.load above may have populated these from a .env file).
Distribot.configure do |settings|
  settings.redis_url = ENV['DISTRIBOT_REDIS_URL']
  settings.rabbitmq_url = ENV['DISTRIBOT_RABBITMQ_URL']
end
18
+
19
# Example worker: enumerates a fixed batch of ten jobs and "processes" each one
# by sleeping, failing at random roughly one time in ten.
class SimpleWorker
  include Distribot::Worker
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process

  # Builds two chunks of five jobs each and passes the flat list to the callback.
  # Each job is a hash with a random UUID :id and :args of [chunk, n].
  def enumerate(_context, &callback)
    jobs = (0...2).flat_map do |chunk|
      (1..5).map { |n| { id: SecureRandom.uuid, args: [chunk, n] } }
    end
    callback.call(jobs)
  end

  # Logs the job, raises "Test Error!" when rand >= 0.9, otherwise sleeps for the
  # number of seconds stored under the 'difficulty' Redis key (3 when that value
  # is missing or not positive).
  def process(_context, job)
    logger.info job.to_s
    raise "Test Error!" if rand >= 0.9

    configured = Distribot.redis.get('difficulty').to_f
    seconds = configured <= 0 ? 3 : configured
    sleep seconds
  end
end
41
+
42
# SimpleWorker subclass declaring version 1.1.0; it reuses the inherited
# enumerate and process methods.
class HardWorker < SimpleWorker
  version '1.1.0'
  enumerate_with :enumerate
  process_tasks_with :process
end
47
# SimpleWorker subclass declaring version 1.0.0; it reuses the inherited
# enumerate and process methods.
class GoodWorker < SimpleWorker
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process
end
52
# SimpleWorker subclass declaring version 1.0.0; it reuses the inherited
# enumerate and process methods.
class FastWorker < SimpleWorker
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process
end
57
# SimpleWorker subclass declaring version 1.0.0; it reuses the inherited
# enumerate and process methods.
class CheapWorker < SimpleWorker
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process
end
62
# SimpleWorker subclass declaring version 1.0.0; it reuses the inherited
# enumerate and process methods.
class ForeignWorker < SimpleWorker
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process
end
67
# SimpleWorker subclass declaring version 1.0.0; it reuses the inherited
# enumerate and process methods.
class SlowWorker < SimpleWorker
  version '1.0.0'
  enumerate_with :enumerate
  process_tasks_with :process
end
72
+
73
# Loaded up front rather than from inside a class body. It is only needed by the
# commented-out download call in ArticleDownloaderWorker#process.
require 'wrest'

# Two-stage example pipeline: ArticleLister records story URLs in Redis and
# ArticleDownloaderWorker later consumes them.
module News
  # Produces twenty story ids per enumeration and stores a URL for each one.
  class ArticleLister
    include Distribot::Worker
    version '1.0.1'
    enumerate_with :enumerate
    process_tasks_with :process

    # Returns twenty tasks of the form { story_id: "<batch>.<n>" }, where <batch>
    # counts how many times this instance has enumerated so far.
    # NOTE(review): unlike SimpleWorker#enumerate this returns the task list
    # instead of invoking the callback -- confirm the library accepts both.
    def enumerate(_context, &_callback)
      @enumerations ||= 0
      batch = @enumerations
      @enumerations += 1
      (1..20).map { |id| { story_id: "#{batch}.#{id}" } }
    end

    # Builds a JSON document holding the story's URL, stores it under
    # "flow.<flow_id>.stories:<story_id>" and adds that key to the set
    # "flow.<flow_id>.stories", both inside one Redis MULTI.
    def process(context, task)
      json = { url: "https://infra/?story_id=#{task[:story_id]}" }.to_json
      data_key = "flow.#{context.flow_id}.stories"

      # Only a malformed document is treated as "nothing to store"; the previous
      # inline `rescue {}` modifier hid every other StandardError as well.
      parsed = begin
        JSON.parse(json, symbolize_names: true)
      rescue JSON::ParserError
        {}
      end
      return unless parsed.key?(:url)

      place = [data_key, task[:story_id]].join(':')
      Distribot.redis.multi do |redis|
        redis.set place, json
        redis.sadd data_key, place
      end
    end
  end

  # Consumes the documents ArticleLister stored for the flow.
  class ArticleDownloaderWorker
    include Distribot::Worker
    version '1.0.0'
    enumerate_with :enumerate
    process_tasks_with :process

    # Turns every member of the "flow.<flow_id>.stories" set into a task of the
    # form { data_locator: <key> }, then deletes the set.
    def enumerate(context)
      data_key = "flow.#{context.flow_id}.stories"
      tasks = Distribot.redis.smembers(data_key).map { |location| { data_locator: location } }
      Distribot.redis.del data_key
      tasks
    end

    # Loads and prints the stored document, then deletes its key.
    def process(_context, task)
      locator = task[:data_locator]
      data = JSON.parse(Distribot.redis.get(locator), symbolize_names: true)
      puts data
      # data[:url].to_uri(verify_mode: OpenSSL::SSL::VERIFY_NONE).get
      Distribot.redis.del(locator)
    end
  end
end
122
+
123
# Start both News workers, printing whatever each `run` call returns.
[News::ArticleLister, News::ArticleDownloaderWorker].each do |worker_class|
  puts worker_class.new.run
end

# The SimpleWorker family is currently disabled:
# Distribot.logger.info HardWorker.new.run
# Distribot.logger.info GoodWorker.new.run
# Distribot.logger.info FastWorker.new.run
# Distribot.logger.info CheapWorker.new.run
# Distribot.logger.info ForeignWorker.new.run
# Distribot.logger.info SlowWorker.new.run

puts "Worker up and running!"

# Sleep with no argument: suspend the main thread indefinitely.
sleep
@@ -0,0 +1,162 @@
1
+
2
+ require 'pp'
3
+ require 'securerandom'
4
+ require 'bunny'
5
+ require 'byebug'
6
+ require 'active_support/json'
7
+ require 'uri'
8
+ require 'wrest'
9
+
10
+ module Distribot
11
# Base RabbitMQ connector (defined inside `module Distribot`). Construction opens
# a Bunny connection and one channel with a prefetch of 1.
class Connector
  attr_accessor :amqp_url, :bunny, :channel

  # @param amqp_url [String] broker URL; defaults to a local broker
  def initialize(amqp_url = 'amqp://localhost:5672')
    self.amqp_url = amqp_url
    setup
  end

  # Names of the queues reported by an HTTP GET of /api/queues on port 15672 of
  # the broker host (presumably the RabbitMQ management API -- confirm). Any
  # credentials in amqp_url are carried over into the HTTP URL.
  #
  # @return [Array<String>]
  def queues
    api = URI.parse(amqp_url)
    api.scheme = 'http'
    api.port = 15672
    api.path = '/api/queues'
    listing = api.to_s.to_uri.get.deserialize
    listing.map { |entry| entry['name'] }
  end

  # @return the logger configured on the Distribot module
  def logger
    Distribot.logger
  end

  private

  # Connects to the broker, then opens the channel and limits it to one
  # unacknowledged delivery at a time.
  def setup
    self.bunny = Bunny.new(amqp_url)
    bunny.start
    self.channel = bunny.create_channel
    channel.prefetch(1)
  end
end
39
+
40
# Wraps an existing connection object and lazily opens one channel on it
# (defined inside `module Distribot`).
class ConnectionSharer
  attr_accessor :bunny
  # Only the writer is generated; the reader below creates the channel on demand.
  attr_writer :channel

  # @param bunny [#create_channel] an already-established connection
  def initialize(bunny)
    self.bunny = bunny
    @channel = nil
  end

  # Returns the channel, creating it on first use and reusing it afterwards.
  def channel
    @channel ||= bunny.create_channel
  end

  # @return the logger configured on the Distribot module
  def logger
    Distribot.logger
  end
end
55
+
56
# Consumer for a durable, named work queue using manual acknowledgements
# (defined inside `module Distribot`).
class Subscription < ConnectionSharer
  attr_accessor :queue

  # Declares the durable queue `topic` and subscribes to it.
  #
  # Each payload is parsed as JSON (symbol keys) and passed to the block. The
  # delivery is acknowledged when the block returns normally.
  #
  # @param topic [String] queue name
  # @param options [Hash] extra subscribe options; manual_ack is always forced on
  # @return [Subscription] self
  def start(topic, options = {}, &block)
    self.queue = channel.queue(topic, auto_delete: false, durable: true)
    queue.subscribe(options.merge(manual_ack: true)) do |delivery_info, _properties, payload|
      handle_delivery(delivery_info.delivery_tag, payload, &block)
    end
    self
  end

  private

  # Processes one delivery and settles it with the broker.
  #
  # * Unparseable payload: logged and rejected WITHOUT requeue. The same bytes
  #   can never parse, so requeueing them would redeliver the message forever.
  # * Any other StandardError (e.g. raised by the handler block): logged and
  #   rejected WITH requeue so the message is delivered again.
  def handle_delivery(delivery_tag, payload, &block)
    begin
      parsed_message = JSON.parse(payload, symbolize_names: true)
    rescue JSON::ParserError => e
      logger.error "ERROR: #{e} -- #{e.backtrace.join("\n")}"
      channel.basic_reject(delivery_tag, false)
      return
    end

    block.call(parsed_message)
    channel.acknowledge(delivery_tag, false)
  rescue StandardError => e
    logger.error "ERROR: #{e} -- #{e.backtrace.join("\n")}"
    channel.basic_reject(delivery_tag, true)
  end
end
74
+
75
# Consumer that receives every message published to a fanout exchange through
# its own exclusive, auto-deleted, server-named queue (defined inside
# `module Distribot`).
class MultiSubscription < ConnectionSharer
  attr_accessor :queue

  # Binds a fresh exclusive queue to the fanout exchange `topic` and subscribes.
  # Each payload is parsed as JSON (symbol keys) and passed to the block.
  #
  # @param topic [String] fanout exchange name
  # @param options [Hash] passed straight through to subscribe
  # @return [MultiSubscription] self
  def start(topic, options = {}, &block)
    self.queue = channel.queue('', exclusive: true, auto_delete: true)
    fanout = channel.fanout(topic)
    bound_queue = queue.bind(fanout)
    bound_queue.subscribe(options) do |_delivery_info, _properties, payload|
      deliver(payload, block)
    end
    self
  end

  private

  # Parses one payload and hands it to the handler; failures are logged and
  # otherwise ignored.
  def deliver(payload, handler)
    handler.call(JSON.parse(payload, symbolize_names: true))
  rescue StandardError => e
    logger.error "Error #{e} - #{payload} --- #{e.backtrace.join("\n")}"
  end
end
90
+
91
# Connector that keeps track of its subscriptions and can publish to queues or
# broadcast to fanout exchanges (defined inside `module Distribot`).
class BunnyConnector < Connector
  attr_accessor :subscribers
  # Only the writer is generated; the reader below opens the channel on demand.
  attr_writer :channel

  def initialize(*args)
    super
    self.subscribers = []
  end

  # Channel used for publishing and broadcasting; created on first use.
  def channel
    @channel ||= bunny.create_channel
  end

  # @return whether the connection reports a queue named `topic`
  def queue_exists?(topic)
    bunny.queue_exists?(topic)
  end

  # Subscribes the block to the durable work queue `topic`.
  #
  # With `solo: true` in options a brand-new connection is opened first and used
  # for this and all later subscriptions.
  # NOTE(review): a channel already memoized by #channel still belongs to the
  # previous connection after that -- confirm this is intended.
  #
  # @param topic [String] queue name
  # @param options [Hash] subscribe options; :solo is consumed here. The hash
  #   passed in by the caller is not modified.
  # @return [Array] the list of subscribers
  def subscribe(topic, options = {}, &block)
    options = options.dup
    setup if options.delete(:solo)

    subscription = Subscription.new(bunny).start(topic, options) do |message|
      logger.debug "received(#{topic} -> #{message})"
      block.call(message)
    end
    subscribers << subscription
  end

  # Subscribes the block to the fanout exchange `topic`.
  #
  # @return [Array] the list of subscribers
  def subscribe_multi(topic, options = {}, &block)
    subscription = MultiSubscription.new(bunny).start(topic, options) do |message|
      logger.debug "received-multi(#{topic} -> #{message})"
      block.call(message)
    end
    subscribers << subscription
  end

  # Publishes `message` as JSON to the durable queue `topic` through the default
  # exchange. Declaring the queue is retried for as long as it times out.
  def publish(topic, message)
    queue = stubbornly :get_queue do
      channel.queue(topic, auto_delete: false, durable: true)
    end
    logger.debug "publish(#{topic} -> #{message})"
    channel.default_exchange.publish message.to_json, routing_key: queue.name
  end

  # Publishes `message` as JSON to the fanout exchange `topic`.
  def broadcast(topic, message)
    exchange = channel.fanout(topic)
    logger.debug "broadcast(#{topic} -> #{message})"
    exchange.publish(message.to_json, routing_key: topic)
  end

  private

  # Opens and starts a connection only; the channel is created lazily by
  # #channel rather than here.
  def setup
    self.bunny = Bunny.new(amqp_url)
    bunny.start
  end

  # Runs the block and returns its result, retrying once a second for as long as
  # it raises Timeout::Error. The retry is deliberately unbounded, as before.
  #
  # @param task [Symbol, String] label used in the log message
  def stubbornly(task)
    yield
  rescue Timeout::Error
    logger.error "Connection timed out during '#{task}' :retrying in 1sec"
    sleep 1
    retry
  end
end
162
+ end