robot-controller 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +23 -10
- data/VERSION +1 -1
- data/bin/controller +9 -12
- data/example/config/boot.rb +0 -6
- data/example/config/environments/robots_development.yml +86 -0
- data/lib/robot-controller.rb +5 -0
- data/lib/robot-controller/bluepill.rb +79 -0
- data/lib/robot-controller/robots.rb +126 -0
- data/robot-controller.gemspec +1 -1
- metadata +7 -6
- data/example/config/environments/bluepill_development.rb +0 -98
- data/example/config/environments/workflows_development.rb +0 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c01c2e8362bea0af35c415127c35e2b6fd5a643d
|
4
|
+
data.tar.gz: 3bea75dfaa22001504e2a7db019ae406e4b81c00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4c6de1781095edee97a52db140f4e6b9e7ca538ccaf66982ef0ce7f50467e2a998ac8c37334bc89fabd58dfe0420a7eb66604b51e593effd6a6870655f6bcf4
|
7
|
+
data.tar.gz: cc1b11f2655f960195a56ce760d19ebc39babd02e797002b19dce8ad3c4336213f2bb7a8ce7ad8b736ece255a259b2cf47db832fd1fcc2ffe4b8751dcc12160e
|
data/README.md
CHANGED
@@ -16,18 +16,31 @@ In your `Rakefile`, add the following (if you don't want to include the environm
|
|
16
16
|
|
17
17
|
Create the following configuration files based on the examples in `example/config`:
|
18
18
|
|
19
|
-
config/
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
config/environments/robots_development.yml
|
20
|
+
|
21
|
+
Then to use the controller to boot the robots:
|
22
|
+
|
23
|
+
% bundle exec controller boot
|
24
|
+
|
25
|
+
If you want to *override* the bluepill configuration but still use the
|
26
|
+
controller, then add:
|
27
|
+
|
28
|
+
config/bluepill.rb
|
23
29
|
|
24
30
|
### Usage
|
25
31
|
|
26
|
-
Usage: controller
|
27
|
-
controller
|
32
|
+
Usage: controller ( boot | quit )
|
33
|
+
controller ( start | status | stop | restart | log ) [worker]
|
34
|
+
controller [--help]
|
28
35
|
|
29
36
|
Example:
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
37
|
+
% controller boot # start bluepilld and jobs
|
38
|
+
% controller status # check on status of jobs
|
39
|
+
% controller log dor_accessionWF_descriptive-metadata # view log for worker
|
40
|
+
% controller stop # stop jobs
|
41
|
+
% controller quit # stop bluepilld
|
42
|
+
|
43
|
+
Environment:
|
44
|
+
BLUEPILL_BASEDIR - where bluepill stores its state (default: run/bluepill)
|
45
|
+
BLUEPILL_LOGFILE - output log (default: log/bluepill.log)
|
46
|
+
ROBOT_ENVIRONMENT - (default: development)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/bin/controller
CHANGED
@@ -17,11 +17,6 @@ Environment:
|
|
17
17
|
BLUEPILL_BASEDIR - where bluepill stores its state (default: run/bluepill)
|
18
18
|
BLUEPILL_LOGFILE - output log (default: log/bluepill.log)
|
19
19
|
ROBOT_ENVIRONMENT - (default: development)
|
20
|
-
|
21
|
-
Configuration files (in search order)
|
22
|
-
config/environments/bluepill_development.rb
|
23
|
-
config/environments/bluepill.rb
|
24
|
-
config/bluepill.rb
|
25
20
|
'
|
26
21
|
exit -1
|
27
22
|
end
|
@@ -36,13 +31,15 @@ cmd << " --base-dir #{ENV['BLUEPILL_BASE_DIR']}"
|
|
36
31
|
cmd << " --logfile #{ENV['BLUEPILL_LOGFILE']}"
|
37
32
|
|
38
33
|
if ARGV[0] == 'boot'
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
34
|
+
fn = 'config/bluepill.rb' # allow override
|
35
|
+
unless File.file?(fn)
|
36
|
+
require 'robot-controller'
|
37
|
+
fn = RobotController.bluepill_config
|
38
|
+
end
|
39
|
+
if File.file?(fn)
|
40
|
+
puts "Loading #{fn}"
|
41
|
+
system "#{cmd} load #{fn}"
|
42
|
+
exit 0
|
46
43
|
end
|
47
44
|
puts "ERROR: Cannot find bluepill configuration file for #{ENV['ROBOT_ENVIRONMENT']}"
|
48
45
|
exit -1
|
data/example/config/boot.rb
CHANGED
@@ -0,0 +1,86 @@
|
|
1
|
+
#
|
2
|
+
# Robot allocation strategy
|
3
|
+
#
|
4
|
+
# Format:
|
5
|
+
#
|
6
|
+
# host:
|
7
|
+
# - robot[:lane[:instances]]
|
8
|
+
#
|
9
|
+
# where
|
10
|
+
# 1. robot is a single robot identifier (fully-qualified with
|
11
|
+
# REPO_SUITE_ROBOT, e.g., "dor_accessionWF_technical-metadata").
|
12
|
+
# 2. lane is a single integer (4), a range (3-5), or a list (2,4,8),
|
13
|
+
# or an asterisk (*).
|
14
|
+
# 3. instances is a single integer.
|
15
|
+
#
|
16
|
+
# Both lane and instances are optional. Lane defaults to *, and
|
17
|
+
# instances defaults to 1.
|
18
|
+
#
|
19
|
+
# When a robot is allocated to multiple lanes, it reads them in
|
20
|
+
# PRIORITY ORDER. That is, if a robot is listening to lanes 1, 2, and 3,
|
21
|
+
# it works on lane 1 until empty, then lane 2 until empty, and then
|
22
|
+
# lane 3 until empty. In the meantime, if a job comes in on a faster
|
23
|
+
# lane, it works on that after finishing its current job (i.e., after
|
24
|
+
# working on a job in lane 3, if a job comes in on lane 1 in the interim,
|
25
|
+
# the robot will work on the lane 1 job next before returning to lane 3).
|
26
|
+
#
|
27
|
+
# In general, lanes 1-5 are reserved for priority queues, and lanes 6-N
|
28
|
+
# are dedicated lanes.
|
29
|
+
#
|
30
|
+
# Note that the syntax is YAML, so the lists must not contain spaces or
|
31
|
+
# need to be quoted.
|
32
|
+
#
|
33
|
+
# RIGHT
|
34
|
+
# - dor_accessionWF_technical-metadata:10:5
|
35
|
+
# - 'dor_accessionWF_technical-metadata : 10 : 5'
|
36
|
+
#
|
37
|
+
# WRONG
|
38
|
+
# - dor_accessionWF_technical-metadata : 10 : 5
|
39
|
+
# - dor_accessionWF_technical-metadata: 10: 5
|
40
|
+
#
|
41
|
+
|
42
|
+
#
|
43
|
+
# Robot 1 (8 CPU) hosts shelving and publish only
|
44
|
+
#
|
45
|
+
sul-robots1-dev:
|
46
|
+
- dor_accessionWF_shelve:*:3 # 3 robots for all lanes
|
47
|
+
- dor_accessionWF_shelve:6:3 # 3 robots for lane 6
|
48
|
+
- dor_accessionWF_shelve:7:3 # 3 robots for lane 7
|
49
|
+
- dor_accessionWF_shelve:8:3 # 3 robots for lane 8
|
50
|
+
- dor_accessionWF_shelve:9:3 # 3 robots for lane 9
|
51
|
+
- dor_accessionWF_shelve:10:3 # 3 robots for lane 10
|
52
|
+
- dor_accessionWF_publish:1-5:3 # 3 robots for lanes 1 through 5
|
53
|
+
- dor_accessionWF_publish:6:3 # 3 robots for lane 6
|
54
|
+
- dor_accessionWF_publish:7:3 # 3 robots for lane 7
|
55
|
+
- dor_accessionWF_publish:8:3 # 3 robots for lane 8
|
56
|
+
- dor_accessionWF_publish:9:3 # 3 robots for lane 9
|
57
|
+
- dor_accessionWF_publish:10:3 # 3 robots for lane 10
|
58
|
+
- dor_accessionWF_publish:6,9 # 1 robot for lanes 6 and 9
|
59
|
+
|
60
|
+
#
|
61
|
+
# Robot 2 (16 CPU) hosts technical metadata creation only
|
62
|
+
#
|
63
|
+
sul-robots2-dev:
|
64
|
+
- dor_accessionWF_technical-metadata:1:5 # 5 robots for lane 1
|
65
|
+
- dor_accessionWF_technical-metadata:2:5 # 5 robots for lane 2
|
66
|
+
- dor_accessionWF_technical-metadata:3-5:5 # 5 robots for lanes 3, 4, 5
|
67
|
+
- dor_accessionWF_technical-metadata:6:2 # 2 robots for lane 6
|
68
|
+
- dor_accessionWF_technical-metadata:7:2 # 2 robots for lane 7
|
69
|
+
- dor_accessionWF_technical-metadata:8:2 # 2 robots for lane 8
|
70
|
+
- dor_accessionWF_technical-metadata:9:2 # 2 robots for lane 9
|
71
|
+
- dor_accessionWF_technical-metadata:10:2 # 2 robots for lane 10
|
72
|
+
|
73
|
+
#
|
74
|
+
# Robot 3 (4 CPU) hosts helper robots for all accessioning workflows
|
75
|
+
#
|
76
|
+
sul-robots3-dev:
|
77
|
+
- dor_accessionWF_descriptive-metadata # 1 robot for all lanes
|
78
|
+
- dor_accessionWF_rights-metadata # 1 robot for all lanes
|
79
|
+
- dor_accessionWF_content-metadata # 1 robot for all lanes
|
80
|
+
- dor_accessionWF_technical-metadata # 1 robot for all lanes
|
81
|
+
- dor_accessionWF_remediate-object # 1 robot for all lanes
|
82
|
+
- dor_accessionWF_shelve:*:5 # 5 robots for all lanes
|
83
|
+
- dor_accessionWF_publish:*:10 # 10 robots for all lanes
|
84
|
+
- dor_accessionWF_provenance-metadata # 1 robot for all lanes
|
85
|
+
- dor_accessionWF_sdr-ingest-transfer # 1 robot for all lanes
|
86
|
+
- dor_accessionWF_end-accession # 1 robot for all lanes
|
data/lib/robot-controller.rb
CHANGED
@@ -0,0 +1,79 @@
|
|
1
|
+
WORKDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
2
|
+
robot_environment = ENV['ROBOT_ENVIRONMENT'] || 'development'
|
3
|
+
require 'robot-controller/robots'
|
4
|
+
#
|
5
|
+
# Expect ROBOTS = [
|
6
|
+
# {:robot => 'x', :queues => ['a', 'b'], :n => 1}
|
7
|
+
# {:robot => 'z', :queues => ['b'], :n => 3}
|
8
|
+
# ]
|
9
|
+
#
|
10
|
+
Bluepill.application File.basename(File.dirname(File.dirname(WORKDIR))),
|
11
|
+
:log_file => "#{WORKDIR}/log/bluepill.log" do |app|
|
12
|
+
app.working_dir = WORKDIR
|
13
|
+
ROBOTS.each_index do |i|
|
14
|
+
# prefix process name with index number to prevent duplicate process names
|
15
|
+
prefix = sprintf("robot%02d", i+1)
|
16
|
+
app.process("#{prefix}_#{ROBOTS[i][:robot]}") do |process|
|
17
|
+
puts "Creating robot #{process.name}"
|
18
|
+
|
19
|
+
# queue order is *VERY* important
|
20
|
+
queues = ROBOTS[i][:queues].join(',')
|
21
|
+
|
22
|
+
# use environment for these resque variables
|
23
|
+
process.environment = {
|
24
|
+
'QUEUES' => queues,
|
25
|
+
'ROBOT_ENVIRONMENT' => robot_environment
|
26
|
+
}
|
27
|
+
process.environment['VERBOSE'] = 'yes' if robot_environment != 'production'
|
28
|
+
|
29
|
+
# process configuration
|
30
|
+
process.group = robot_environment
|
31
|
+
process.stdout = process.stderr = "#{WORKDIR}/log/#{ROBOTS[i][:robot]}.log"
|
32
|
+
|
33
|
+
# spawn worker processes using robot-controller
|
34
|
+
process.environment['COUNT'] = ROBOTS[i][:n]
|
35
|
+
process.start_command = "rake workers"
|
36
|
+
|
37
|
+
# we use bluepill to daemonize the resque workers rather than using
|
38
|
+
# resque's BACKGROUND flag
|
39
|
+
process.daemonize = true
|
40
|
+
|
41
|
+
# bluepill manages pid files
|
42
|
+
# process.pid_file = "#{WORKDIR}/run/#{process.name}.pid"
|
43
|
+
|
44
|
+
# graceful stops
|
45
|
+
process.stop_grace_time = 360.seconds # must be greater than stop_signals total
|
46
|
+
process.stop_signals = [
|
47
|
+
:quit, 300.seconds, # waits for jobs, then exits gracefully
|
48
|
+
:term, 10.seconds, # kills jobs and exits
|
49
|
+
:kill # no mercy
|
50
|
+
]
|
51
|
+
|
52
|
+
# process monitoring
|
53
|
+
|
54
|
+
# backoff if process is flapping between states
|
55
|
+
# process.checks :flapping,
|
56
|
+
# :times => 2, :within => 30.seconds,
|
57
|
+
# :retry_in => 7.seconds
|
58
|
+
|
59
|
+
# restart if process runs for longer than 15 mins of CPU time
|
60
|
+
# process.checks :running_time,
|
61
|
+
# :every => 5.minutes, :below => 15.minutes
|
62
|
+
|
63
|
+
# restart if CPU usage > 75% for 3 times, check every 10 seconds
|
64
|
+
# process.checks :cpu_usage,
|
65
|
+
# :every => 10.seconds,
|
66
|
+
# :below => 75, :times => 3,
|
67
|
+
# :include_children => true
|
68
|
+
#
|
69
|
+
# restart the process or any of its children
|
70
|
+
# if MEM usage > 100MB for 3 times, check every 10 seconds
|
71
|
+
# process.checks :mem_usage,
|
72
|
+
# :every => 10.seconds,
|
73
|
+
# :below => 100.megabytes, :times => 3,
|
74
|
+
# :include_children => true
|
75
|
+
|
76
|
+
# NOTE: there is an implicit process.keepalive
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
class RobotConfigParser
|
4
|
+
ROBOT_INSTANCE_MAX = 16
|
5
|
+
LANE_INSTANCE_MAX = 99 # sprintf("%02d") maximum
|
6
|
+
|
7
|
+
# parse_instances(1) == 1
|
8
|
+
# parse_instances(16) == 16
|
9
|
+
# parse_instances(0) == 1
|
10
|
+
# parse_instances(99) => RuntimeError
|
11
|
+
def parse_instances(n)
|
12
|
+
if n > ROBOT_INSTANCE_MAX
|
13
|
+
raise RuntimeError, "TooManyInstances: #{n} > #{ROBOT_INSTANCE_MAX}"
|
14
|
+
end
|
15
|
+
n = 1 if n < 1
|
16
|
+
n
|
17
|
+
end
|
18
|
+
|
19
|
+
# parse_lanes('*') == ['*']
|
20
|
+
# parse_lanes('0') == [0]
|
21
|
+
# parse_lanes('1') == [1]
|
22
|
+
# parse_lanes('1-5') == [1,2,3,4,5]
|
23
|
+
# parse_lanes('1,2,3') == [1,2,3]
|
24
|
+
# parse_lanes('1-5,8') == [1,2,3,4,5,8]
|
25
|
+
# parse_lanes('-1') == [0, 1]
|
26
|
+
# parse_lanes('100') => RuntimeError
|
27
|
+
def parse_lanes(lanes_spec)
|
28
|
+
lanes = []
|
29
|
+
|
30
|
+
# parse each comma-separated specification
|
31
|
+
lanes_spec.split(/,/).each do |i|
|
32
|
+
# this is a range element
|
33
|
+
if i =~ /-/
|
34
|
+
x = i.split(/-/)
|
35
|
+
Range.new(x[0].to_i, x[1].to_i).each do |j|
|
36
|
+
lanes << j
|
37
|
+
end
|
38
|
+
# a wildcard
|
39
|
+
elsif i == '*'
|
40
|
+
lanes << '*'
|
41
|
+
# simple integer
|
42
|
+
else
|
43
|
+
lanes << i.to_i
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
# verify that lanes are all within 0 .. LANE_INSTANCE_MAX
|
48
|
+
lanes.each do |j|
|
49
|
+
if j.is_a?(Integer)
|
50
|
+
if j > LANE_INSTANCE_MAX
|
51
|
+
raise RuntimeError, "SyntaxError: Lane #{j} > #{LANE_INSTANCE_MAX}"
|
52
|
+
elsif j < 0
|
53
|
+
raise RuntimeError, "SyntaxError: Lane #{j} < 0"
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
lanes
|
58
|
+
end
|
59
|
+
|
60
|
+
# build_queues('a','1') => ['a_01']
|
61
|
+
# build_queues('a','1,3') => ['a_01', 'a_03']
|
62
|
+
# build_queues('a','1-3') => ['a_01', 'a_02', 'a_03']
|
63
|
+
def build_queues(robot, lanes)
|
64
|
+
queues = []
|
65
|
+
parse_lanes(lanes).each do |i|
|
66
|
+
queues << [robot, i == '*' ? '*' : sprintf("%02d", i)].join('_')
|
67
|
+
end
|
68
|
+
queues
|
69
|
+
end
|
70
|
+
|
71
|
+
# main entry point
|
72
|
+
def load(env)
|
73
|
+
# read the YAML file
|
74
|
+
robots_fn = File.join('config', 'environments', "robots_#{env}.yml")
|
75
|
+
unless File.file?(robots_fn)
|
76
|
+
raise RuntimeError, "FileNotFound: #{robots_fn}"
|
77
|
+
end
|
78
|
+
|
79
|
+
puts "Loading #{robots_fn}"
|
80
|
+
robots = YAML.load_file(robots_fn)
|
81
|
+
# puts robots
|
82
|
+
|
83
|
+
# determine current host
|
84
|
+
host = `hostname -s`.strip
|
85
|
+
# puts host
|
86
|
+
|
87
|
+
# host = 'sul-robots1-dev' # XXX
|
88
|
+
unless robots.include?(host)
|
89
|
+
raise RuntimeError, "HostMismatch: #{host} not defined in #{robots_fn}"
|
90
|
+
end
|
91
|
+
|
92
|
+
# parse YAML lines for host where i is robot[:lane[:instances]]
|
93
|
+
r = []
|
94
|
+
robots[host].each do |i|
|
95
|
+
robot = i.split(/:/)
|
96
|
+
robot.each do |j|
|
97
|
+
if j.strip == ''
|
98
|
+
raise RuntimeError, "SyntaxError: #{i}"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# add defaults
|
103
|
+
if robot.size == 1
|
104
|
+
robot << '*'
|
105
|
+
end
|
106
|
+
if robot.size == 2
|
107
|
+
robot << '1'
|
108
|
+
end
|
109
|
+
|
110
|
+
# build queues for robot instances
|
111
|
+
unless robot.size == 3
|
112
|
+
raise RuntimeError, "SyntaxError: #{i}"
|
113
|
+
end
|
114
|
+
robot[2] = parse_instances(robot[2].to_i)
|
115
|
+
# puts robot.join(' : ')
|
116
|
+
queues = build_queues(robot[0], robot[1])
|
117
|
+
# puts queues
|
118
|
+
|
119
|
+
r << {:robot => robot[0], :queues => queues, :n => robot[2] }
|
120
|
+
end
|
121
|
+
r
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
ROBOTS = RobotConfigParser.new.load(ENV['ROBOT_ENVIRONMENT'] || 'development')
|
126
|
+
# puts ROBOTS
|
data/robot-controller.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: robot-controller
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Darren Hardy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-05-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bluepill
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ~>
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.0.
|
19
|
+
version: 0.0.67
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.0.
|
26
|
+
version: 0.0.67
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: awesome_print
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -124,10 +124,11 @@ files:
|
|
124
124
|
- VERSION
|
125
125
|
- bin/controller
|
126
126
|
- example/config/boot.rb
|
127
|
-
- example/config/environments/
|
128
|
-
- example/config/environments/workflows_development.rb
|
127
|
+
- example/config/environments/robots_development.yml
|
129
128
|
- example/lib/tasks/environment.rake
|
130
129
|
- lib/robot-controller.rb
|
130
|
+
- lib/robot-controller/bluepill.rb
|
131
|
+
- lib/robot-controller/robots.rb
|
131
132
|
- lib/robot-controller/tasks.rb
|
132
133
|
- lib/tasks/doc.rake
|
133
134
|
- robot-controller.gemspec
|
@@ -1,98 +0,0 @@
|
|
1
|
-
WORKDIR=File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
|
2
|
-
robot_environment = ENV['ROBOT_ENVIRONMENT'] || 'development'
|
3
|
-
workflows = File.expand_path(File.join(WORKDIR, 'config', 'environments', "workflows_#{robot_environment}.rb"))
|
4
|
-
puts "Loading #{workflows}"
|
5
|
-
require workflows
|
6
|
-
|
7
|
-
Bluepill.application 'robot-controller',
|
8
|
-
:log_file => "#{WORKDIR}/log/bluepill.log" do |app|
|
9
|
-
app.working_dir = WORKDIR
|
10
|
-
WORKFLOW_STEPS.each do |qualified_wf|
|
11
|
-
wf = qualified_wf.gsub(/:/, '_')
|
12
|
-
app.process(wf) do |process|
|
13
|
-
# use configuration for number of workers -- default is 1
|
14
|
-
n = WORKFLOW_N[qualified_wf] ? WORKFLOW_N[qualified_wf].to_i : 1
|
15
|
-
puts "Creating #{n} worker#{n>1?'s':' '} for #{qualified_wf}"
|
16
|
-
|
17
|
-
# queue order is *VERY* important
|
18
|
-
#
|
19
|
-
# XXX: make this configurable based on wf
|
20
|
-
# WORKFLOW_PRIORITIES[wf] is the name of a second worker that reads the given queues
|
21
|
-
#
|
22
|
-
# see RobotMaster::Queue#queue_name for naming convention
|
23
|
-
# @example
|
24
|
-
# queue_name('dor:assemblyWF:jp2-create')
|
25
|
-
# => 'dor_assemblyWF_jp2-create_default'
|
26
|
-
# queue_name('dor:assemblyWF:jp2-create', 100)
|
27
|
-
# => 'dor_assemblyWF_jp2-create_high'
|
28
|
-
#
|
29
|
-
queues = []
|
30
|
-
%w{critical high default low}.each do |p|
|
31
|
-
queues << "#{wf}_#{p}"
|
32
|
-
end
|
33
|
-
queues = queues.join(',')
|
34
|
-
# puts "Using queues #{queues}"
|
35
|
-
|
36
|
-
# use environment for these resque variables
|
37
|
-
process.environment = {
|
38
|
-
'QUEUES' => "#{queues}",
|
39
|
-
'VERBOSE' => 'yes',
|
40
|
-
'ROBOT_ENVIRONMENT' => robot_environment
|
41
|
-
}
|
42
|
-
|
43
|
-
# process configuration
|
44
|
-
process.group = robot_environment
|
45
|
-
process.stdout = process.stderr = "#{WORKDIR}/log/#{wf}.log"
|
46
|
-
|
47
|
-
# let bluepill manage pid files
|
48
|
-
# process.pid_file = "#{WORKDIR}/run/#{wf}.pid"
|
49
|
-
|
50
|
-
# spawn n worker processes
|
51
|
-
if n > 1
|
52
|
-
process.start_command = "env COUNT=#{n} rake workers" # not resque:workers
|
53
|
-
else # 1 worker
|
54
|
-
process.start_command = "rake environment resque:work"
|
55
|
-
end
|
56
|
-
# puts "Using #{process.start_command}"
|
57
|
-
# puts "Using #{process.environment}"
|
58
|
-
|
59
|
-
# we use bluepill to daemonize the resque workers rather than using
|
60
|
-
# resque's BACKGROUND flag
|
61
|
-
process.daemonize = true
|
62
|
-
|
63
|
-
# graceful stops
|
64
|
-
process.stop_grace_time = 60.seconds # must be greater than stop_signals total
|
65
|
-
process.stop_signals = [
|
66
|
-
:quit, 45.seconds, # waits for jobs, then exits gracefully
|
67
|
-
:term, 10.seconds, # kills jobs and exits
|
68
|
-
:kill # no mercy
|
69
|
-
]
|
70
|
-
|
71
|
-
# process monitoring
|
72
|
-
|
73
|
-
# backoff if process is flapping between states
|
74
|
-
# process.checks :flapping,
|
75
|
-
# :times => 2, :within => 30.seconds,
|
76
|
-
# :retry_in => 7.seconds
|
77
|
-
|
78
|
-
# restart if process runs for longer than 15 mins of CPU time
|
79
|
-
# process.checks :running_time,
|
80
|
-
# :every => 5.minutes, :below => 15.minutes
|
81
|
-
|
82
|
-
# restart if CPU usage > 75% for 3 times, check every 10 seconds
|
83
|
-
# process.checks :cpu_usage,
|
84
|
-
# :every => 10.seconds,
|
85
|
-
# :below => 75, :times => 3,
|
86
|
-
# :include_children => true
|
87
|
-
#
|
88
|
-
# restart the process or any of its children
|
89
|
-
# if MEM usage > 100MB for 3 times, check every 10 seconds
|
90
|
-
# process.checks :mem_usage,
|
91
|
-
# :every => 10.seconds,
|
92
|
-
# :below => 100.megabytes, :times => 3,
|
93
|
-
# :include_children => true
|
94
|
-
|
95
|
-
# NOTE: there is an implicit process.keepalive
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
@@ -1,25 +0,0 @@
|
|
1
|
-
# will spawn worker(s) for each of the given workflows (fully qualified as "repo:wf:robot")
|
2
|
-
WORKFLOW_STEPS = %w{
|
3
|
-
dor:accessionWF:start-accession
|
4
|
-
dor:accessionWF:descriptive-metadata
|
5
|
-
dor:accessionWF:rights-metadata
|
6
|
-
dor:accessionWF:content-metadata
|
7
|
-
dor:accessionWF:technical-metadata
|
8
|
-
dor:accessionWF:remediate-object
|
9
|
-
dor:accessionWF:shelve
|
10
|
-
dor:accessionWF:publish
|
11
|
-
dor:accessionWF:provenance-metadata
|
12
|
-
dor:accessionWF:sdr-ingest-transfer
|
13
|
-
dor:accessionWF:sdr-ingest-received
|
14
|
-
dor:accessionWF:end-accession
|
15
|
-
dor:assemblyWF:start-assembly
|
16
|
-
dor:assemblyWF:jp2-create
|
17
|
-
dor:assemblyWF:checksum-compute
|
18
|
-
dor:assemblyWF:exif-collect
|
19
|
-
dor:assemblyWF:accessioning-initiate
|
20
|
-
}
|
21
|
-
|
22
|
-
# number of workers for the given workflows
|
23
|
-
WORKFLOW_N = Hash[*%w{
|
24
|
-
dor:assemblyWF:checksum-compute 3
|
25
|
-
}]
|