robot-controller 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -10
- data/VERSION +1 -1
- data/bin/controller +9 -12
- data/example/config/boot.rb +0 -6
- data/example/config/environments/robots_development.yml +86 -0
- data/lib/robot-controller.rb +5 -0
- data/lib/robot-controller/bluepill.rb +79 -0
- data/lib/robot-controller/robots.rb +126 -0
- data/robot-controller.gemspec +1 -1
- metadata +7 -6
- data/example/config/environments/bluepill_development.rb +0 -98
- data/example/config/environments/workflows_development.rb +0 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c01c2e8362bea0af35c415127c35e2b6fd5a643d
|
4
|
+
data.tar.gz: 3bea75dfaa22001504e2a7db019ae406e4b81c00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4c6de1781095edee97a52db140f4e6b9e7ca538ccaf66982ef0ce7f50467e2a998ac8c37334bc89fabd58dfe0420a7eb66604b51e593effd6a6870655f6bcf4
|
7
|
+
data.tar.gz: cc1b11f2655f960195a56ce760d19ebc39babd02e797002b19dce8ad3c4336213f2bb7a8ce7ad8b736ece255a259b2cf47db832fd1fcc2ffe4b8751dcc12160e
|
data/README.md
CHANGED
@@ -16,18 +16,31 @@ In your `Rakefile`, add the following (if you don't want to include the environm
|
|
16
16
|
|
17
17
|
Create the following configuration files based on the examples in `example/config`:
|
18
18
|
|
19
|
-
config/
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
config/environments/robots_development.yml
|
20
|
+
|
21
|
+
Then to use the controller to boot the robots:
|
22
|
+
|
23
|
+
% bundle exec controller boot
|
24
|
+
|
25
|
+
If you want to *override* the bluepill configuration but still use the
|
26
|
+
controller, then add:
|
27
|
+
|
28
|
+
config/bluepill.rb
|
23
29
|
|
24
30
|
### Usage
|
25
31
|
|
26
|
-
Usage: controller
|
27
|
-
controller
|
32
|
+
Usage: controller ( boot | quit )
|
33
|
+
controller ( start | status | stop | restart | log ) [worker]
|
34
|
+
controller [--help]
|
28
35
|
|
29
36
|
Example:
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
37
|
+
% controller boot # start bluepilld and jobs
|
38
|
+
% controller status # check on status of jobs
|
39
|
+
% controller log dor_accessionWF_descriptive-metadata # view log for worker
|
40
|
+
% controller stop # stop jobs
|
41
|
+
% controller quit # stop bluepilld
|
42
|
+
|
43
|
+
Environment:
|
44
|
+
BLUEPILL_BASE_DIR - where bluepill stores its state (default: run/bluepill)
|
45
|
+
BLUEPILL_LOGFILE - output log (default: log/bluepill.log)
|
46
|
+
ROBOT_ENVIRONMENT - (default: development)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/bin/controller
CHANGED
@@ -17,11 +17,6 @@ Environment:
|
|
17
17
|
BLUEPILL_BASEDIR - where bluepill stores its state (default: run/bluepill)
|
18
18
|
BLUEPILL_LOGFILE - output log (default: log/bluepill.log)
|
19
19
|
ROBOT_ENVIRONMENT - (default: development)
|
20
|
-
|
21
|
-
Configuration files (in search order)
|
22
|
-
config/environments/bluepill_development.rb
|
23
|
-
config/environments/bluepill.rb
|
24
|
-
config/bluepill.rb
|
25
20
|
'
|
26
21
|
exit -1
|
27
22
|
end
|
@@ -36,13 +31,15 @@ cmd << " --base-dir #{ENV['BLUEPILL_BASE_DIR']}"
|
|
36
31
|
cmd << " --logfile #{ENV['BLUEPILL_LOGFILE']}"
|
37
32
|
|
38
33
|
if ARGV[0] == 'boot'
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
34
|
+
fn = 'config/bluepill.rb' # allow override
|
35
|
+
unless File.file?(fn)
|
36
|
+
require 'robot-controller'
|
37
|
+
fn = RobotController.bluepill_config
|
38
|
+
end
|
39
|
+
if File.file?(fn)
|
40
|
+
puts "Loading #{fn}"
|
41
|
+
system "#{cmd} load #{fn}"
|
42
|
+
exit 0
|
46
43
|
end
|
47
44
|
puts "ERROR: Cannot find bluepill configuration file for #{ENV['ROBOT_ENVIRONMENT']}"
|
48
45
|
exit -1
|
data/example/config/boot.rb
CHANGED
@@ -0,0 +1,86 @@
|
|
1
|
+
#
|
2
|
+
# Robot allocation strategy
|
3
|
+
#
|
4
|
+
# Format:
|
5
|
+
#
|
6
|
+
# host:
|
7
|
+
# - robot[:lane[:instances]]
|
8
|
+
#
|
9
|
+
# where
|
10
|
+
# 1. robot is a single robot identifier (fully-qualified with
|
11
|
+
# REPO_SUITE_ROBOT, e.g., "dor_accessionWF_technical-metadata").
|
12
|
+
# 2. lane is a single integer (4), a range (3-5), or a list (2,4,8),
|
13
|
+
# or an asterisk (*).
|
14
|
+
# 3. instances is a single integer.
|
15
|
+
#
|
16
|
+
# Both lane and instances are optional. Lane defaults to *, and
|
17
|
+
# instances defaults to 1.
|
18
|
+
#
|
19
|
+
# When a robot is allocated to multiple lanes, it reads them in
|
20
|
+
# PRIORITY ORDER. That is, if a robot is listening to lanes 1, 2, and 3,
|
21
|
+
# it works on lane 1 until empty, then lane 2 until empty, and then
|
22
|
+
# lane 3 until empty. In the meantime, if a job comes in on a faster
|
23
|
+
# lane, it works on that after finishing its current job (i.e., after
|
24
|
+
# working on a job in lane 3, if a job comes in on lane 1 in the interim,
|
25
|
+
# the robot will work on the lane 1 job next before returning to lane 3).
|
26
|
+
#
|
27
|
+
# In general, lanes 1-5 are reserved for priority queues, and lanes 6-N
|
28
|
+
# are dedicated lanes.
|
29
|
+
#
|
30
|
+
# Note that the syntax is YAML, so the lists must not contain spaces or
|
31
|
+
# need to be quoted.
|
32
|
+
#
|
33
|
+
# RIGHT
|
34
|
+
# - dor_accessionWF_technical-metadata:10:5
|
35
|
+
# - 'dor_accessionWF_technical-metadata : 10 : 5'
|
36
|
+
#
|
37
|
+
# WRONG
|
38
|
+
# - dor_accessionWF_technical-metadata : 10 : 5
|
39
|
+
# - dor_accessionWF_technical-metadata: 10: 5
|
40
|
+
#
|
41
|
+
|
42
|
+
#
|
43
|
+
# Robot 1 (8 CPU) hosts shelving and publish only
|
44
|
+
#
|
45
|
+
sul-robots1-dev:
|
46
|
+
- dor_accessionWF_shelve:*:3 # 3 robots for all lanes
|
47
|
+
- dor_accessionWF_shelve:6:3 # 3 robots for lane 6
|
48
|
+
- dor_accessionWF_shelve:7:3 # 3 robots for lane 7
|
49
|
+
- dor_accessionWF_shelve:8:3 # 3 robots for lane 8
|
50
|
+
- dor_accessionWF_shelve:9:3 # 3 robots for lane 9
|
51
|
+
- dor_accessionWF_shelve:10:3 # 3 robots for lane 10
|
52
|
+
- dor_accessionWF_publish:1-5:3 # 3 robots for lanes 1 through 5
|
53
|
+
- dor_accessionWF_publish:6:3 # 3 robots for lane 6
|
54
|
+
- dor_accessionWF_publish:7:3 # 3 robots for lane 7
|
55
|
+
- dor_accessionWF_publish:8:3 # 3 robots for lane 8
|
56
|
+
- dor_accessionWF_publish:9:3 # 3 robots for lane 9
|
57
|
+
- dor_accessionWF_publish:10:3 # 3 robots for lane 10
|
58
|
+
- dor_accessionWF_publish:6,9 # 1 robot for lanes 6 and 9
|
59
|
+
|
60
|
+
#
|
61
|
+
# Robot 2 (16 CPU) hosts technical metadata creation only
|
62
|
+
#
|
63
|
+
sul-robots2-dev:
|
64
|
+
- dor_accessionWF_technical-metadata:1:5 # 5 robots for lane 1
|
65
|
+
- dor_accessionWF_technical-metadata:2:5 # 5 robots for lane 2
|
66
|
+
- dor_accessionWF_technical-metadata:3-5:5 # 5 robots for lanes 3, 4, 5
|
67
|
+
- dor_accessionWF_technical-metadata:6:2 # 2 robots for lane 6
|
68
|
+
- dor_accessionWF_technical-metadata:7:2 # 2 robots for lane 7
|
69
|
+
- dor_accessionWF_technical-metadata:8:2 # 2 robots for lane 8
|
70
|
+
- dor_accessionWF_technical-metadata:9:2 # 2 robots for lane 9
|
71
|
+
- dor_accessionWF_technical-metadata:10:2 # 2 robots for lane 10
|
72
|
+
|
73
|
+
#
|
74
|
+
# Robot 3 (4 CPU) hosts helper robots for all accessioning workflows
|
75
|
+
#
|
76
|
+
sul-robots3-dev:
|
77
|
+
- dor_accessionWF_descriptive-metadata # 1 robot for all lanes
|
78
|
+
- dor_accessionWF_rights-metadata # 1 robot for all lanes
|
79
|
+
- dor_accessionWF_content-metadata # 1 robot for all lanes
|
80
|
+
- dor_accessionWF_technical-metadata # 1 robot for all lanes
|
81
|
+
- dor_accessionWF_remediate-object # 1 robot for all lanes
|
82
|
+
- dor_accessionWF_shelve:*:5 # 5 robots for all lanes
|
83
|
+
- dor_accessionWF_publish:*:10 # 10 robots for all lanes
|
84
|
+
- dor_accessionWF_provenance-metadata # 1 robot for all lanes
|
85
|
+
- dor_accessionWF_sdr-ingest-transfer # 1 robot for all lanes
|
86
|
+
- dor_accessionWF_end-accession # 1 robot for all lanes
|
data/lib/robot-controller.rb
CHANGED
@@ -0,0 +1,79 @@
|
|
1
|
+
WORKDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
2
|
+
robot_environment = ENV['ROBOT_ENVIRONMENT'] || 'development'
|
3
|
+
require 'robot-controller/robots'
|
4
|
+
#
|
5
|
+
# Expect ROBOTS = [
|
6
|
+
# {:robot => 'x', :queues => ['a', 'b'], :n => 1}
|
7
|
+
# {:robot => 'z', :queues => ['b'], :n => 3}
|
8
|
+
# ]
|
9
|
+
#
|
10
|
+
Bluepill.application File.basename(File.dirname(File.dirname(WORKDIR))),
|
11
|
+
:log_file => "#{WORKDIR}/log/bluepill.log" do |app|
|
12
|
+
app.working_dir = WORKDIR
|
13
|
+
ROBOTS.each_index do |i|
|
14
|
+
# prefix process name with index number to prevent duplicate process names
|
15
|
+
prefix = sprintf("robot%02d", i+1)
|
16
|
+
app.process("#{prefix}_#{ROBOTS[i][:robot]}") do |process|
|
17
|
+
puts "Creating robot #{process.name}"
|
18
|
+
|
19
|
+
# queue order is *VERY* important
|
20
|
+
queues = ROBOTS[i][:queues].join(',')
|
21
|
+
|
22
|
+
# use environment for these resque variables
|
23
|
+
process.environment = {
|
24
|
+
'QUEUES' => queues,
|
25
|
+
'ROBOT_ENVIRONMENT' => robot_environment
|
26
|
+
}
|
27
|
+
process.environment['VERBOSE'] = 'yes' if robot_environment != 'production'
|
28
|
+
|
29
|
+
# process configuration
|
30
|
+
process.group = robot_environment
|
31
|
+
process.stdout = process.stderr = "#{WORKDIR}/log/#{ROBOTS[i][:robot]}.log"
|
32
|
+
|
33
|
+
# spawn worker processes using robot-controller
|
34
|
+
process.environment['COUNT'] = ROBOTS[i][:n]
|
35
|
+
process.start_command = "rake workers"
|
36
|
+
|
37
|
+
# we use bluepill to daemonize the resque workers rather than using
|
38
|
+
# resque's BACKGROUND flag
|
39
|
+
process.daemonize = true
|
40
|
+
|
41
|
+
# bluepill manages pid files
|
42
|
+
# process.pid_file = "#{WORKDIR}/run/#{process.name}.pid"
|
43
|
+
|
44
|
+
# graceful stops
|
45
|
+
process.stop_grace_time = 360.seconds # must be greater than stop_signals total
|
46
|
+
process.stop_signals = [
|
47
|
+
:quit, 300.seconds, # waits for jobs, then exits gracefully
|
48
|
+
:term, 10.seconds, # kills jobs and exits
|
49
|
+
:kill # no mercy
|
50
|
+
]
|
51
|
+
|
52
|
+
# process monitoring
|
53
|
+
|
54
|
+
# backoff if process is flapping between states
|
55
|
+
# process.checks :flapping,
|
56
|
+
# :times => 2, :within => 30.seconds,
|
57
|
+
# :retry_in => 7.seconds
|
58
|
+
|
59
|
+
# restart if process runs for longer than 15 mins of CPU time
|
60
|
+
# process.checks :running_time,
|
61
|
+
# :every => 5.minutes, :below => 15.minutes
|
62
|
+
|
63
|
+
# restart if CPU usage > 75% for 3 times, check every 10 seconds
|
64
|
+
# process.checks :cpu_usage,
|
65
|
+
# :every => 10.seconds,
|
66
|
+
# :below => 75, :times => 3,
|
67
|
+
# :include_children => true
|
68
|
+
#
|
69
|
+
# restart the process or any of its children
|
70
|
+
# if MEM usage > 100MB for 3 times, check every 10 seconds
|
71
|
+
# process.checks :mem_usage,
|
72
|
+
# :every => 10.seconds,
|
73
|
+
# :below => 100.megabytes, :times => 3,
|
74
|
+
# :include_children => true
|
75
|
+
|
76
|
+
# NOTE: there is an implicit process.keepalive
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
class RobotConfigParser
|
4
|
+
ROBOT_INSTANCE_MAX = 16
|
5
|
+
LANE_INSTANCE_MAX = 99 # sprintf("%02d") maximum
|
6
|
+
|
7
|
+
# parse_instances(1) == 1
|
8
|
+
# parse_instances(16) == 16
|
9
|
+
# parse_instances(0) == 1
|
10
|
+
# parse_instances(99) => RuntimeError
|
11
|
+
def parse_instances(n)
|
12
|
+
if n > ROBOT_INSTANCE_MAX
|
13
|
+
raise RuntimeError, "TooManyInstances: #{n} > #{ROBOT_INSTANCE_MAX}"
|
14
|
+
end
|
15
|
+
n = 1 if n < 1
|
16
|
+
n
|
17
|
+
end
|
18
|
+
|
19
|
+
# parse_lanes('*') == ['*']
|
20
|
+
# parse_lanes('0') == [0]
|
21
|
+
# parse_lanes('1') == [1]
|
22
|
+
# parse_lanes('1-5') == [1,2,3,4,5]
|
23
|
+
# parse_lanes('1,2,3') == [1,2,3]
|
24
|
+
# parse_lanes('1-5,8') == [1,2,3,4,5,8]
|
25
|
+
# parse_lanes('-1') == [0, 1]
|
26
|
+
# parse_lanes('100') => RuntimeError
|
27
|
+
def parse_lanes(lanes_spec)
|
28
|
+
lanes = []
|
29
|
+
|
30
|
+
# parse each comma-separated specification
|
31
|
+
lanes_spec.split(/,/).each do |i|
|
32
|
+
# this is a range element
|
33
|
+
if i =~ /-/
|
34
|
+
x = i.split(/-/)
|
35
|
+
Range.new(x[0].to_i, x[1].to_i).each do |j|
|
36
|
+
lanes << j
|
37
|
+
end
|
38
|
+
# a wildcard
|
39
|
+
elsif i == '*'
|
40
|
+
lanes << '*'
|
41
|
+
# simple integer
|
42
|
+
else
|
43
|
+
lanes << i.to_i
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
# verify that lanes are all within 0 .. LANE_INSTANCE_MAX
|
48
|
+
lanes.each do |j|
|
49
|
+
if j.is_a?(Integer)
|
50
|
+
if j > LANE_INSTANCE_MAX
|
51
|
+
raise RuntimeError, "SyntaxError: Lane #{j} > #{LANE_INSTANCE_MAX}"
|
52
|
+
elsif j < 0
|
53
|
+
raise RuntimeError, "SyntaxError: Lane #{j} < 0"
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
lanes
|
58
|
+
end
|
59
|
+
|
60
|
+
# build_queues('a','1') => ['a_01']
|
61
|
+
# build_queues('a','1,3') => ['a_01', 'a_03']
|
62
|
+
# build_queues('a','1-3') => ['a_01', 'a_02', 'a_03']
|
63
|
+
def build_queues(robot, lanes)
|
64
|
+
queues = []
|
65
|
+
parse_lanes(lanes).each do |i|
|
66
|
+
queues << [robot, i == '*' ? '*' : sprintf("%02d", i)].join('_')
|
67
|
+
end
|
68
|
+
queues
|
69
|
+
end
|
70
|
+
|
71
|
+
# main entry point
|
72
|
+
def load(env)
|
73
|
+
# read the YAML file
|
74
|
+
robots_fn = File.join('config', 'environments', "robots_#{env}.yml")
|
75
|
+
unless File.file?(robots_fn)
|
76
|
+
raise RuntimeError, "FileNotFound: #{robots_fn}"
|
77
|
+
end
|
78
|
+
|
79
|
+
puts "Loading #{robots_fn}"
|
80
|
+
robots = YAML.load_file(robots_fn)
|
81
|
+
# puts robots
|
82
|
+
|
83
|
+
# determine current host
|
84
|
+
host = `hostname -s`.strip
|
85
|
+
# puts host
|
86
|
+
|
87
|
+
# host = 'sul-robots1-dev' # XXX
|
88
|
+
unless robots.include?(host)
|
89
|
+
raise RuntimeError, "HostMismatch: #{host} not defined in #{robots_fn}"
|
90
|
+
end
|
91
|
+
|
92
|
+
# parse YAML lines for host where i is robot[:lane[:instances]]
|
93
|
+
r = []
|
94
|
+
robots[host].each do |i|
|
95
|
+
robot = i.split(/:/)
|
96
|
+
robot.each do |j|
|
97
|
+
if j.strip == ''
|
98
|
+
raise RuntimeError, "SyntaxError: #{i}"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# add defaults
|
103
|
+
if robot.size == 1
|
104
|
+
robot << '*'
|
105
|
+
end
|
106
|
+
if robot.size == 2
|
107
|
+
robot << '1'
|
108
|
+
end
|
109
|
+
|
110
|
+
# build queues for robot instances
|
111
|
+
unless robot.size == 3
|
112
|
+
raise RuntimeError, "SyntaxError: #{i}"
|
113
|
+
end
|
114
|
+
robot[2] = parse_instances(robot[2].to_i)
|
115
|
+
# puts robot.join(' : ')
|
116
|
+
queues = build_queues(robot[0], robot[1])
|
117
|
+
# puts queues
|
118
|
+
|
119
|
+
r << {:robot => robot[0], :queues => queues, :n => robot[2] }
|
120
|
+
end
|
121
|
+
r
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
ROBOTS = RobotConfigParser.new.load(ENV['ROBOT_ENVIRONMENT'] || 'development')
|
126
|
+
# puts ROBOTS
|
data/robot-controller.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: robot-controller
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Darren Hardy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-05-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bluepill
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ~>
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.0.
|
19
|
+
version: 0.0.67
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.0.
|
26
|
+
version: 0.0.67
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: awesome_print
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -124,10 +124,11 @@ files:
|
|
124
124
|
- VERSION
|
125
125
|
- bin/controller
|
126
126
|
- example/config/boot.rb
|
127
|
-
- example/config/environments/
|
128
|
-
- example/config/environments/workflows_development.rb
|
127
|
+
- example/config/environments/robots_development.yml
|
129
128
|
- example/lib/tasks/environment.rake
|
130
129
|
- lib/robot-controller.rb
|
130
|
+
- lib/robot-controller/bluepill.rb
|
131
|
+
- lib/robot-controller/robots.rb
|
131
132
|
- lib/robot-controller/tasks.rb
|
132
133
|
- lib/tasks/doc.rake
|
133
134
|
- robot-controller.gemspec
|
@@ -1,98 +0,0 @@
|
|
1
|
-
WORKDIR=File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
|
2
|
-
robot_environment = ENV['ROBOT_ENVIRONMENT'] || 'development'
|
3
|
-
workflows = File.expand_path(File.join(WORKDIR, 'config', 'environments', "workflows_#{robot_environment}.rb"))
|
4
|
-
puts "Loading #{workflows}"
|
5
|
-
require workflows
|
6
|
-
|
7
|
-
Bluepill.application 'robot-controller',
|
8
|
-
:log_file => "#{WORKDIR}/log/bluepill.log" do |app|
|
9
|
-
app.working_dir = WORKDIR
|
10
|
-
WORKFLOW_STEPS.each do |qualified_wf|
|
11
|
-
wf = qualified_wf.gsub(/:/, '_')
|
12
|
-
app.process(wf) do |process|
|
13
|
-
# use configuration for number of workers -- default is 1
|
14
|
-
n = WORKFLOW_N[qualified_wf] ? WORKFLOW_N[qualified_wf].to_i : 1
|
15
|
-
puts "Creating #{n} worker#{n>1?'s':' '} for #{qualified_wf}"
|
16
|
-
|
17
|
-
# queue order is *VERY* important
|
18
|
-
#
|
19
|
-
# XXX: make this configurable based on wf
|
20
|
-
# WORKFLOW_PRIORITIES[wf] is the name of a second worker that reads the given queues
|
21
|
-
#
|
22
|
-
# see RobotMaster::Queue#queue_name for naming convention
|
23
|
-
# @example
|
24
|
-
# queue_name('dor:assemblyWF:jp2-create')
|
25
|
-
# => 'dor_assemblyWF_jp2-create_default'
|
26
|
-
# queue_name('dor:assemblyWF:jp2-create', 100)
|
27
|
-
# => 'dor_assemblyWF_jp2-create_high'
|
28
|
-
#
|
29
|
-
queues = []
|
30
|
-
%w{critical high default low}.each do |p|
|
31
|
-
queues << "#{wf}_#{p}"
|
32
|
-
end
|
33
|
-
queues = queues.join(',')
|
34
|
-
# puts "Using queues #{queues}"
|
35
|
-
|
36
|
-
# use environment for these resque variables
|
37
|
-
process.environment = {
|
38
|
-
'QUEUES' => "#{queues}",
|
39
|
-
'VERBOSE' => 'yes',
|
40
|
-
'ROBOT_ENVIRONMENT' => robot_environment
|
41
|
-
}
|
42
|
-
|
43
|
-
# process configuration
|
44
|
-
process.group = robot_environment
|
45
|
-
process.stdout = process.stderr = "#{WORKDIR}/log/#{wf}.log"
|
46
|
-
|
47
|
-
# let bluepill manage pid files
|
48
|
-
# process.pid_file = "#{WORKDIR}/run/#{wf}.pid"
|
49
|
-
|
50
|
-
# spawn n worker processes
|
51
|
-
if n > 1
|
52
|
-
process.start_command = "env COUNT=#{n} rake workers" # not resque:workers
|
53
|
-
else # 1 worker
|
54
|
-
process.start_command = "rake environment resque:work"
|
55
|
-
end
|
56
|
-
# puts "Using #{process.start_command}"
|
57
|
-
# puts "Using #{process.environment}"
|
58
|
-
|
59
|
-
# we use bluepill to daemonize the resque workers rather than using
|
60
|
-
# resque's BACKGROUND flag
|
61
|
-
process.daemonize = true
|
62
|
-
|
63
|
-
# graceful stops
|
64
|
-
process.stop_grace_time = 60.seconds # must be greater than stop_signals total
|
65
|
-
process.stop_signals = [
|
66
|
-
:quit, 45.seconds, # waits for jobs, then exits gracefully
|
67
|
-
:term, 10.seconds, # kills jobs and exits
|
68
|
-
:kill # no mercy
|
69
|
-
]
|
70
|
-
|
71
|
-
# process monitoring
|
72
|
-
|
73
|
-
# backoff if process is flapping between states
|
74
|
-
# process.checks :flapping,
|
75
|
-
# :times => 2, :within => 30.seconds,
|
76
|
-
# :retry_in => 7.seconds
|
77
|
-
|
78
|
-
# restart if process runs for longer than 15 mins of CPU time
|
79
|
-
# process.checks :running_time,
|
80
|
-
# :every => 5.minutes, :below => 15.minutes
|
81
|
-
|
82
|
-
# restart if CPU usage > 75% for 3 times, check every 10 seconds
|
83
|
-
# process.checks :cpu_usage,
|
84
|
-
# :every => 10.seconds,
|
85
|
-
# :below => 75, :times => 3,
|
86
|
-
# :include_children => true
|
87
|
-
#
|
88
|
-
# restart the process or any of its children
|
89
|
-
# if MEM usage > 100MB for 3 times, check every 10 seconds
|
90
|
-
# process.checks :mem_usage,
|
91
|
-
# :every => 10.seconds,
|
92
|
-
# :below => 100.megabytes, :times => 3,
|
93
|
-
# :include_children => true
|
94
|
-
|
95
|
-
# NOTE: there is an implicit process.keepalive
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
@@ -1,25 +0,0 @@
|
|
1
|
-
# will spawn worker(s) for each of the given workflows (fully qualified as "repo:wf:robot")
|
2
|
-
WORKFLOW_STEPS = %w{
|
3
|
-
dor:accessionWF:start-accession
|
4
|
-
dor:accessionWF:descriptive-metadata
|
5
|
-
dor:accessionWF:rights-metadata
|
6
|
-
dor:accessionWF:content-metadata
|
7
|
-
dor:accessionWF:technical-metadata
|
8
|
-
dor:accessionWF:remediate-object
|
9
|
-
dor:accessionWF:shelve
|
10
|
-
dor:accessionWF:publish
|
11
|
-
dor:accessionWF:provenance-metadata
|
12
|
-
dor:accessionWF:sdr-ingest-transfer
|
13
|
-
dor:accessionWF:sdr-ingest-received
|
14
|
-
dor:accessionWF:end-accession
|
15
|
-
dor:assemblyWF:start-assembly
|
16
|
-
dor:assemblyWF:jp2-create
|
17
|
-
dor:assemblyWF:checksum-compute
|
18
|
-
dor:assemblyWF:exif-collect
|
19
|
-
dor:assemblyWF:accessioning-initiate
|
20
|
-
}
|
21
|
-
|
22
|
-
# number of workers for the given workflows
|
23
|
-
WORKFLOW_N = Hash[*%w{
|
24
|
-
dor:assemblyWF:checksum-compute 3
|
25
|
-
}]
|