rbbt-util 5.30.9 → 5.31.0
- checksums.yaml +4 -4
- data/lib/rbbt/hpc.rb +3 -0
- data/lib/rbbt/hpc/batch.rb +623 -0
- data/lib/rbbt/hpc/lsf.rb +119 -0
- data/lib/rbbt/hpc/orchestrate.rb +24 -19
- data/lib/rbbt/hpc/slurm.rb +62 -559
- data/lib/rbbt/resource/path.rb +3 -1
- data/lib/rbbt/tsv/accessor.rb +5 -2
- data/lib/rbbt/tsv/dumper.rb +1 -0
- data/lib/rbbt/tsv/parallel/traverse.rb +1 -1
- data/lib/rbbt/tsv/stream.rb +5 -6
- data/lib/rbbt/util/cmd.rb +15 -1
- data/lib/rbbt/util/config.rb +2 -2
- data/lib/rbbt/util/log.rb +22 -1
- data/lib/rbbt/util/log/progress.rb +17 -2
- data/lib/rbbt/util/log/progress/report.rb +36 -3
- data/lib/rbbt/util/misc/development.rb +2 -2
- data/lib/rbbt/util/misc/inspect.rb +17 -1
- data/lib/rbbt/util/misc/omics.rb +60 -1
- data/lib/rbbt/util/misc/options.rb +5 -0
- data/lib/rbbt/workflow/accessor.rb +7 -2
- data/lib/rbbt/workflow/definition.rb +7 -3
- data/lib/rbbt/workflow/step/accessor.rb +1 -1
- data/lib/rbbt/workflow/step/run.rb +9 -0
- data/lib/rbbt/workflow/usage.rb +13 -13
- data/lib/rbbt/workflow/util/archive.rb +5 -3
- data/lib/rbbt/workflow/util/provenance.rb +26 -21
- data/share/config.ru +3 -3
- data/share/rbbt_commands/{slurm → hpc}/clean +91 -18
- data/share/rbbt_commands/{slurm → hpc}/list +119 -31
- data/share/rbbt_commands/hpc/orchestrate +81 -0
- data/share/rbbt_commands/hpc/tail +81 -0
- data/share/rbbt_commands/hpc/task +80 -0
- data/test/rbbt/hpc/test_batch.rb +65 -0
- data/test/rbbt/hpc/test_slurm.rb +30 -0
- data/test/rbbt/util/misc/test_development.rb +11 -0
- data/test/rbbt/util/test_config.rb +13 -3
- data/test/test_helper.rb +3 -1
- metadata +16 -7
- data/share/rbbt_commands/slurm/orchestrate +0 -48
- data/share/rbbt_commands/slurm/task +0 -46
data/lib/rbbt/hpc/lsf.rb
ADDED
@@ -0,0 +1,119 @@
require 'rbbt/hpc/batch'

module HPC
  module LSF
    extend HPC::TemplateGeneration
    extend HPC::Orchestration

    def self.batch_system_variables
      <<-EOF
MAX_MEMORY=$LSB_MAX_MEM_RUSAGE || let MAX_MEMORY="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / 1024"
BATCH_JOB_ID=$LSF_JOBID
BATCH_SYSTEM=LSF
      EOF
    end

    def self.header(options = {})
      options = options.dup

      queue = Misc.process_options options, :queue
      task_cpus = Misc.process_options options, :task_cpus
      time = Misc.process_options options, :time
      nodes = Misc.process_options options, :nodes
      workdir = Misc.process_options options, :workdir
      exclusive = Misc.process_options options, :exclusive

      batch_dir = Misc.process_options options, :batch_dir
      batch_name = Misc.process_options options, :batch_name
      batch_name ||= File.basename(batch_dir)

      fout = File.join(batch_dir, 'std.out')
      ferr = File.join(batch_dir, 'std.err')

      time = Misc.format_seconds Misc.timespan(time) unless time.include? ":"

      time = time.split(":").values_at(0, 1) * ":"

      header =<<-EOF
#!/bin/bash
#BSUB -J "#{batch_name}"
#BSUB -cwd "#{workdir}"
#BSUB -oo "#{fout}"
#BSUB -eo "#{ferr}"
#BSUB -q "#{queue}"
#BSUB -n "#{task_cpus}"
#BSUB -W "#{time}"
      EOF

      header << "#BSUB -x" << "\n" if exclusive

      header
    end

    def self.run_template(batch_dir, dry_run)

      fout = File.join(batch_dir, 'std.out')
      ferr = File.join(batch_dir, 'std.err')
      fjob = File.join(batch_dir, 'job.id')
      fdep = File.join(batch_dir, 'dependencies.list')
      fcfdep = File.join(batch_dir, 'canfail_dependencies.list')
      fexit = File.join(batch_dir, 'exit.status')
      fsync = File.join(batch_dir, 'sync.log')
      fcmd = File.join(batch_dir, 'command.batch')

      return if Open.exists?(fexit)

      STDERR.puts Log.color(:magenta, "Issuing LSF file: #{fcmd}")
      STDERR.puts Open.read(fcmd)

      if File.exists?(fjob)
        job = Open.read(fjob).to_i
      else

        dependencies = Open.read(fdep).split("\n") if File.exists? fdep
        canfail_dependencies = Open.read(fcfdep).split("\n") if File.exists? fcfdep

        normal_dep_list = dependencies && dependencies.any? ? dependencies.collect{|d| "post_done(#{d})"} : []
        canfail_dep_list = canfail_dependencies && canfail_dependencies.any? ? canfail_dependencies.collect{|d| "done(#{d})"} : []

        dep_list = normal_dep_list + canfail_dep_list

        if dep_list.any?
          dep_str = '-w "' + dep_list * " && " + '"'
        else
          dep_str = ""
        end

        cmd = "bsub #{dep_str} < '#{fcmd}'"

        if File.exists?(fout)
          return
        elsif dry_run
          STDERR.puts Log.color(:magenta, "To execute run: ") + Log.color(:blue, cmd)
          STDERR.puts Log.color(:magenta, "To monitor progress run (needs local rbbt): ") + Log.color(:blue, "rbbt lsf tail '#{batch_dir}'")
          raise HPC::SBATCH, batch_dir
        else
          Open.rm fsync
          Open.rm fexit
          Open.rm fout
          Open.rm ferr

          job = CMD.cmd(cmd).read.scan(/\d+/).first.to_i
          Log.debug "BSUB job id: #{job}"
          Open.write(fjob, job.to_s)
          job
        end
      end
    end

    def self.job_status(job = nil)
      if job.nil?
        CMD.cmd("bjobs -w").read
      else
        CMD.cmd("bjobs -w #{job}").read
      end
    end
  end
end
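A quick orientation to the new module (an illustrative sketch written for this changelog, not code shipped in the gem; it assumes rbbt-util 5.31.0 is installed and loadable, and uses only the option keys consumed by HPC::LSF.header above):

# Hypothetical usage sketch -- not part of the gem's code or tests.
require 'rbbt-util'
require 'rbbt/hpc/lsf'

# Build a #BSUB header for a job directory; the time string "2:00:00" is
# reduced to "2:00" by the values_at(0, 1) call in HPC::LSF.header, and
# :exclusive => true appends the "#BSUB -x" line.
puts HPC::LSF.header(:batch_dir => "/tmp/rbbt-batch/example",
                     :queue => "normal",
                     :task_cpus => 4,
                     :time => "2:00:00",
                     :workdir => Dir.pwd,
                     :exclusive => true)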
data/lib/rbbt/hpc/orchestrate.rb
CHANGED
@@ -1,8 +1,8 @@
 require 'rbbt/workflow/util/orchestrator'
 module HPC
-  module
+  module Orchestration
 
-    def
+    def job_rules(rules, job)
       workflow = job.workflow.to_s
       task_name = job.task_name.to_s
       task_name = job.overriden.to_s if Symbol === job.overriden
@@ -53,25 +53,26 @@ module HPC
       job_rules
     end
 
-    def
+    def get_job_dependencies(job, job_rules = nil)
      deps = job.dependencies || []
      deps += job.input_dependencies || []
      deps
     end
 
-    def
+    def get_recursive_job_dependencies(job)
      deps = get_job_dependencies(job)
      (deps + deps.collect{|dep| get_recursive_job_dependencies(dep) }).flatten
     end
 
-    def
+    def piggyback(job, job_rules, job_deps)
      return false unless job_rules["skip"]
      final_deps = job_deps - job_deps.collect{|dep| get_recursive_job_dependencies(dep)}.flatten.uniq
+     final_deps = final_deps.reject{|dep| dep.done? }
      return final_deps.first if final_deps.length == 1
      return false
     end
 
-    def
+    def get_chains(job, rules, chains = {})
      job_rules = self.job_rules(rules, job)
      job_deps = get_job_dependencies(job)
 
@@ -101,22 +102,22 @@ module HPC
       chains
     end
 
-    def
+    def workload(job, rules, chains, options, seen = nil)
      return [] if job.done?
      if seen.nil?
-       seen = {}
+       seen = {}
        target_job = true
      end
 
      job_rules = self.job_rules(rules, job)
      job_deps = get_job_dependencies(job)
 
-
      chain = chains[job]
-     chain
+     chain = chain.reject{|j| seen.include? j.path} if chain
+     chain = chain.reject{|dep| dep.done? } if chain
      piggyback = piggyback(job, job_rules, job_deps)
      dep_ids = job_deps.collect do |dep|
-       seen[dep]
+       seen[dep.path] ||= nil if chain && chain.include?(dep) #&& ! job.input_dependencies.include?(dep)
        next_options = IndiferentHash.setup(options.dup)
        if piggyback and piggyback == dep
          next_options[:piggyback] ||= []
@@ -129,19 +130,22 @@ module HPC
 
        ids = [ids].flatten.compact.collect{|id| ['canfail', id] * ":"} if job.canfail_paths.include? dep.path
 
-       seen[dep] = ids
+       seen[dep.path] = ids
        ids
      end.compact.flatten.uniq
 
-     return seen[job] || dep_ids if seen.include?(job)
-
+     return seen[job.path] || dep_ids if seen.include?(job.path)
+
+     if piggyback and seen[piggyback.path]
+       return seen[job.path] = seen[piggyback.path]
+     end
 
      job_rules.delete :chain_tasks
      job_rules.delete :tasks
      job_rules.delete :workflow
 
 
-     job_options = IndiferentHash.setup(options.merge(job_rules).merge(:
+     job_options = IndiferentHash.setup(options.merge(job_rules).merge(:batch_dependencies => dep_ids))
      job_options.delete :orchestration_rules
 
      config_keys = job_rules.delete(:config_keys)
@@ -172,13 +176,13 @@ module HPC
 
      manifest.uniq!
 
-     job_options[:manifest] = manifest.collect{|j| j.
+     job_options[:manifest] = manifest.collect{|j| j.task_signature }
 
      job_options[:config_keys] = job_options[:config_keys].split(",").uniq * "," if job_options[:config_keys]
 
      if options[:dry_run]
        puts Log.color(:magenta, "Manifest: ") + Log.color(:blue, job_options[:manifest] * ", ") + " - tasks: #{job_options[:task_cpus] || 1} - time: #{job_options[:time]} - config: #{job_options[:config_keys]}"
-       puts Log.color(:yellow, "Deps: ") + Log.color(:blue, job_options[:
+       puts Log.color(:yellow, "Deps: ") + Log.color(:blue, job_options[:batch_dependencies]*", ")
        job_options[:manifest].first
      else
        run_job(job, job_options)
@@ -186,13 +190,14 @@ module HPC
     end
 
 
-    def
+    def orchestrate_job(job, options)
      options.delete "recursive_clean"
      options.delete "clean_task"
      options.delete "clean"
      options.delete "tail"
-     options.delete "
+     options.delete "printpath"
      options.delete "detach"
+     options.delete "jobname"
 
      rules = YAML.load(Open.read(options[:orchestration_rules])) if options[:orchestration_rules]
      rules ||= {}
data/lib/rbbt/hpc/slurm.rb
CHANGED
Most of the old file is removed: the small directory-holding class at the top, the large self.template builder (with its development/singularity/contain/sync/user_group/wipe_container/copy_image/exclusive/highmem options and the embedded SBATCH and singularity shell template), and the self.issue_template, self.follow_job, self.wait_for_job, self.run_job and self.relay methods. The SLURM-specific code that remains mirrors the new lsf.rb and delegates the generic batch handling to the HPC::TemplateGeneration and HPC::Orchestration modules from the new rbbt/hpc/batch.rb. The resulting file:

require 'rbbt/hpc/batch'
require 'rbbt/hpc/orchestrate'

module HPC
  module SLURM
    extend HPC::TemplateGeneration
    extend HPC::Orchestration

    def self.batch_system_variables
      <<-EOF
let "MAX_MEMORY=$SLURM_MEM_PER_CPU * $SLURM_CPUS_PER_TASK" || let MAX_MEMORY="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / 1024"
BATCH_JOB_ID=$SLURM_JOB_ID
BATCH_SYSTEM=SLURM
      EOF
    end

    def self.header(options = {})
      options = options.dup

      queue = Misc.process_options options, :queue
      task_cpus = Misc.process_options options, :task_cpus
      time = Misc.process_options options, :time
      nodes = Misc.process_options options, :nodes
      workdir = Misc.process_options options, :workdir
      exclusive = Misc.process_options options, :exclusive

      batch_dir = Misc.process_options options, :batch_dir
      batch_name = Misc.process_options options, :batch_name

      fout = File.join(batch_dir, 'std.out')
      ferr = File.join(batch_dir, 'std.err')

      time = Misc.format_seconds Misc.timespan(time) unless time.include? ":"

      header =<<-EOF
#!/bin/bash
#SBATCH --job-name="#{batch_name}"
#SBATCH --workdir="#{workdir}"
#SBATCH --output="#{fout}"
#SBATCH --error="#{ferr}"
#SBATCH --qos="#{queue}"
#SBATCH --cpus-per-task="#{task_cpus}"
#SBATCH --time="#{time}"
#SBATCH --nodes="#{nodes}"
      EOF

      header << "#SBATCH --exclusive" << "\n" if exclusive

      header
    end

    def self.run_template(batch_dir, dry_run)

      fout = File.join(batch_dir, 'std.out')
      ferr = File.join(batch_dir, 'std.err')
      fjob = File.join(batch_dir, 'job.id')
      fdep = File.join(batch_dir, 'dependencies.list')
      fcfdep = File.join(batch_dir, 'canfail_dependencies.list')
      fexit = File.join(batch_dir, 'exit.status')
      fsync = File.join(batch_dir, 'sync.log')
      fcmd = File.join(batch_dir, 'command.batch')

      return if Open.exists?(fexit)

      STDERR.puts Log.color(:magenta, "Issuing SLURM file: #{fcmd}")
      STDERR.puts Open.read(fcmd)

      if File.exists?(fjob)
        job = Open.read(fjob).to_i
      else

        dependencies = Open.read(fdep).split("\n") if File.exists? fdep
        canfail_dependencies = Open.read(fcfdep).split("\n") if File.exists? fcfdep

        normal_dep_str = dependencies && dependencies.any? ? "afterok:" + dependencies * ":" : nil
        canfail_dep_str = canfail_dependencies && canfail_dependencies.any? ? "afterany:" + canfail_dependencies * ":" : nil

        if normal_dep_str.nil? && canfail_dep_str.nil?
          dep_str = ""
        else
          dep_str = '--dependency=' + [normal_dep_str, canfail_dep_str].compact * ","
        end

        cmd = "sbatch #{dep_str} '#{fcmd}'"

        if File.exists?(fout)
          return
        elsif dry_run
          STDERR.puts Log.color(:magenta, "To execute run: ") + Log.color(:blue, "sbatch '#{fcmd}'")
          STDERR.puts Log.color(:magenta, "To monitor progress run (needs local rbbt): ") + Log.color(:blue, "rbbt slurm tail '#{batch_dir}'")
          raise HPC::SBATCH, batch_dir
        else
          Open.rm fsync
          Open.rm fexit
          Open.rm fout
          Open.rm ferr

          job = CMD.cmd(cmd).read.scan(/\d+/).first.to_i
          Log.debug "SBATCH job id: #{job}"
          Open.write(fjob, job.to_s)
          job
        end
      end
    end

    def self.job_status(job = nil)
      if job.nil?
        CMD.cmd("squeue").read
      else
        CMD.cmd("squeue --job #{job}").read
      end
    end

  end
end
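To make the dependency handling concrete, a small illustration (written for this note, not gem code) of the --dependency string that run_template assembles for sbatch; the LSF counterpart in lsf.rb builds the analogous -w "post_done(...) && done(...)" expression for bsub:

# Illustration only: normal dependencies wait with afterok, can-fail
# dependencies with afterany, joined into a single sbatch option.
dependencies = ["1001", "1002"]
canfail_dependencies = ["1003"]

normal_dep_str = dependencies.any? ? "afterok:" + dependencies * ":" : nil
canfail_dep_str = canfail_dependencies.any? ? "afterany:" + canfail_dependencies * ":" : nil

dep_str = '--dependency=' + [normal_dep_str, canfail_dep_str].compact * ","
# => "--dependency=afterok:1001:1002,afterany:1003"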