rbbt-util 5.30.9 → 5.31.0
- checksums.yaml +4 -4
- data/lib/rbbt/hpc.rb +3 -0
- data/lib/rbbt/hpc/batch.rb +623 -0
- data/lib/rbbt/hpc/lsf.rb +119 -0
- data/lib/rbbt/hpc/orchestrate.rb +24 -19
- data/lib/rbbt/hpc/slurm.rb +62 -559
- data/lib/rbbt/resource/path.rb +3 -1
- data/lib/rbbt/tsv/accessor.rb +5 -2
- data/lib/rbbt/tsv/dumper.rb +1 -0
- data/lib/rbbt/tsv/parallel/traverse.rb +1 -1
- data/lib/rbbt/tsv/stream.rb +5 -6
- data/lib/rbbt/util/cmd.rb +15 -1
- data/lib/rbbt/util/config.rb +2 -2
- data/lib/rbbt/util/log.rb +22 -1
- data/lib/rbbt/util/log/progress.rb +17 -2
- data/lib/rbbt/util/log/progress/report.rb +36 -3
- data/lib/rbbt/util/misc/development.rb +2 -2
- data/lib/rbbt/util/misc/inspect.rb +17 -1
- data/lib/rbbt/util/misc/omics.rb +60 -1
- data/lib/rbbt/util/misc/options.rb +5 -0
- data/lib/rbbt/workflow/accessor.rb +7 -2
- data/lib/rbbt/workflow/definition.rb +7 -3
- data/lib/rbbt/workflow/step/accessor.rb +1 -1
- data/lib/rbbt/workflow/step/run.rb +9 -0
- data/lib/rbbt/workflow/usage.rb +13 -13
- data/lib/rbbt/workflow/util/archive.rb +5 -3
- data/lib/rbbt/workflow/util/provenance.rb +26 -21
- data/share/config.ru +3 -3
- data/share/rbbt_commands/{slurm → hpc}/clean +91 -18
- data/share/rbbt_commands/{slurm → hpc}/list +119 -31
- data/share/rbbt_commands/hpc/orchestrate +81 -0
- data/share/rbbt_commands/hpc/tail +81 -0
- data/share/rbbt_commands/hpc/task +80 -0
- data/test/rbbt/hpc/test_batch.rb +65 -0
- data/test/rbbt/hpc/test_slurm.rb +30 -0
- data/test/rbbt/util/misc/test_development.rb +11 -0
- data/test/rbbt/util/test_config.rb +13 -3
- data/test/test_helper.rb +3 -1
- metadata +16 -7
- data/share/rbbt_commands/slurm/orchestrate +0 -48
- data/share/rbbt_commands/slurm/task +0 -46
data/share/rbbt_commands/{slurm → hpc}/list

@@ -2,6 +2,7 @@
 
 require 'rbbt-util'
 require 'rbbt/util/simpleopt'
+require 'rbbt/hpc'
 
 #$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
 
@@ -9,7 +10,7 @@ options = SOPT.setup <<EOF
 
 Queue a job in Marenostrum
 
-$ rbbt
+$ rbbt slurm list [options]
 
 -h--help Print this help
 -d--done Done jobs only
@@ -20,9 +21,11 @@ $ rbbt mnl [options]
 -j--job* Job ids
 -s--search* Regular expression
 -t--tail* Show the last lines of the STDERR
--
--
+-p--progress Report progress of job and the dependencies
+-BP--batch_parameters show batch parameters
+-BPP--batch_procpath show Procpath performance summary
 -sacct--sacct_peformance show sacct performance summary
+-bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
 EOF
 
 if options[:help]
@@ -34,14 +37,48 @@ if options[:help]
   exit 0
 end
 
-
-
+batch_system = options.delete :batch_system
+batch_system ||= 'auto'
+
+HPC::BATCH_MODULE = case batch_system.to_s.downcase
+                    when 'slurm'
+                      HPC::SLURM
+                    when 'lsf'
+                      HPC::LSF
+                    when 'auto'
+                      case $previous_commands.last
+                      when 'slurm'
+                        HPC::SLURM
+                      when 'lsf'
+                        HPC::LSF
+                      else
+                        case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
+                        when 'slurm'
+                          HPC::SLURM
+                        when 'lsf'
+                          HPC::LSF
+                        else
+                          case ENV["BATCH_SYSTEM"].to_s.downcase
+                          when 'slurm'
+                            HPC::SLURM
+                          when 'lsf'
+                            HPC::LSF
+                          end
+                        end
+                      end
+                    end
+
+raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
+
+batch_system = HPC::BATCH_MODULE.to_s.split("::").last.downcase
 
-
+done, error, running, queued, aborted, jobid, search, tail, progress = options.values_at :done, :error, :running, :queued, :aborted, :job, :search, :tail, :progress
+
+workdir = File.expand_path('~/rbbt-batch')
 Path.setup(workdir)
 
 running_jobs = begin
-  squeue_txt =
+  squeue_txt = HPC::BATCH_MODULE.job_status
   squeue_txt.split("\n").collect{|l| l.to_i.to_s}
 rescue
   Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
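The detection cascade added above (and repeated verbatim in each of the new hpc scripts) resolves the batch system in a fixed order: the explicit -bs/--batch_system value, then the invoking subcommand name, then Rbbt::Config, then the BATCH_SYSTEM environment variable. A rough, hypothetical helper expressing the same precedence (not part of the gem; 'auto' or any unrecognized value simply falls through to the next source):

    require 'rbbt-util'
    require 'rbbt/hpc'

    # Hypothetical refactoring of the nested case statements above.
    def detect_batch_module(batch_system, previous_commands = $previous_commands)
      candidates = [
        batch_system,
        previous_commands && previous_commands.last,
        Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH),
        ENV["BATCH_SYSTEM"]
      ]
      candidates.each do |value|
        case value.to_s.downcase
        when 'slurm' then return HPC::SLURM
        when 'lsf'   then return HPC::LSF
        end
      end
      nil  # caller raises ParameterException when nothing matched
    end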
@@ -61,35 +98,48 @@ else
 end
 
 count = 0
-workdir.glob("**/command.
+workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
   dir = File.dirname(fcmd)
+  command_txt = Open.read(fcmd)
 
-  if m =
+  if m = command_txt.match(/#CMD: (.*)/)
     cmd = m[1]
   else
     cmd = nil
   end
 
-  if m =
+  if m = command_txt.match(/^BATCH_SYSTEM=(.*)/)
+    job_batch_system = m[1].downcase
+  else
+    job_batch_system = nil
+  end
+
+  different_system = job_batch_system != batch_system
+
+  if m = command_txt.match(/#MANIFEST: (.*)/)
     manifest = m[1]
   else
     manifest = nil
   end
 
+  if m = command_txt.match(/#STEP_PATH: (.*)/)
+    step_path = m[1]
+  else
+    step_path = nil
+  end
 
-  if m =
-  exe = m[1]
+  if m = command_txt.match(/#EXEC_CMD: (.*)/)
+    exe = m[1]
   else
     exe = nil
   end
 
-  if m =
+  if m = command_txt.match(/^CONTAINER_DIR=(.*)/)
     container_home = m[1]
   else
     container_home = nil
   end
 
-
   if File.exists?(fid = File.join(dir, 'job.id'))
     id = Open.read(fid).chomp
   else
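The listing loop gets all of its metadata from marker lines in each command.batch file, as the regular expressions above show. A small self-contained sketch of that parsing with a made-up header (the real header is written by the batch module in data/lib/rbbt/hpc/batch.rb and may contain more fields):

    # Made-up command.batch header for illustration; only the marker lines matter.
    command_txt = <<~BATCH
      #!/bin/bash
      #CMD: rbbt workflow task ExampleWorkflow example_task
      #MANIFEST: ExampleWorkflow/example_task/Default
      #STEP_PATH: /home/user/.rbbt/var/jobs/ExampleWorkflow/example_task/Default
      #EXEC_CMD: singularity exec rbbt.simg rbbt
      BATCH_SYSTEM=SLURM
      CONTAINER_DIR=/gpfs/projects/user/containers
    BATCH

    cmd              = command_txt[/#CMD: (.*)/, 1]
    manifest         = command_txt[/#MANIFEST: (.*)/, 1]
    step_path        = command_txt[/#STEP_PATH: (.*)/, 1]
    exe              = command_txt[/#EXEC_CMD: (.*)/, 1]
    job_batch_system = command_txt[/^BATCH_SYSTEM=(.*)/, 1].to_s.downcase  # => "slurm"
    container_home   = command_txt[/^CONTAINER_DIR=(.*)/, 1]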
@@ -103,11 +153,20 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
   end
 
   if File.exists?(fstatus = File.join(dir, 'job.status'))
-
+    fstatus_txt = Open.read(fstatus)
+    begin
+      if job_batch_system == "lsf"
+        nodes = Open.read(fstatus).split("\n").last.split(/\s+/)[5].split(",")
+      else
+        nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
+      end
+    rescue
+      nodes = []
+    end
   elsif job_nodes[id]
-
+    nodes = job_nodes[id].reject{|n| n.include? "("}
   else
-
+    nodes = []
   end
 
   if File.exists?(File.join(dir, 'exit.status'))
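The node list is scraped from the last line of job.status, which is assumed to hold the scheduler's own status output: for LSF the node column is taken by position (field 6), for SLURM the last whitespace-separated field is used. A stand-alone sketch with invented status lines (real formats vary per site and scheduler version):

    # Invented examples of the last line of a job.status file.
    slurm_status_line = "1234567  main  rbbt-job  user  R  1:23  2  node[001-002]"
    lsf_status_line   = "7654321 user RUN normal login1 node001,node002 rbbt-job Jan 1 00:00"

    slurm_nodes = slurm_status_line.split(/\s+/).last.split(",")  # => ["node[001-002]"]
    lsf_nodes   = lsf_status_line.split(/\s+/)[5].split(",")      # => ["node001", "node002"]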
@@ -136,7 +195,7 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
   select = true if done && exit_status == 0
   select = true if error && exit_status && exit_status != 0
   select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
-  is_running = exit_status.nil? && running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)
+  is_running = exit_status.nil? && ( (running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)) || different_system )
   select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
   select = true if running && nodes.any? && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
   select = true if jobid && jobid.split(",").include?(id)
@@ -150,29 +209,39 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
 
 
   puts Log.color :blue, dir
-  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.
+  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
   puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err'))
   puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest)
+  puts Log.color(:magenta, "Step path: ") << Log.color(:yellow, step_path)
   puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
   puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
   puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
   puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
-
+  if different_system
+    puts Log.color(:magenta, "Job ID (#{Log.color(:red, job_batch_system)}): ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id) )
+  else
+    puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
+  end
   puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
   puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
   puts Log.color(:magenta, "Nodes: ") << nodes * ", "
   puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
   puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
 
-  if options[:
-  puts Log.color(:magenta, "
-
+  if options[:batch_parameters]
+    puts Log.color(:magenta, "BATCH parameters: ")
+    case job_batch_system
+    when 'slurm'
+      text = CMD.cmd('grep "^#SBATCH" |tail -n +5', :in => Open.read(fcmd)).read.strip
+    when 'lsf'
+      text = CMD.cmd('grep "^#BSUB" |tail -n +5', :in => Open.read(fcmd)).read.strip
+    end
     lines = text.split("\n").collect{|line| header, _sep, value = line.partition(/\s+/); Log.color(:yellow, header + ": ") + value}
     puts Log.color :yellow, lines * "\n"
   end
 
   fprocpath = File.join(dir, 'procpath.sqlite3')
-  if options[:
+  if options[:batch_procpath] && Open.exists?(fprocpath)
     puts Log.color(:magenta, "Procpath summary: ")
     require 'rbbt/tsv/csv'
     meta = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from meta;' "))
@@ -214,13 +283,15 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
 
   if options[:sacct_peformance]
     begin
-
-
-
-
-
-
+      raise "sacct not supported for LSF" unless batch_system == 'slurm'
+      tsv = TSV.open(CMD.cmd("sacct -j #{id} -o 'jobid,AveRSS,MaxRSS,MaxDiskRead,MaxDiskWrite' -P|grep 'JobID\\|\.batch'"), :header_hash => '', :sep => "|", :type => :list)
+      values = tsv[tsv.keys.first]
+      if values.compact.any?
+        puts Log.color(:magenta, "SACCT performance: ")
+        puts values.zip(values.fields).collect{|v,t| Log.color(:yellow, t + ": ") + v.to_s } * "\n"
+      end
     rescue
+      Log.warn $!.message
     end
   end
 
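sacct -P prints pipe-separated records with a header row; the grep keeps the header plus the .batch step, and rbbt's TSV.open then maps field names to values. A plain-Ruby approximation of the same parsing, with invented numbers:

    # Invented sacct -P output restricted to the header and the .batch step.
    sacct_output = <<~OUT
      JobID|AveRSS|MaxRSS|MaxDiskRead|MaxDiskWrite
      1234567.batch|1200K|3400K|10.5M|2.1M
    OUT

    header, row = sacct_output.lines.map { |l| l.chomp.split("|") }
    header.zip(row).each { |field, value| puts "#{field}: #{value}" }
    # JobID: 1234567.batch
    # AveRSS: 1200K
    # ...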
@@ -234,7 +305,24 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
     puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
   else
     puts Log.color(:magenta, "Log tail: ")
-    puts CMD.cmd("
+    puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | tail -n #{tail.to_i} ").read
+    end
+  end
+
+  if options[:progress]
+    step_line = Open.read(fcmd).split("\n").select{|line| line =~ /^#STEP_PATH:/}.first
+    if step_line
+      require 'rbbt/workflow'
+      step_path = step_line.split(": ").last.strip
+      step = Step.new step_path
+      step.load_dependencies_from_info
+      (step.rec_dependencies + [step]).reverse.each do |j|
+        next if j.done?
+        next unless j.file(:progress).exists?
+        bar = Log::ProgressBar.new
+        bar.load(j.file(:progress).yaml)
+        puts Log.color(:magenta, "Progress: ") + bar.report_msg + " " + Log.color(:yellow, j.task_signature)
+      end
   end
 end
 
data/share/rbbt_commands/hpc/orchestrate (new file)

@@ -0,0 +1,81 @@
+#!/usr/bin/env ruby
+
+require 'rbbt/util/simpleopt'
+require 'rbbt/workflow'
+require 'rbbt/workflow/usage'
+require 'rbbt/hpc'
+require 'rbbt/hpc/orchestrate'
+require 'time'
+
+$slurm_options = SOPT.get <<EOF
+-dr--dry_run Print only the template
+-cj--clean_job Clean job
+--drbbt* Use development version of rbbt
+-sing--singularity Use Singularity
+-ug--user_group* Use alternative user group for group project directory
+-c--contain* Contain in directory (using Singularity)
+-s--sync* Contain in directory and sync jobs
+-e--exclusive Make exclusive use of the node
+-hm--highmem Make use of highmem cores
+-wc--wipe_container* Wipe the jobs from the contain directory
+-CS--contain_and_sync Contain and sync to default locations
+-ci--copy_image When using a container directory, copy image there
+-t--tail Tail the logs
+-BPP--batch_procpath* Save Procpath performance for batch job; specify only options
+-q--queue* Queue
+-t--task_cpus* Tasks
+-W--workflows* Additional workflows
+-tm--time* Time
+-OR--orchestration_rules* Orchestration rules
+-rmb--remove_batch_basedir Remove the SLURM working directory (command, STDIN, exit status, ...)
+EOF
+
+batch_system = $slurm_options.delete :batch_system
+batch_system ||= 'auto'
+
+HPC::BATCH_MODULE = case batch_system.to_s.downcase
+                    when 'slurm'
+                      HPC::SLURM
+                    when 'lsf'
+                      HPC::LSF
+                    when 'auto'
+                      case $previous_commands.last
+                      when 'slurm'
+                        HPC::SLURM
+                      when 'lsf'
+                        HPC::LSF
+                      else
+                        case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
+                        when 'slurm'
+                          HPC::SLURM
+                        when 'lsf'
+                          HPC::LSF
+                        else
+                          case ENV["BATCH_SYSTEM"].to_s.downcase
+                          when 'slurm'
+                            HPC::SLURM
+                          when 'lsf'
+                            HPC::LSF
+                          end
+                        end
+                      end
+                    end
+
+raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
+
+class Step
+  def run(*args)
+    if done?
+      self.load
+    else
+      begin
+        Log.debug "Issuing SLURM job for #{self.path}"
+        HPC::BATCH_MODULE.orchestrate_job(self, SOPT::GOT_OPTIONS.merge($slurm_options))
+      rescue HPC::SBATCH
+      end
+    end
+  end
+end
+
+ARGV.concat ["-W", $slurm_options[:workflows], '--detach'] if $slurm_options[:workflows]
+load Rbbt.share.rbbt_commands.workflow.task.find
data/share/rbbt_commands/hpc/tail (new file)

@@ -0,0 +1,81 @@
+#!/usr/bin/env ruby
+
+require 'rbbt-util'
+require 'rbbt/util/simpleopt'
+require 'rbbt/hpc'
+
+#$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
+
+options = SOPT.setup <<EOF
+
+Queue a job in Marenostrum
+
+$ rbbt slurm tail <directory> [options]
+
+-h--help Print this help
+-d--done Done jobs only
+-e--error Error jobs only
+-a--aborted SLURM aboted jobs
+-r--running Running jobs only
+-q--queued Queued jobs only
+-j--job* Job ids
+-s--search* Regular expression
+-t--tail* Show the last lines of the STDERR
+-p--progress Report progress of job and the dependencies
+-SBP--sbatch_parameters show sbatch parameters
+-PERF--procpath_performance show Procpath performance summary
+-sacct--sacct_peformance show sacct performance summary
+-bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
+EOF
+
+if options[:help]
+  if defined? rbbt_usage
+    rbbt_usage
+  else
+    puts SOPT.doc
+  end
+  exit 0
+end
+
+batch_system = options.delete :batch_system
+batch_system ||= 'auto'
+
+HPC::BATCH_MODULE = case batch_system.to_s.downcase
+                    when 'slurm'
+                      HPC::SLURM
+                    when 'lsf'
+                      HPC::LSF
+                    when 'auto'
+                      case $previous_commands.last
+                      when 'slurm'
+                        HPC::SLURM
+                      when 'lsf'
+                        HPC::LSF
+                      else
+                        case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
+                        when 'slurm'
+                          HPC::SLURM
+                        when 'lsf'
+                          HPC::LSF
+                        else
+                          case ENV["BATCH_SYSTEM"].to_s.downcase
+                          when 'slurm'
+                            HPC::SLURM
+                          when 'lsf'
+                            HPC::LSF
+                          end
+                        end
+                      end
+                    end
+
+raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
+
+directory = ARGV.shift
+
+raise ParameterException if directory.nil?
+
+directory = File.dirname(directory) unless File.directory?(directory)
+
+require 'rbbt/hpc/slurm'
+
+HPC::BATCH_MODULE.follow_job directory, true
data/share/rbbt_commands/hpc/task (new file)

@@ -0,0 +1,80 @@
+#!/usr/bin/env ruby
+
+require 'rbbt/util/simpleopt'
+require 'rbbt/workflow'
+require 'rbbt/workflow/usage'
+require 'rbbt/hpc'
+require 'time'
+
+$slurm_options = SOPT.get <<EOF
+-dr--dry_run Print only the template
+-cj--clean_job Clean job
+--drbbt* Use development version of rbbt
+-sing--singularity Use Singularity
+-ug--user_group* Use alternative user group for group project directory
+-c--contain* Contain in directory (using Singularity)
+-s--sync* Contain in directory and sync jobs
+-e--exclusive Make exclusive use of the node
+-hm--highmem Make use of highmem cores
+-wc--wipe_container* Wipe the jobs from the contain directory
+-CS--contain_and_sync Contain and sync to default locations
+-ci--copy_image When using a container directory, copy image there
+-t--tail Tail the logs
+-BPP--batch_procpath* Save Procpath performance for batch job; specify only options
+-q--queue* Queue
+-t--task_cpus* Tasks
+-W--workflows* Additional workflows
+-tm--time* Time
+-rmb--remove_batch_dir Remove the batch working directory (command, STDIN, exit status, ...)
+-bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
+EOF
+
+batch_system = $slurm_options.delete :batch_system
+batch_system ||= 'auto'
+
+HPC::BATCH_MODULE = case batch_system.to_s.downcase
+                    when 'slurm'
+                      HPC::SLURM
+                    when 'lsf'
+                      HPC::LSF
+                    when 'auto'
+                      case $previous_commands.last
+                      when 'slurm'
+                        HPC::SLURM
+                      when 'lsf'
+                        HPC::LSF
+                      else
+                        case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
+                        when 'slurm'
+                          HPC::SLURM
+                        when 'lsf'
+                          HPC::LSF
+                        else
+                          case ENV["BATCH_SYSTEM"].to_s.downcase
+                          when 'slurm'
+                            HPC::SLURM
+                          when 'lsf'
+                            HPC::LSF
+                          end
+                        end
+                      end
+                    end
+
+raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
+
+class Step
+  def run(*args)
+    if done?
+      self.load
+    else
+      begin
+        Log.debug "Issuing SLURM job for #{self.path}"
+        HPC::BATCH_MODULE.run_job(self, SOPT::GOT_OPTIONS.merge($slurm_options))
+      rescue HPC::SBATCH
+      end
+    end
+  end
+end
+
+ARGV.concat ["-W", $slurm_options[:workflows]] if $slurm_options[:workflows]
+load Rbbt.share.rbbt_commands.workflow.task.find