rbbt-util 5.29.2 → 5.30.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc/orchestrate.rb +12 -1
- data/lib/rbbt/hpc/slurm.rb +56 -24
- data/lib/rbbt/persist.rb +4 -0
- data/lib/rbbt/persist/tsv/adapter.rb +44 -13
- data/lib/rbbt/tsv.rb +3 -2
- data/lib/rbbt/util/cmd.rb +6 -1
- data/lib/rbbt/util/misc/options.rb +0 -42
- data/lib/rbbt/util/procpath.rb +49 -0
- data/lib/rbbt/workflow/step/accessor.rb +3 -4
- data/lib/rbbt/workflow/step/run.rb +2 -4
- data/lib/rbbt/workflow/usage.rb +1 -1
- data/share/rbbt_commands/slurm/clean +165 -0
- data/share/rbbt_commands/slurm/list +174 -95
- data/share/rbbt_commands/slurm/orchestrate +3 -2
- data/share/rbbt_commands/slurm/task +1 -0
- data/share/rbbt_commands/tsv/slice +3 -3
- data/share/rbbt_commands/workflow/info +1 -1
- data/share/rbbt_commands/workflow/task +27 -7
- data/share/rbbt_commands/workflow/write_info +52 -0
- data/test/rbbt/util/test_procpath.rb +23 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 86a6fb4f62fc52b64a090ff3068452055f79ec65f5d11a1a497d1433370b76e2
|
4
|
+
data.tar.gz: 453409b866e291e8971d13ef737ac8b6666ba89251b22a756bebc37339fa4ea7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8438fa94a7f460192656be9bd300b2b9991fd5b97d68cdfdf3d33baa76ed8f80c53caaa967e0ecd317cb6d99a32ca0e0fede252b58c96d913b5b44bbdcdfb8f0
|
7
|
+
data.tar.gz: f6d50c0b5aba669526c0d044a15598a07ded038b3ae7964c84723de05c93e30b30763a5ceddbd300a9c5f79f1c84b9305ebaa786c67702de32c5a0b26ce56222
|
data/lib/rbbt/hpc/orchestrate.rb
CHANGED
@@ -60,9 +60,14 @@ module HPC
|
|
60
60
|
return if job.done?
|
61
61
|
return unless job.path.split("/")[-4] == "jobs"
|
62
62
|
seen[:orchestration_target_job] ||= job
|
63
|
+
|
63
64
|
options.delete "recursive_clean"
|
65
|
+
options.delete "clean_task"
|
66
|
+
options.delete "clean"
|
64
67
|
options.delete "tail"
|
65
68
|
options.delete "printfile"
|
69
|
+
options.delete "detach"
|
70
|
+
|
66
71
|
rules = YAML.load(Open.read(options[:orchestration_rules])) if options[:orchestration_rules]
|
67
72
|
rules ||= {}
|
68
73
|
IndiferentHash.setup(rules)
|
@@ -75,7 +80,13 @@ module HPC
|
|
75
80
|
skip_dep = job_rules["chain_tasks"] &&
|
76
81
|
job_rules["chain_tasks"][job.workflow.to_s] && job_rules["chain_tasks"][job.workflow.to_s].include?(job.task_name.to_s) &&
|
77
82
|
job_rules["chain_tasks"][dep.workflow.to_s] && job_rules["chain_tasks"][dep.workflow.to_s].include?(dep.task_name.to_s)
|
78
|
-
|
83
|
+
|
84
|
+
deps = seen[dep.path] ||= self.orchestrate_job(dep, options, skip_dep, seen)
|
85
|
+
if job.canfail_paths.include? dep.path
|
86
|
+
[deps].flatten.compact.collect{|id| ['canfail', id] * ":"}
|
87
|
+
else
|
88
|
+
deps
|
89
|
+
end
|
79
90
|
end.flatten.compact.uniq
|
80
91
|
|
81
92
|
skip = true if job_rules[:skip]
|
data/lib/rbbt/hpc/slurm.rb
CHANGED
@@ -33,7 +33,8 @@ module HPC
|
|
33
33
|
group = File.basename(File.dirname(ENV['HOME']))
|
34
34
|
|
35
35
|
if contain_and_sync
|
36
|
-
|
36
|
+
random_file = TmpFile.random_name
|
37
|
+
contain = "/scratch/tmp/rbbt-#{user}/#{random_file}" if contain.nil?
|
37
38
|
sync = "~/.rbbt/var/jobs" if sync.nil?
|
38
39
|
wipe_container = "post" if wipe_container.nil?
|
39
40
|
end
|
@@ -58,11 +59,11 @@ module HPC
|
|
58
59
|
when FalseClass
|
59
60
|
'--' << o << "=false"
|
60
61
|
else
|
61
|
-
['--' << o, "'#{v}'"] * " "
|
62
|
+
['--' << o, "'#{v.to_s.gsub("'", '\'')}'"] * " "
|
62
63
|
end
|
63
64
|
end * " "
|
64
65
|
|
65
|
-
rbbt_cmd << " --config_keys='#{config_keys}'" if config_keys and not config_keys.empty?
|
66
|
+
rbbt_cmd << " --config_keys='#{config_keys.gsub("'", '\'')}'" if config_keys and not config_keys.empty?
|
66
67
|
|
67
68
|
time = Misc.format_seconds Misc.timespan(time) unless time.include? ":"
|
68
69
|
|
@@ -76,6 +77,7 @@ module HPC
|
|
76
77
|
fjob = File.join(slurm_basedir, 'job.id')
|
77
78
|
fexit = File.join(slurm_basedir, 'exit.status')
|
78
79
|
fsync = File.join(slurm_basedir, 'sync.log')
|
80
|
+
fsyncexit = File.join(slurm_basedir, 'sync.status')
|
79
81
|
fcmd = File.join(slurm_basedir, 'command.slurm')
|
80
82
|
|
81
83
|
#{{{ GENERATE TEMPLATE
|
@@ -107,10 +109,6 @@ module HPC
|
|
107
109
|
EOF
|
108
110
|
end
|
109
111
|
|
110
|
-
header +=<<-EOF
|
111
|
-
#CMD: #{rbbt_cmd}
|
112
|
-
EOF
|
113
|
-
|
114
112
|
# ENV
|
115
113
|
env = ""
|
116
114
|
env +=<<-EOF
|
@@ -246,7 +244,7 @@ EOF
|
|
246
244
|
end
|
247
245
|
|
248
246
|
if contain
|
249
|
-
rbbt_cmd << " " << %(--workdir_all='#{contain}')
|
247
|
+
rbbt_cmd << " " << %(--workdir_all='#{contain.gsub("'", '\'')}/workdir')
|
250
248
|
end
|
251
249
|
end
|
252
250
|
|
@@ -254,16 +252,27 @@ EOF
|
|
254
252
|
cmd =<<-EOF
|
255
253
|
#{exec_cmd} \\
|
256
254
|
#{rbbt_cmd}
|
255
|
+
EOF
|
256
|
+
annotate_cmd =<<-EOF
|
257
|
+
#{exec_cmd} \\
|
258
|
+
workflow write_info --recursive --force=false --check_pid "$step_path" slurm_job $SLURM_JOB_ID
|
257
259
|
EOF
|
258
260
|
|
261
|
+
header +=<<-EOF
|
262
|
+
#CMD: #{rbbt_cmd}
|
263
|
+
EOF
|
264
|
+
|
259
265
|
run +=<<-EOF
|
260
266
|
|
261
267
|
# Run command
|
262
|
-
#{cmd}
|
268
|
+
step_path=$(#{cmd})
|
263
269
|
|
264
270
|
# Save exit status
|
265
271
|
exit_status=$?
|
266
272
|
|
273
|
+
# Annotate info with SLURM job_info
|
274
|
+
#{annotate_cmd}
|
275
|
+
|
267
276
|
EOF
|
268
277
|
|
269
278
|
# CODA
|
@@ -273,10 +282,10 @@ EOF
|
|
273
282
|
coda +=<<-EOF
|
274
283
|
singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean all -q &>> #{fsync}
|
275
284
|
EOF
|
276
|
-
else
|
277
|
-
coda +=<<-EOF
|
278
|
-
rbbt system clean all -q &>> #{fsync}
|
279
|
-
EOF
|
285
|
+
# else
|
286
|
+
# coda +=<<-EOF
|
287
|
+
#rbbt system clean all -q &>> #{fsync}
|
288
|
+
#EOF
|
280
289
|
end
|
281
290
|
|
282
291
|
if sync.include?("=>")
|
@@ -285,7 +294,7 @@ EOF
|
|
285
294
|
sync = sync.strip
|
286
295
|
source = File.join(File.expand_path(contain), source)
|
287
296
|
else
|
288
|
-
source = File.join(File.expand_path(contain), '
|
297
|
+
source = File.join(File.expand_path(contain), 'workdir/var/jobs')
|
289
298
|
end
|
290
299
|
|
291
300
|
target = File.expand_path(sync)
|
@@ -295,6 +304,7 @@ EOF
|
|
295
304
|
mkdir -p "$(dirname '#{target}')"
|
296
305
|
rsync -avztAXHP --copy-unsafe-links "#{source}/" "#{target}/" &>> #{fsync}
|
297
306
|
sync_es="$?"
|
307
|
+
echo $sync_es > #{fsyncexit}
|
298
308
|
find '#{target}' -type l -ls | awk '$13 ~ /^#{target.gsub('/','\/')}/ { sub("#{source}", "#{target}", $13); print $11, $13 }' | while read A B; do rm $A; ln -s $B $A; done
|
299
309
|
EOF
|
300
310
|
|
@@ -320,23 +330,24 @@ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -v /dev/shm/sem
|
|
320
330
|
EOF
|
321
331
|
else
|
322
332
|
coda +=<<-EOF
|
323
|
-
|
333
|
+
##{exec_cmd} system clean
|
324
334
|
if [ $exit_status == '0' -a $sync_es == '0' ]; then
|
325
335
|
rm -Rfv #{contain} &>> #{fsync}
|
326
336
|
else
|
327
337
|
echo "ERROR: Process failed or results could not sync correctly. Contain directory not purged" &>> #{fsync}
|
328
338
|
fi
|
329
|
-
unset sync_es
|
330
339
|
EOF
|
331
340
|
|
332
341
|
end
|
333
342
|
end
|
334
343
|
end
|
344
|
+
|
335
345
|
coda +=<<-EOF
|
336
346
|
|
337
347
|
# Write exit status to file
|
338
348
|
echo $exit_status > #{fexit}
|
339
349
|
EOF
|
350
|
+
|
340
351
|
if sync
|
341
352
|
coda +=<<-EOF
|
342
353
|
if [ "$sync_es" == '0' ]; then
|
@@ -362,6 +373,10 @@ EOF
|
|
362
373
|
slurm_basedir = options[:slurm_basedir]
|
363
374
|
dependencies = options.delete :slurm_dependencies
|
364
375
|
dependencies = [] if dependencies.nil?
|
376
|
+
|
377
|
+
canfail_dependencies = dependencies.select{|dep| dep =~ /^canfail:(\d+)/ }.collect{|dep| dep.partition(":").last}
|
378
|
+
dependencies = dependencies.reject{|dep| dep =~ /^canfail:(\d+)/ }
|
379
|
+
|
365
380
|
Open.mkdir slurm_basedir
|
366
381
|
|
367
382
|
dry_run = options.delete :dry_run
|
@@ -370,6 +385,7 @@ EOF
|
|
370
385
|
ferr = File.join(slurm_basedir, 'std.err')
|
371
386
|
fjob = File.join(slurm_basedir, 'job.id')
|
372
387
|
fdep = File.join(slurm_basedir, 'dependencies.list')
|
388
|
+
fcfdep = File.join(slurm_basedir, 'canfail_dependencies.list')
|
373
389
|
fexit = File.join(slurm_basedir, 'exit.status')
|
374
390
|
fsync = File.join(slurm_basedir, 'sync.log')
|
375
391
|
fcmd = File.join(slurm_basedir, 'command.slurm')
|
@@ -401,8 +417,21 @@ EOF
|
|
401
417
|
Open.rm fexit
|
402
418
|
Open.rm fout
|
403
419
|
Open.rm ferr
|
420
|
+
|
404
421
|
Open.write(fdep, dependencies * "\n") if dependencies.any?
|
405
|
-
|
422
|
+
Open.write(fcfdep, canfail_dependencies * "\n") if canfail_dependencies.any?
|
423
|
+
|
424
|
+
|
425
|
+
dep_str = '--dependency='
|
426
|
+
normal_dep_str = dependencies.any? ? "afterok:" + dependencies * ":" : nil
|
427
|
+
canfail_dep_str = canfail_dependencies.any? ? "afterany:" + canfail_dependencies * ":" : nil
|
428
|
+
|
429
|
+
if normal_dep_str.nil? && canfail_dep_str.nil?
|
430
|
+
dep_str = ""
|
431
|
+
else
|
432
|
+
dep_str += [normal_dep_str, canfail_dep_str].compact * ","
|
433
|
+
end
|
434
|
+
|
406
435
|
job = CMD.cmd("sbatch #{dep_str} '#{fcmd}'").read.scan(/\d+/).first.to_i
|
407
436
|
Log.debug "SBATCH job id: #{job}"
|
408
437
|
Open.write(fjob, job.to_s)
|
@@ -495,7 +524,11 @@ EOF
|
|
495
524
|
dry_run = options.delete :dry_run
|
496
525
|
tail = options.delete :tail
|
497
526
|
dependencies = options.delete :slurm_dependencies
|
527
|
+
procpath = options.delete :SLURM_procpath
|
528
|
+
|
498
529
|
options[:jobname] = job.clean_name
|
530
|
+
log_level = options.delete :log
|
531
|
+
log_level ||= Log.severity
|
499
532
|
|
500
533
|
workflow = job.workflow
|
501
534
|
|
@@ -520,14 +553,13 @@ EOF
|
|
520
553
|
inputs_dir = File.join(tmp_directory, 'inputs_dir')
|
521
554
|
saved = Step.save_job_inputs(job, inputs_dir)
|
522
555
|
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
end
|
556
|
+
cmd = ['workflow', 'task', workflow.to_s, task.to_s, '--printpath', '--log', log_level.to_s]
|
557
|
+
|
558
|
+
cmd << "--procpath_performance='#{tmp_directory}/procpath##{procpath.gsub(',', '#')}'" if procpath
|
559
|
+
|
560
|
+
cmd << "--override_deps='#{override_deps.gsub("'", '\'')}'" if override_deps and not override_deps.empty?
|
529
561
|
|
530
|
-
cmd << "--
|
562
|
+
cmd << "--load_inputs='#{inputs_dir}'" if saved && saved.any?
|
531
563
|
|
532
564
|
template = self.template(cmd, options)
|
533
565
|
jobid = self.issue_template(template, options.merge(:slurm_basedir => slurm_basedir, :dry_run => dry_run, :slurm_dependencies => dependencies))
|
data/lib/rbbt/persist.rb
CHANGED
@@ -110,6 +110,8 @@ module Persist
|
|
110
110
|
def self.load_file(path, type)
|
111
111
|
begin
|
112
112
|
case (type || :marshal).to_sym
|
113
|
+
when :path
|
114
|
+
path
|
113
115
|
when :nil
|
114
116
|
nil
|
115
117
|
when :boolean
|
@@ -167,6 +169,8 @@ module Persist
|
|
167
169
|
end
|
168
170
|
|
169
171
|
case (type || :marshal).to_sym
|
172
|
+
when :path
|
173
|
+
nil
|
170
174
|
when :nil
|
171
175
|
nil
|
172
176
|
when :boolean
|
@@ -104,9 +104,6 @@ module Persist
|
|
104
104
|
write(true) if closed? || ! write?
|
105
105
|
res = begin
|
106
106
|
yield
|
107
|
-
rescue Exception
|
108
|
-
Log.exception $!
|
109
|
-
raise $!
|
110
107
|
ensure
|
111
108
|
close
|
112
109
|
end
|
@@ -115,7 +112,6 @@ module Persist
|
|
115
112
|
end
|
116
113
|
|
117
114
|
def read_and_close
|
118
|
-
#return yield if @locked
|
119
115
|
if read? || write?
|
120
116
|
begin
|
121
117
|
return yield
|
@@ -134,6 +130,41 @@ module Persist
|
|
134
130
|
end
|
135
131
|
end
|
136
132
|
|
133
|
+
def read_lock
|
134
|
+
read if closed?
|
135
|
+
if read?
|
136
|
+
return yield
|
137
|
+
end
|
138
|
+
|
139
|
+
lock do
|
140
|
+
close
|
141
|
+
read true
|
142
|
+
begin
|
143
|
+
yield
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
def write_lock
|
149
|
+
write if closed?
|
150
|
+
if write?
|
151
|
+
begin
|
152
|
+
return yield
|
153
|
+
ensure
|
154
|
+
close
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
lock do
|
159
|
+
close
|
160
|
+
write true
|
161
|
+
begin
|
162
|
+
yield
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
|
137
168
|
def merge!(hash)
|
138
169
|
hash.each do |key,values|
|
139
170
|
self[key] = values
|
@@ -141,38 +172,38 @@ module Persist
|
|
141
172
|
end
|
142
173
|
|
143
174
|
def range(*args)
|
144
|
-
self.
|
175
|
+
self.read_lock do
|
145
176
|
super(*args)
|
146
177
|
end
|
147
178
|
end
|
148
179
|
|
149
180
|
def include?(*args)
|
150
|
-
self.
|
181
|
+
self.read_lock do
|
151
182
|
super(*args) #- TSV::ENTRY_KEYS.to_a
|
152
183
|
end
|
153
184
|
end
|
154
185
|
|
155
186
|
def [](*args)
|
156
|
-
self.
|
187
|
+
self.read_lock do
|
157
188
|
super(*args) #- TSV::ENTRY_KEYS.to_a
|
158
189
|
end
|
159
190
|
end
|
160
191
|
|
161
192
|
def []=(*args)
|
162
|
-
self.
|
193
|
+
self.write_lock do
|
163
194
|
super(*args) #- TSV::ENTRY_KEYS.to_a
|
164
195
|
end
|
165
196
|
end
|
166
197
|
|
167
198
|
def keys(*args)
|
168
|
-
self.
|
199
|
+
self.read_lock do
|
169
200
|
super(*args)
|
170
201
|
end
|
171
202
|
end
|
172
203
|
|
173
204
|
|
174
205
|
def prefix(key)
|
175
|
-
self.
|
206
|
+
self.read_lock do
|
176
207
|
range(key, 1, key + MAX_CHAR, 1)
|
177
208
|
end
|
178
209
|
end
|
@@ -184,13 +215,13 @@ module Persist
|
|
184
215
|
|
185
216
|
|
186
217
|
def size(*args)
|
187
|
-
self.
|
218
|
+
self.read_lock do
|
188
219
|
super(*args)
|
189
220
|
end
|
190
221
|
end
|
191
222
|
|
192
223
|
def each(*args, &block)
|
193
|
-
self.
|
224
|
+
self.read_lock do
|
194
225
|
super(*args, &block)
|
195
226
|
end
|
196
227
|
end
|
@@ -208,7 +239,7 @@ module Persist
|
|
208
239
|
end
|
209
240
|
|
210
241
|
def values_at(*keys)
|
211
|
-
self.
|
242
|
+
self.read_lock do
|
212
243
|
keys.collect do |k|
|
213
244
|
self[k]
|
214
245
|
end
|
data/lib/rbbt/tsv.rb
CHANGED
@@ -113,11 +113,12 @@ module TSV
|
|
113
113
|
|
114
114
|
data.entity_options = entity_options
|
115
115
|
|
116
|
-
if Path === source
|
117
|
-
|
116
|
+
if Path === source && data.identifiers
|
117
|
+
Path.setup(data.identifiers, source.pkgdir, source.resource)
|
118
118
|
end
|
119
119
|
|
120
120
|
if data.respond_to? :persistence_path
|
121
|
+
data.read
|
121
122
|
data
|
122
123
|
else
|
123
124
|
h = data.dup
|
data/lib/rbbt/util/cmd.rb
CHANGED
@@ -217,7 +217,7 @@ module CMD
|
|
217
217
|
end
|
218
218
|
end
|
219
219
|
|
220
|
-
def self.
|
220
|
+
def self.cmd_pid(*args)
|
221
221
|
all_args = *args
|
222
222
|
|
223
223
|
all_args << {} unless Hash === all_args.last
|
@@ -248,4 +248,9 @@ module CMD
|
|
248
248
|
nil
|
249
249
|
end
|
250
250
|
|
251
|
+
def self.cmd_log(*args)
|
252
|
+
cmd_pid(*args)
|
253
|
+
nil
|
254
|
+
end
|
255
|
+
|
251
256
|
end
|
@@ -242,48 +242,6 @@ module Misc
|
|
242
242
|
|
243
243
|
return options
|
244
244
|
|
245
|
-
options = {}
|
246
|
-
string.split(/#/).each do |str|
|
247
|
-
if str.match(/(.*)=(.*)/)
|
248
|
-
option, value = $1, $2
|
249
|
-
else
|
250
|
-
option, value = str, true
|
251
|
-
end
|
252
|
-
|
253
|
-
option = option.sub(":",'').to_sym if option.chars.first == ':'
|
254
|
-
value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
255
|
-
|
256
|
-
if value == true
|
257
|
-
options[option] = option.to_s.chars.first != '!'
|
258
|
-
else
|
259
|
-
options[option] = Thread.start do
|
260
|
-
$SAFE = 0;
|
261
|
-
case
|
262
|
-
when value =~ /^(?:true|T)$/i
|
263
|
-
true
|
264
|
-
when value =~ /^(?:false|F)$/i
|
265
|
-
false
|
266
|
-
when Symbol === value
|
267
|
-
value
|
268
|
-
when (String === value and value =~ /^\/(.*)\/$/)
|
269
|
-
Regexp.new /#{$1}/
|
270
|
-
else
|
271
|
-
begin
|
272
|
-
Kernel.const_get value
|
273
|
-
rescue
|
274
|
-
begin
|
275
|
-
raise if value =~ /[a-z]/ and defined? value
|
276
|
-
eval(value)
|
277
|
-
rescue Exception
|
278
|
-
value
|
279
|
-
end
|
280
|
-
end
|
281
|
-
end
|
282
|
-
end.value
|
283
|
-
end
|
284
|
-
end
|
285
|
-
|
286
|
-
options
|
287
245
|
end
|
288
246
|
|
289
247
|
end
|