rbbt-util 5.29.2 → 5.30.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc/orchestrate.rb +12 -1
- data/lib/rbbt/hpc/slurm.rb +56 -24
- data/lib/rbbt/persist.rb +4 -0
- data/lib/rbbt/persist/tsv/adapter.rb +44 -13
- data/lib/rbbt/tsv.rb +3 -2
- data/lib/rbbt/util/cmd.rb +6 -1
- data/lib/rbbt/util/misc/options.rb +0 -42
- data/lib/rbbt/util/procpath.rb +49 -0
- data/lib/rbbt/workflow/step/accessor.rb +3 -4
- data/lib/rbbt/workflow/step/run.rb +2 -4
- data/lib/rbbt/workflow/usage.rb +1 -1
- data/share/rbbt_commands/slurm/clean +165 -0
- data/share/rbbt_commands/slurm/list +174 -95
- data/share/rbbt_commands/slurm/orchestrate +3 -2
- data/share/rbbt_commands/slurm/task +1 -0
- data/share/rbbt_commands/tsv/slice +3 -3
- data/share/rbbt_commands/workflow/info +1 -1
- data/share/rbbt_commands/workflow/task +27 -7
- data/share/rbbt_commands/workflow/write_info +52 -0
- data/test/rbbt/util/test_procpath.rb +23 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 86a6fb4f62fc52b64a090ff3068452055f79ec65f5d11a1a497d1433370b76e2
|
4
|
+
data.tar.gz: 453409b866e291e8971d13ef737ac8b6666ba89251b22a756bebc37339fa4ea7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8438fa94a7f460192656be9bd300b2b9991fd5b97d68cdfdf3d33baa76ed8f80c53caaa967e0ecd317cb6d99a32ca0e0fede252b58c96d913b5b44bbdcdfb8f0
|
7
|
+
data.tar.gz: f6d50c0b5aba669526c0d044a15598a07ded038b3ae7964c84723de05c93e30b30763a5ceddbd300a9c5f79f1c84b9305ebaa786c67702de32c5a0b26ce56222
|
data/lib/rbbt/hpc/orchestrate.rb
CHANGED
@@ -60,9 +60,14 @@ module HPC
|
|
60
60
|
return if job.done?
|
61
61
|
return unless job.path.split("/")[-4] == "jobs"
|
62
62
|
seen[:orchestration_target_job] ||= job
|
63
|
+
|
63
64
|
options.delete "recursive_clean"
|
65
|
+
options.delete "clean_task"
|
66
|
+
options.delete "clean"
|
64
67
|
options.delete "tail"
|
65
68
|
options.delete "printfile"
|
69
|
+
options.delete "detach"
|
70
|
+
|
66
71
|
rules = YAML.load(Open.read(options[:orchestration_rules])) if options[:orchestration_rules]
|
67
72
|
rules ||= {}
|
68
73
|
IndiferentHash.setup(rules)
|
@@ -75,7 +80,13 @@ module HPC
|
|
75
80
|
skip_dep = job_rules["chain_tasks"] &&
|
76
81
|
job_rules["chain_tasks"][job.workflow.to_s] && job_rules["chain_tasks"][job.workflow.to_s].include?(job.task_name.to_s) &&
|
77
82
|
job_rules["chain_tasks"][dep.workflow.to_s] && job_rules["chain_tasks"][dep.workflow.to_s].include?(dep.task_name.to_s)
|
78
|
-
|
83
|
+
|
84
|
+
deps = seen[dep.path] ||= self.orchestrate_job(dep, options, skip_dep, seen)
|
85
|
+
if job.canfail_paths.include? dep.path
|
86
|
+
[deps].flatten.compact.collect{|id| ['canfail', id] * ":"}
|
87
|
+
else
|
88
|
+
deps
|
89
|
+
end
|
79
90
|
end.flatten.compact.uniq
|
80
91
|
|
81
92
|
skip = true if job_rules[:skip]
|
data/lib/rbbt/hpc/slurm.rb
CHANGED
@@ -33,7 +33,8 @@ module HPC
|
|
33
33
|
group = File.basename(File.dirname(ENV['HOME']))
|
34
34
|
|
35
35
|
if contain_and_sync
|
36
|
-
|
36
|
+
random_file = TmpFile.random_name
|
37
|
+
contain = "/scratch/tmp/rbbt-#{user}/#{random_file}" if contain.nil?
|
37
38
|
sync = "~/.rbbt/var/jobs" if sync.nil?
|
38
39
|
wipe_container = "post" if wipe_container.nil?
|
39
40
|
end
|
@@ -58,11 +59,11 @@ module HPC
|
|
58
59
|
when FalseClass
|
59
60
|
'--' << o << "=false"
|
60
61
|
else
|
61
|
-
['--' << o, "'#{v}'"] * " "
|
62
|
+
['--' << o, "'#{v.to_s.gsub("'", '\'')}'"] * " "
|
62
63
|
end
|
63
64
|
end * " "
|
64
65
|
|
65
|
-
rbbt_cmd << " --config_keys='#{config_keys}'" if config_keys and not config_keys.empty?
|
66
|
+
rbbt_cmd << " --config_keys='#{config_keys.gsub("'", '\'')}'" if config_keys and not config_keys.empty?
|
66
67
|
|
67
68
|
time = Misc.format_seconds Misc.timespan(time) unless time.include? ":"
|
68
69
|
|
@@ -76,6 +77,7 @@ module HPC
|
|
76
77
|
fjob = File.join(slurm_basedir, 'job.id')
|
77
78
|
fexit = File.join(slurm_basedir, 'exit.status')
|
78
79
|
fsync = File.join(slurm_basedir, 'sync.log')
|
80
|
+
fsyncexit = File.join(slurm_basedir, 'sync.status')
|
79
81
|
fcmd = File.join(slurm_basedir, 'command.slurm')
|
80
82
|
|
81
83
|
#{{{ GENERATE TEMPLATE
|
@@ -107,10 +109,6 @@ module HPC
|
|
107
109
|
EOF
|
108
110
|
end
|
109
111
|
|
110
|
-
header +=<<-EOF
|
111
|
-
#CMD: #{rbbt_cmd}
|
112
|
-
EOF
|
113
|
-
|
114
112
|
# ENV
|
115
113
|
env = ""
|
116
114
|
env +=<<-EOF
|
@@ -246,7 +244,7 @@ EOF
|
|
246
244
|
end
|
247
245
|
|
248
246
|
if contain
|
249
|
-
rbbt_cmd << " " << %(--workdir_all='#{contain}')
|
247
|
+
rbbt_cmd << " " << %(--workdir_all='#{contain.gsub("'", '\'')}/workdir')
|
250
248
|
end
|
251
249
|
end
|
252
250
|
|
@@ -254,16 +252,27 @@ EOF
|
|
254
252
|
cmd =<<-EOF
|
255
253
|
#{exec_cmd} \\
|
256
254
|
#{rbbt_cmd}
|
255
|
+
EOF
|
256
|
+
annotate_cmd =<<-EOF
|
257
|
+
#{exec_cmd} \\
|
258
|
+
workflow write_info --recursive --force=false --check_pid "$step_path" slurm_job $SLURM_JOB_ID
|
257
259
|
EOF
|
258
260
|
|
261
|
+
header +=<<-EOF
|
262
|
+
#CMD: #{rbbt_cmd}
|
263
|
+
EOF
|
264
|
+
|
259
265
|
run +=<<-EOF
|
260
266
|
|
261
267
|
# Run command
|
262
|
-
#{cmd}
|
268
|
+
step_path=$(#{cmd})
|
263
269
|
|
264
270
|
# Save exit status
|
265
271
|
exit_status=$?
|
266
272
|
|
273
|
+
# Annotate info with SLURM job_info
|
274
|
+
#{annotate_cmd}
|
275
|
+
|
267
276
|
EOF
|
268
277
|
|
269
278
|
# CODA
|
@@ -273,10 +282,10 @@ EOF
|
|
273
282
|
coda +=<<-EOF
|
274
283
|
singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean all -q &>> #{fsync}
|
275
284
|
EOF
|
276
|
-
else
|
277
|
-
coda +=<<-EOF
|
278
|
-
rbbt system clean all -q &>> #{fsync}
|
279
|
-
EOF
|
285
|
+
# else
|
286
|
+
# coda +=<<-EOF
|
287
|
+
#rbbt system clean all -q &>> #{fsync}
|
288
|
+
#EOF
|
280
289
|
end
|
281
290
|
|
282
291
|
if sync.include?("=>")
|
@@ -285,7 +294,7 @@ EOF
|
|
285
294
|
sync = sync.strip
|
286
295
|
source = File.join(File.expand_path(contain), source)
|
287
296
|
else
|
288
|
-
source = File.join(File.expand_path(contain), '
|
297
|
+
source = File.join(File.expand_path(contain), 'workdir/var/jobs')
|
289
298
|
end
|
290
299
|
|
291
300
|
target = File.expand_path(sync)
|
@@ -295,6 +304,7 @@ EOF
|
|
295
304
|
mkdir -p "$(dirname '#{target}')"
|
296
305
|
rsync -avztAXHP --copy-unsafe-links "#{source}/" "#{target}/" &>> #{fsync}
|
297
306
|
sync_es="$?"
|
307
|
+
echo $sync_es > #{fsyncexit}
|
298
308
|
find '#{target}' -type l -ls | awk '$13 ~ /^#{target.gsub('/','\/')}/ { sub("#{source}", "#{target}", $13); print $11, $13 }' | while read A B; do rm $A; ln -s $B $A; done
|
299
309
|
EOF
|
300
310
|
|
@@ -320,23 +330,24 @@ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -v /dev/shm/sem
|
|
320
330
|
EOF
|
321
331
|
else
|
322
332
|
coda +=<<-EOF
|
323
|
-
|
333
|
+
##{exec_cmd} system clean
|
324
334
|
if [ $exit_status == '0' -a $sync_es == '0' ]; then
|
325
335
|
rm -Rfv #{contain} &>> #{fsync}
|
326
336
|
else
|
327
337
|
echo "ERROR: Process failed or results could not sync correctly. Contain directory not purged" &>> #{fsync}
|
328
338
|
fi
|
329
|
-
unset sync_es
|
330
339
|
EOF
|
331
340
|
|
332
341
|
end
|
333
342
|
end
|
334
343
|
end
|
344
|
+
|
335
345
|
coda +=<<-EOF
|
336
346
|
|
337
347
|
# Write exit status to file
|
338
348
|
echo $exit_status > #{fexit}
|
339
349
|
EOF
|
350
|
+
|
340
351
|
if sync
|
341
352
|
coda +=<<-EOF
|
342
353
|
if [ "$sync_es" == '0' ]; then
|
@@ -362,6 +373,10 @@ EOF
|
|
362
373
|
slurm_basedir = options[:slurm_basedir]
|
363
374
|
dependencies = options.delete :slurm_dependencies
|
364
375
|
dependencies = [] if dependencies.nil?
|
376
|
+
|
377
|
+
canfail_dependencies = dependencies.select{|dep| dep =~ /^canfail:(\d+)/ }.collect{|dep| dep.partition(":").last}
|
378
|
+
dependencies = dependencies.reject{|dep| dep =~ /^canfail:(\d+)/ }
|
379
|
+
|
365
380
|
Open.mkdir slurm_basedir
|
366
381
|
|
367
382
|
dry_run = options.delete :dry_run
|
@@ -370,6 +385,7 @@ EOF
|
|
370
385
|
ferr = File.join(slurm_basedir, 'std.err')
|
371
386
|
fjob = File.join(slurm_basedir, 'job.id')
|
372
387
|
fdep = File.join(slurm_basedir, 'dependencies.list')
|
388
|
+
fcfdep = File.join(slurm_basedir, 'canfail_dependencies.list')
|
373
389
|
fexit = File.join(slurm_basedir, 'exit.status')
|
374
390
|
fsync = File.join(slurm_basedir, 'sync.log')
|
375
391
|
fcmd = File.join(slurm_basedir, 'command.slurm')
|
@@ -401,8 +417,21 @@ EOF
|
|
401
417
|
Open.rm fexit
|
402
418
|
Open.rm fout
|
403
419
|
Open.rm ferr
|
420
|
+
|
404
421
|
Open.write(fdep, dependencies * "\n") if dependencies.any?
|
405
|
-
|
422
|
+
Open.write(fcfdep, canfail_dependencies * "\n") if canfail_dependencies.any?
|
423
|
+
|
424
|
+
|
425
|
+
dep_str = '--dependency='
|
426
|
+
normal_dep_str = dependencies.any? ? "afterok:" + dependencies * ":" : nil
|
427
|
+
canfail_dep_str = canfail_dependencies.any? ? "afterany:" + canfail_dependencies * ":" : nil
|
428
|
+
|
429
|
+
if normal_dep_str.nil? && canfail_dep_str.nil?
|
430
|
+
dep_str = ""
|
431
|
+
else
|
432
|
+
dep_str += [normal_dep_str, canfail_dep_str].compact * ","
|
433
|
+
end
|
434
|
+
|
406
435
|
job = CMD.cmd("sbatch #{dep_str} '#{fcmd}'").read.scan(/\d+/).first.to_i
|
407
436
|
Log.debug "SBATCH job id: #{job}"
|
408
437
|
Open.write(fjob, job.to_s)
|
@@ -495,7 +524,11 @@ EOF
|
|
495
524
|
dry_run = options.delete :dry_run
|
496
525
|
tail = options.delete :tail
|
497
526
|
dependencies = options.delete :slurm_dependencies
|
527
|
+
procpath = options.delete :SLURM_procpath
|
528
|
+
|
498
529
|
options[:jobname] = job.clean_name
|
530
|
+
log_level = options.delete :log
|
531
|
+
log_level ||= Log.severity
|
499
532
|
|
500
533
|
workflow = job.workflow
|
501
534
|
|
@@ -520,14 +553,13 @@ EOF
|
|
520
553
|
inputs_dir = File.join(tmp_directory, 'inputs_dir')
|
521
554
|
saved = Step.save_job_inputs(job, inputs_dir)
|
522
555
|
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
end
|
556
|
+
cmd = ['workflow', 'task', workflow.to_s, task.to_s, '--printpath', '--log', log_level.to_s]
|
557
|
+
|
558
|
+
cmd << "--procpath_performance='#{tmp_directory}/procpath##{procpath.gsub(',', '#')}'" if procpath
|
559
|
+
|
560
|
+
cmd << "--override_deps='#{override_deps.gsub("'", '\'')}'" if override_deps and not override_deps.empty?
|
529
561
|
|
530
|
-
cmd << "--
|
562
|
+
cmd << "--load_inputs='#{inputs_dir}'" if saved && saved.any?
|
531
563
|
|
532
564
|
template = self.template(cmd, options)
|
533
565
|
jobid = self.issue_template(template, options.merge(:slurm_basedir => slurm_basedir, :dry_run => dry_run, :slurm_dependencies => dependencies))
|
data/lib/rbbt/persist.rb
CHANGED
@@ -110,6 +110,8 @@ module Persist
|
|
110
110
|
def self.load_file(path, type)
|
111
111
|
begin
|
112
112
|
case (type || :marshal).to_sym
|
113
|
+
when :path
|
114
|
+
path
|
113
115
|
when :nil
|
114
116
|
nil
|
115
117
|
when :boolean
|
@@ -167,6 +169,8 @@ module Persist
|
|
167
169
|
end
|
168
170
|
|
169
171
|
case (type || :marshal).to_sym
|
172
|
+
when :path
|
173
|
+
nil
|
170
174
|
when :nil
|
171
175
|
nil
|
172
176
|
when :boolean
|
data/lib/rbbt/persist/tsv/adapter.rb
CHANGED
@@ -104,9 +104,6 @@ module Persist
|
|
104
104
|
write(true) if closed? || ! write?
|
105
105
|
res = begin
|
106
106
|
yield
|
107
|
-
rescue Exception
|
108
|
-
Log.exception $!
|
109
|
-
raise $!
|
110
107
|
ensure
|
111
108
|
close
|
112
109
|
end
|
@@ -115,7 +112,6 @@ module Persist
|
|
115
112
|
end
|
116
113
|
|
117
114
|
def read_and_close
|
118
|
-
#return yield if @locked
|
119
115
|
if read? || write?
|
120
116
|
begin
|
121
117
|
return yield
|
@@ -134,6 +130,41 @@ module Persist
|
|
134
130
|
end
|
135
131
|
end
|
136
132
|
|
133
|
+
def read_lock
|
134
|
+
read if closed?
|
135
|
+
if read?
|
136
|
+
return yield
|
137
|
+
end
|
138
|
+
|
139
|
+
lock do
|
140
|
+
close
|
141
|
+
read true
|
142
|
+
begin
|
143
|
+
yield
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
def write_lock
|
149
|
+
write if closed?
|
150
|
+
if write?
|
151
|
+
begin
|
152
|
+
return yield
|
153
|
+
ensure
|
154
|
+
close
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
lock do
|
159
|
+
close
|
160
|
+
write true
|
161
|
+
begin
|
162
|
+
yield
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
|
137
168
|
def merge!(hash)
|
138
169
|
hash.each do |key,values|
|
139
170
|
self[key] = values
|
@@ -141,38 +172,38 @@ module Persist
|
|
141
172
|
end
|
142
173
|
|
143
174
|
def range(*args)
|
144
|
-
self.
|
175
|
+
self.read_lock do
|
145
176
|
super(*args)
|
146
177
|
end
|
147
178
|
end
|
148
179
|
|
149
180
|
def include?(*args)
|
150
|
-
self.
|
181
|
+
self.read_lock do
|
151
182
|
super(*args) #- TSV::ENTRY_KEYS.to_a
|
152
183
|
end
|
153
184
|
end
|
154
185
|
|
155
186
|
def [](*args)
|
156
|
-
self.
|
187
|
+
self.read_lock do
|
157
188
|
super(*args) #- TSV::ENTRY_KEYS.to_a
|
158
189
|
end
|
159
190
|
end
|
160
191
|
|
161
192
|
def []=(*args)
|
162
|
-
self.
|
193
|
+
self.write_lock do
|
163
194
|
super(*args) #- TSV::ENTRY_KEYS.to_a
|
164
195
|
end
|
165
196
|
end
|
166
197
|
|
167
198
|
def keys(*args)
|
168
|
-
self.
|
199
|
+
self.read_lock do
|
169
200
|
super(*args)
|
170
201
|
end
|
171
202
|
end
|
172
203
|
|
173
204
|
|
174
205
|
def prefix(key)
|
175
|
-
self.
|
206
|
+
self.read_lock do
|
176
207
|
range(key, 1, key + MAX_CHAR, 1)
|
177
208
|
end
|
178
209
|
end
|
@@ -184,13 +215,13 @@ module Persist
|
|
184
215
|
|
185
216
|
|
186
217
|
def size(*args)
|
187
|
-
self.
|
218
|
+
self.read_lock do
|
188
219
|
super(*args)
|
189
220
|
end
|
190
221
|
end
|
191
222
|
|
192
223
|
def each(*args, &block)
|
193
|
-
self.
|
224
|
+
self.read_lock do
|
194
225
|
super(*args, &block)
|
195
226
|
end
|
196
227
|
end
|
@@ -208,7 +239,7 @@ module Persist
|
|
208
239
|
end
|
209
240
|
|
210
241
|
def values_at(*keys)
|
211
|
-
self.
|
242
|
+
self.read_lock do
|
212
243
|
keys.collect do |k|
|
213
244
|
self[k]
|
214
245
|
end
|
data/lib/rbbt/tsv.rb
CHANGED
@@ -113,11 +113,12 @@ module TSV
|
|
113
113
|
|
114
114
|
data.entity_options = entity_options
|
115
115
|
|
116
|
-
if Path === source
|
117
|
-
|
116
|
+
if Path === source && data.identifiers
|
117
|
+
Path.setup(data.identifiers, source.pkgdir, source.resource)
|
118
118
|
end
|
119
119
|
|
120
120
|
if data.respond_to? :persistence_path
|
121
|
+
data.read
|
121
122
|
data
|
122
123
|
else
|
123
124
|
h = data.dup
|
data/lib/rbbt/util/cmd.rb
CHANGED
@@ -217,7 +217,7 @@ module CMD
|
|
217
217
|
end
|
218
218
|
end
|
219
219
|
|
220
|
-
def self.
|
220
|
+
def self.cmd_pid(*args)
|
221
221
|
all_args = *args
|
222
222
|
|
223
223
|
all_args << {} unless Hash === all_args.last
|
@@ -248,4 +248,9 @@ module CMD
|
|
248
248
|
nil
|
249
249
|
end
|
250
250
|
|
251
|
+
def self.cmd_log(*args)
|
252
|
+
cmd_pid(*args)
|
253
|
+
nil
|
254
|
+
end
|
255
|
+
|
251
256
|
end
|
data/lib/rbbt/util/misc/options.rb
CHANGED
@@ -242,48 +242,6 @@ module Misc
|
|
242
242
|
|
243
243
|
return options
|
244
244
|
|
245
|
-
options = {}
|
246
|
-
string.split(/#/).each do |str|
|
247
|
-
if str.match(/(.*)=(.*)/)
|
248
|
-
option, value = $1, $2
|
249
|
-
else
|
250
|
-
option, value = str, true
|
251
|
-
end
|
252
|
-
|
253
|
-
option = option.sub(":",'').to_sym if option.chars.first == ':'
|
254
|
-
value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
255
|
-
|
256
|
-
if value == true
|
257
|
-
options[option] = option.to_s.chars.first != '!'
|
258
|
-
else
|
259
|
-
options[option] = Thread.start do
|
260
|
-
$SAFE = 0;
|
261
|
-
case
|
262
|
-
when value =~ /^(?:true|T)$/i
|
263
|
-
true
|
264
|
-
when value =~ /^(?:false|F)$/i
|
265
|
-
false
|
266
|
-
when Symbol === value
|
267
|
-
value
|
268
|
-
when (String === value and value =~ /^\/(.*)\/$/)
|
269
|
-
Regexp.new /#{$1}/
|
270
|
-
else
|
271
|
-
begin
|
272
|
-
Kernel.const_get value
|
273
|
-
rescue
|
274
|
-
begin
|
275
|
-
raise if value =~ /[a-z]/ and defined? value
|
276
|
-
eval(value)
|
277
|
-
rescue Exception
|
278
|
-
value
|
279
|
-
end
|
280
|
-
end
|
281
|
-
end
|
282
|
-
end.value
|
283
|
-
end
|
284
|
-
end
|
285
|
-
|
286
|
-
options
|
287
245
|
end
|
288
246
|
|
289
247
|
end
|