rbbt-util 5.30.0 → 5.30.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc/orchestrate.rb +4 -1
- data/lib/rbbt/hpc/slurm.rb +22 -18
- data/lib/rbbt/persist/tsv/adapter.rb +1 -5
- data/lib/rbbt/tsv.rb +3 -2
- data/lib/rbbt/util/open.rb +3 -1
- data/lib/rbbt/workflow.rb +1 -1
- data/lib/rbbt/workflow/examples.rb +5 -1
- data/lib/rbbt/workflow/util/archive.rb +1 -1
- data/lib/rbbt/workflow/util/provenance.rb +2 -1
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/slurm/list +26 -7
- data/share/rbbt_commands/workflow/task +12 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa5b1545b1a92199d163bf398ca973c6aa3d78ab37cc724b988e007c87824b3b
|
4
|
+
data.tar.gz: 6cd3ce80ea589fb97a211ebe1d2973dd7c0d3f93b1ccefe10b9ecefbc77ff515
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8246d2a2686cc38086800fb6c30949bba93c8e78b7feac220b710a4e80f66858f979c5ea41de0beb67aeefd29de045e2315c3b50128f448bfef521ce3d52e15
|
7
|
+
data.tar.gz: 26559322cae3bfb736565e8da0c7e63c3b006c793ab5cda8ec37a04fabfa460001bae6d7da8164ccd310730b81553d1d3b5a1d14cee9e6929793329ce508ec59
|
data/lib/rbbt/hpc/orchestrate.rb
CHANGED
@@ -5,7 +5,10 @@ module HPC
|
|
5
5
|
def self.job_rules(rules, job)
|
6
6
|
workflow = job.workflow.to_s
|
7
7
|
task_name = job.task_name.to_s
|
8
|
+
task_name = job.overriden.to_s if Symbol === job.overriden
|
9
|
+
|
8
10
|
defaults = rules["defaults"] || {}
|
11
|
+
defaults.merge(rules[workflow]["defaults"] || {}) if rules[workflow]
|
9
12
|
|
10
13
|
job_rules = IndiferentHash.setup(defaults.dup)
|
11
14
|
|
@@ -83,7 +86,7 @@ module HPC
|
|
83
86
|
|
84
87
|
deps = seen[dep.path] ||= self.orchestrate_job(dep, options, skip_dep, seen)
|
85
88
|
if job.canfail_paths.include? dep.path
|
86
|
-
[deps].flatten.collect{|id| ['canfail', id] * ":"}
|
89
|
+
[deps].flatten.compact.collect{|id| ['canfail', id] * ":"}
|
87
90
|
else
|
88
91
|
deps
|
89
92
|
end
|
data/lib/rbbt/hpc/slurm.rb
CHANGED
@@ -21,10 +21,10 @@ module HPC
|
|
21
21
|
exclusive = options.delete :exclusive
|
22
22
|
highmem = options.delete :highmem
|
23
23
|
|
24
|
-
queue = options.delete(:queue) || 'bsc_ls'
|
24
|
+
queue = options.delete(:queue) || Rbbt::Config.get('queue', :slurm_queue, :slurm, :SLURM, :default => 'bsc_ls')
|
25
25
|
task_cpus = options.delete(:task_cpus) || 1
|
26
26
|
nodes = options.delete(:nodes) || 1
|
27
|
-
time = options.delete(:time) || "0:00
|
27
|
+
time = options.delete(:time) || "0:02:00"
|
28
28
|
|
29
29
|
inputs_dir = options.delete :inputs_dir
|
30
30
|
config_keys = options.delete :config_keys
|
@@ -301,11 +301,15 @@ EOF
|
|
301
301
|
coda +=<<-EOF
|
302
302
|
|
303
303
|
# Sync data to target location
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
304
|
+
if [ $exit_status == '0' ]; then
|
305
|
+
mkdir -p "$(dirname '#{target}')"
|
306
|
+
rsync -avztAXHP --copy-unsafe-links "#{source}/" "#{target}/" &>> #{fsync}
|
307
|
+
sync_es="$?"
|
308
|
+
echo $sync_es > #{fsyncexit}
|
309
|
+
find '#{target}' -type l -ls | awk '$13 ~ /^#{target.gsub('/','\/')}/ { sub("#{source}", "#{target}", $13); print $11, $13 }' | while read A B; do rm $A; ln -s $B $A; done
|
310
|
+
else
|
311
|
+
sync_es="$exit_status"
|
312
|
+
fi
|
309
313
|
EOF
|
310
314
|
|
311
315
|
if contain && (wipe_container == "post" || wipe_container == "both")
|
@@ -331,11 +335,11 @@ EOF
|
|
331
335
|
else
|
332
336
|
coda +=<<-EOF
|
333
337
|
##{exec_cmd} system clean
|
334
|
-
if [ $exit_status == '0' -a $sync_es == '0' ]; then
|
338
|
+
#if [ $exit_status == '0' -a $sync_es == '0' ]; then
|
335
339
|
rm -Rfv #{contain} &>> #{fsync}
|
336
|
-
else
|
337
|
-
echo "ERROR: Process failed or results could not sync correctly. Contain directory not purged" &>> #{fsync}
|
338
|
-
fi
|
340
|
+
#else
|
341
|
+
# echo "ERROR: Process failed or results could not sync correctly. Contain directory not purged" &>> #{fsync}
|
342
|
+
#fi
|
339
343
|
EOF
|
340
344
|
|
341
345
|
end
|
@@ -525,7 +529,10 @@ EOF
|
|
525
529
|
tail = options.delete :tail
|
526
530
|
dependencies = options.delete :slurm_dependencies
|
527
531
|
procpath = options.delete :SLURM_procpath
|
532
|
+
|
528
533
|
options[:jobname] = job.clean_name
|
534
|
+
log_level = options.delete :log
|
535
|
+
log_level ||= Log.severity
|
529
536
|
|
530
537
|
workflow = job.workflow
|
531
538
|
|
@@ -550,16 +557,13 @@ EOF
|
|
550
557
|
inputs_dir = File.join(tmp_directory, 'inputs_dir')
|
551
558
|
saved = Step.save_job_inputs(job, inputs_dir)
|
552
559
|
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
else
|
557
|
-
cmd = ['workflow', 'task', workflow.to_s, task.to_s, '--printpath', '--log', (options[:log] || Log.severity).to_s]
|
558
|
-
end
|
560
|
+
cmd = ['workflow', 'task', workflow.to_s, task.to_s, '--printpath', '--log', log_level.to_s]
|
561
|
+
|
562
|
+
cmd << "--procpath_performance='#{tmp_directory}/procpath##{procpath.gsub(',', '#')}'" if procpath
|
559
563
|
|
560
564
|
cmd << "--override_deps='#{override_deps.gsub("'", '\'')}'" if override_deps and not override_deps.empty?
|
561
565
|
|
562
|
-
cmd << "--
|
566
|
+
cmd << "--load_inputs='#{inputs_dir}'" if saved && saved.any?
|
563
567
|
|
564
568
|
template = self.template(cmd, options)
|
565
569
|
jobid = self.issue_template(template, options.merge(:slurm_basedir => slurm_basedir, :dry_run => dry_run, :slurm_dependencies => dependencies))
|
data/lib/rbbt/tsv.rb
CHANGED
@@ -113,11 +113,12 @@ module TSV
|
|
113
113
|
|
114
114
|
data.entity_options = entity_options
|
115
115
|
|
116
|
-
if Path === source
|
117
|
-
|
116
|
+
if Path === source && data.identifiers
|
117
|
+
Path.setup(data.identifiers, source.pkgdir, source.resource)
|
118
118
|
end
|
119
119
|
|
120
120
|
if data.respond_to? :persistence_path
|
121
|
+
data.read
|
121
122
|
data
|
122
123
|
else
|
123
124
|
h = data.dup
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -748,7 +748,9 @@ module Open
|
|
748
748
|
if (dir_sub_path = find_repo_dir(path))
|
749
749
|
writable_repo?(*dir_sub_path)
|
750
750
|
else
|
751
|
-
if File.
|
751
|
+
if File.symlink?(path)
|
752
|
+
File.writable?(File.dirname(path))
|
753
|
+
elsif File.exist?(path)
|
752
754
|
File.writable?(path)
|
753
755
|
else
|
754
756
|
File.writable?(File.dirname(File.expand_path(path)))
|
data/lib/rbbt/workflow.rb
CHANGED
@@ -190,7 +190,7 @@ module Workflow
|
|
190
190
|
return Misc.string2const Misc.camel_case(wf_name)
|
191
191
|
end
|
192
192
|
|
193
|
-
Log.
|
193
|
+
Log.high{"Loading workflow #{wf_name}"}
|
194
194
|
require_local_workflow(wf_name) or
|
195
195
|
(Workflow.autoinstall and `rbbt workflow install #{Misc.snake_case(wf_name)} || rbbt workflow install #{wf_name}` and require_local_workflow(wf_name)) or raise("Workflow not found or could not be loaded: #{ wf_name }")
|
196
196
|
begin
|
@@ -53,7 +53,11 @@ module Workflow
|
|
53
53
|
if file =~ /\.yaml/
|
54
54
|
inputs[input.to_sym] = YAML.load(Open.read(file))
|
55
55
|
else
|
56
|
-
|
56
|
+
if File.symlink?(file)
|
57
|
+
inputs[input.to_sym] = File.readlink(file)
|
58
|
+
else
|
59
|
+
inputs[input.to_sym] = Open.realpath(file)
|
60
|
+
end
|
57
61
|
end
|
58
62
|
when :text
|
59
63
|
Log.debug "Reading #{ input } from #{file}"
|
@@ -78,6 +78,7 @@ class Step
|
|
78
78
|
name = info[:name] || File.basename(path)
|
79
79
|
status = :unsync if status == :done and not Open.exist?(path)
|
80
80
|
status = :notfound if status == :noinfo and not Open.exist?(path)
|
81
|
+
|
81
82
|
str = " " * offset
|
82
83
|
str << prov_report_msg(status, name, path, info)
|
83
84
|
step.dependencies.reverse.each do |dep|
|
@@ -90,7 +91,7 @@ class Step
|
|
90
91
|
if expand_repeats
|
91
92
|
str << Log.color(:green, Log.uncolor(prov_report(dep, offset+1, task)))
|
92
93
|
else
|
93
|
-
str << Log.color(:green, " " * (offset + 1) + Log.uncolor(prov_report_msg(status, name, path, info)))
|
94
|
+
str << Log.color(:green, " " * (offset + 1) + Log.uncolor(prov_report_msg(dep.status, dep.info[:name], dep.path, dep.info)))
|
94
95
|
end
|
95
96
|
end
|
96
97
|
end if step.dependencies
|
data/share/rbbt_commands/migrate
CHANGED
@@ -71,7 +71,7 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
71
71
|
end
|
72
72
|
|
73
73
|
if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
|
74
|
-
exe = m[1]
|
74
|
+
exe = m[1].sub('step_path=$(','')
|
75
75
|
else
|
76
76
|
exe = nil
|
77
77
|
end
|
@@ -98,15 +98,24 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
98
98
|
if File.exists?(fstatus = File.join(dir, 'job.status'))
|
99
99
|
nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
|
100
100
|
elsif job_nodes[id]
|
101
|
-
nodes = job_nodes[id]
|
101
|
+
nodes = job_nodes[id].reject{|n| n.include? "("}
|
102
102
|
else
|
103
103
|
nodes = []
|
104
104
|
end
|
105
105
|
|
106
|
+
if File.exists?(File.join(dir, 'exit.status'))
|
107
|
+
now = File.ctime(File.join(dir, 'exit.status'))
|
108
|
+
else
|
109
|
+
now = Time.now
|
110
|
+
end
|
111
|
+
|
106
112
|
if File.exists?(File.join(dir, 'std.out'))
|
113
|
+
cerrt = File.ctime File.join(dir, 'std.err')
|
114
|
+
coutt = File.ctime File.join(dir, 'std.out')
|
107
115
|
outt = File.mtime File.join(dir, 'std.out')
|
108
116
|
errt = File.mtime File.join(dir, 'std.err')
|
109
|
-
time_diff =
|
117
|
+
time_diff = now - [outt, errt].max
|
118
|
+
time_elapsed = now - [cerrt, coutt].min
|
110
119
|
end
|
111
120
|
|
112
121
|
fdep = File.join(dir, 'dependencies.list')
|
@@ -115,14 +124,19 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
115
124
|
fcadep = File.join(dir, 'canfail_dependencies.list')
|
116
125
|
cadeps = Open.read(fcadep).split("\n") if File.exists?(fcadep)
|
117
126
|
|
118
|
-
if done || error || aborted || running || queued || jobid
|
127
|
+
if done || error || aborted || running || queued || jobid
|
119
128
|
select = false
|
120
129
|
select = true if done && exit_status == 0
|
121
130
|
select = true if error && exit_status && exit_status != 0
|
122
131
|
select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
|
123
|
-
|
124
|
-
select = true if
|
132
|
+
is_running = exit_status.nil? && running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)
|
133
|
+
select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
|
134
|
+
select = true if running && nodes.any? && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
|
125
135
|
select = true if jobid && jobid.split(",").include?(id)
|
136
|
+
select = select && cmd.match(/#{search}/) if search
|
137
|
+
next unless select
|
138
|
+
elsif search
|
139
|
+
select = false
|
126
140
|
select = true if search && cmd.match(/#{search}/)
|
127
141
|
next unless select
|
128
142
|
end
|
@@ -130,6 +144,7 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
130
144
|
|
131
145
|
puts Log.color :blue, dir
|
132
146
|
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
|
147
|
+
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err'))
|
133
148
|
puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
|
134
149
|
puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
|
135
150
|
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
|
@@ -138,7 +153,8 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
138
153
|
puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
|
139
154
|
puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
|
140
155
|
puts Log.color(:magenta, "Nodes: ") << nodes * ", "
|
141
|
-
puts Log.color(:magenta, "
|
156
|
+
puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
|
157
|
+
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
|
142
158
|
|
143
159
|
if options[:sbatch_parameters]
|
144
160
|
puts Log.color(:magenta, "SBATCH parameters: ")
|
@@ -180,8 +196,11 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
180
196
|
start = rss_average.keys.sort.first
|
181
197
|
eend = rss_average.keys.sort.last
|
182
198
|
time_elapsed = eend - start
|
199
|
+
ticks = 1 if ticks == 0
|
200
|
+
time_elapsed = 1 if time_elapsed == 0
|
183
201
|
puts Log.color(:yellow, "CPU average: ") + "%.2f" % ( ticks / clock_ticks / time_elapsed * 100).to_s
|
184
202
|
puts Log.color(:yellow, "RSS average: ") + "%.2f GB" % Misc.mean(rss_average.collect{|t,l| Misc.sum(l) / (1024 * 1024 * 1024)}).to_s
|
203
|
+
puts Log.color(:yellow, "Time: ") + Misc.format_seconds((eend - start))
|
185
204
|
|
186
205
|
end
|
187
206
|
|
@@ -410,10 +410,20 @@ begin
|
|
410
410
|
|
411
411
|
if options[:procpath_performance]
|
412
412
|
require 'rbbt/util/procpath'
|
413
|
+
current_pid = job.info[:pid]
|
413
414
|
job.fork
|
414
415
|
job.soft_grace
|
415
|
-
|
416
|
-
|
416
|
+
sleep 2 if job.info[:pid] == current_pid
|
417
|
+
if job.info[:pid] != current_pid
|
418
|
+
pid = job.info[:pid]
|
419
|
+
begin
|
420
|
+
ProcPath.monitor(pid, options[:procpath_performance])
|
421
|
+
rescue Errno::ECHILD
|
422
|
+
Log.warn "Procpath didn't find process #{pid} to monitor. Maybe it finished already"
|
423
|
+
rescue
|
424
|
+
Log.warn "Procpath failed: #{$!.message}"
|
425
|
+
end
|
426
|
+
end
|
417
427
|
end
|
418
428
|
|
419
429
|
if do_fork
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.30.
|
4
|
+
version: 5.30.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-02-
|
11
|
+
date: 2021-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|