rbbt-util 5.30.0 → 5.30.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc/orchestrate.rb +4 -1
- data/lib/rbbt/hpc/slurm.rb +22 -18
- data/lib/rbbt/persist/tsv/adapter.rb +1 -5
- data/lib/rbbt/tsv.rb +3 -2
- data/lib/rbbt/util/open.rb +3 -1
- data/lib/rbbt/workflow.rb +1 -1
- data/lib/rbbt/workflow/examples.rb +5 -1
- data/lib/rbbt/workflow/util/archive.rb +1 -1
- data/lib/rbbt/workflow/util/provenance.rb +2 -1
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/slurm/list +26 -7
- data/share/rbbt_commands/workflow/task +12 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa5b1545b1a92199d163bf398ca973c6aa3d78ab37cc724b988e007c87824b3b
|
4
|
+
data.tar.gz: 6cd3ce80ea589fb97a211ebe1d2973dd7c0d3f93b1ccefe10b9ecefbc77ff515
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8246d2a2686cc38086800fb6c30949bba93c8e78b7feac220b710a4e80f66858f979c5ea41de0beb67aeefd29de045e2315c3b50128f448bfef521ce3d52e15
|
7
|
+
data.tar.gz: 26559322cae3bfb736565e8da0c7e63c3b006c793ab5cda8ec37a04fabfa460001bae6d7da8164ccd310730b81553d1d3b5a1d14cee9e6929793329ce508ec59
|
data/lib/rbbt/hpc/orchestrate.rb
CHANGED
@@ -5,7 +5,10 @@ module HPC
|
|
5
5
|
def self.job_rules(rules, job)
|
6
6
|
workflow = job.workflow.to_s
|
7
7
|
task_name = job.task_name.to_s
|
8
|
+
task_name = job.overriden.to_s if Symbol === job.overriden
|
9
|
+
|
8
10
|
defaults = rules["defaults"] || {}
|
11
|
+
defaults.merge(rules[workflow]["defaults"] || {}) if rules[workflow]
|
9
12
|
|
10
13
|
job_rules = IndiferentHash.setup(defaults.dup)
|
11
14
|
|
@@ -83,7 +86,7 @@ module HPC
|
|
83
86
|
|
84
87
|
deps = seen[dep.path] ||= self.orchestrate_job(dep, options, skip_dep, seen)
|
85
88
|
if job.canfail_paths.include? dep.path
|
86
|
-
[deps].flatten.collect{|id| ['canfail', id] * ":"}
|
89
|
+
[deps].flatten.compact.collect{|id| ['canfail', id] * ":"}
|
87
90
|
else
|
88
91
|
deps
|
89
92
|
end
|
data/lib/rbbt/hpc/slurm.rb
CHANGED
@@ -21,10 +21,10 @@ module HPC
|
|
21
21
|
exclusive = options.delete :exclusive
|
22
22
|
highmem = options.delete :highmem
|
23
23
|
|
24
|
-
queue = options.delete(:queue) || 'bsc_ls'
|
24
|
+
queue = options.delete(:queue) || Rbbt::Config.get('queue', :slurm_queue, :slurm, :SLURM, :default => 'bsc_ls')
|
25
25
|
task_cpus = options.delete(:task_cpus) || 1
|
26
26
|
nodes = options.delete(:nodes) || 1
|
27
|
-
time = options.delete(:time) || "0:00
|
27
|
+
time = options.delete(:time) || "0:02:00"
|
28
28
|
|
29
29
|
inputs_dir = options.delete :inputs_dir
|
30
30
|
config_keys = options.delete :config_keys
|
@@ -301,11 +301,15 @@ EOF
|
|
301
301
|
coda +=<<-EOF
|
302
302
|
|
303
303
|
# Sync data to target location
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
304
|
+
if [ $exit_status == '0' ]; then
|
305
|
+
mkdir -p "$(dirname '#{target}')"
|
306
|
+
rsync -avztAXHP --copy-unsafe-links "#{source}/" "#{target}/" &>> #{fsync}
|
307
|
+
sync_es="$?"
|
308
|
+
echo $sync_es > #{fsyncexit}
|
309
|
+
find '#{target}' -type l -ls | awk '$13 ~ /^#{target.gsub('/','\/')}/ { sub("#{source}", "#{target}", $13); print $11, $13 }' | while read A B; do rm $A; ln -s $B $A; done
|
310
|
+
else
|
311
|
+
sync_es="$exit_status"
|
312
|
+
fi
|
309
313
|
EOF
|
310
314
|
|
311
315
|
if contain && (wipe_container == "post" || wipe_container == "both")
|
@@ -331,11 +335,11 @@ EOF
|
|
331
335
|
else
|
332
336
|
coda +=<<-EOF
|
333
337
|
##{exec_cmd} system clean
|
334
|
-
if [ $exit_status == '0' -a $sync_es == '0' ]; then
|
338
|
+
#if [ $exit_status == '0' -a $sync_es == '0' ]; then
|
335
339
|
rm -Rfv #{contain} &>> #{fsync}
|
336
|
-
else
|
337
|
-
echo "ERROR: Process failed or results could not sync correctly. Contain directory not purged" &>> #{fsync}
|
338
|
-
fi
|
340
|
+
#else
|
341
|
+
# echo "ERROR: Process failed or results could not sync correctly. Contain directory not purged" &>> #{fsync}
|
342
|
+
#fi
|
339
343
|
EOF
|
340
344
|
|
341
345
|
end
|
@@ -525,7 +529,10 @@ EOF
|
|
525
529
|
tail = options.delete :tail
|
526
530
|
dependencies = options.delete :slurm_dependencies
|
527
531
|
procpath = options.delete :SLURM_procpath
|
532
|
+
|
528
533
|
options[:jobname] = job.clean_name
|
534
|
+
log_level = options.delete :log
|
535
|
+
log_level ||= Log.severity
|
529
536
|
|
530
537
|
workflow = job.workflow
|
531
538
|
|
@@ -550,16 +557,13 @@ EOF
|
|
550
557
|
inputs_dir = File.join(tmp_directory, 'inputs_dir')
|
551
558
|
saved = Step.save_job_inputs(job, inputs_dir)
|
552
559
|
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
else
|
557
|
-
cmd = ['workflow', 'task', workflow.to_s, task.to_s, '--printpath', '--log', (options[:log] || Log.severity).to_s]
|
558
|
-
end
|
560
|
+
cmd = ['workflow', 'task', workflow.to_s, task.to_s, '--printpath', '--log', log_level.to_s]
|
561
|
+
|
562
|
+
cmd << "--procpath_performance='#{tmp_directory}/procpath##{procpath.gsub(',', '#')}'" if procpath
|
559
563
|
|
560
564
|
cmd << "--override_deps='#{override_deps.gsub("'", '\'')}'" if override_deps and not override_deps.empty?
|
561
565
|
|
562
|
-
cmd << "--
|
566
|
+
cmd << "--load_inputs='#{inputs_dir}'" if saved && saved.any?
|
563
567
|
|
564
568
|
template = self.template(cmd, options)
|
565
569
|
jobid = self.issue_template(template, options.merge(:slurm_basedir => slurm_basedir, :dry_run => dry_run, :slurm_dependencies => dependencies))
|
data/lib/rbbt/tsv.rb
CHANGED
@@ -113,11 +113,12 @@ module TSV
|
|
113
113
|
|
114
114
|
data.entity_options = entity_options
|
115
115
|
|
116
|
-
if Path === source
|
117
|
-
|
116
|
+
if Path === source && data.identifiers
|
117
|
+
Path.setup(data.identifiers, source.pkgdir, source.resource)
|
118
118
|
end
|
119
119
|
|
120
120
|
if data.respond_to? :persistence_path
|
121
|
+
data.read
|
121
122
|
data
|
122
123
|
else
|
123
124
|
h = data.dup
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -748,7 +748,9 @@ module Open
|
|
748
748
|
if (dir_sub_path = find_repo_dir(path))
|
749
749
|
writable_repo?(*dir_sub_path)
|
750
750
|
else
|
751
|
-
if File.
|
751
|
+
if File.symlink?(path)
|
752
|
+
File.writable?(File.dirname(path))
|
753
|
+
elsif File.exist?(path)
|
752
754
|
File.writable?(path)
|
753
755
|
else
|
754
756
|
File.writable?(File.dirname(File.expand_path(path)))
|
data/lib/rbbt/workflow.rb
CHANGED
@@ -190,7 +190,7 @@ module Workflow
|
|
190
190
|
return Misc.string2const Misc.camel_case(wf_name)
|
191
191
|
end
|
192
192
|
|
193
|
-
Log.
|
193
|
+
Log.high{"Loading workflow #{wf_name}"}
|
194
194
|
require_local_workflow(wf_name) or
|
195
195
|
(Workflow.autoinstall and `rbbt workflow install #{Misc.snake_case(wf_name)} || rbbt workflow install #{wf_name}` and require_local_workflow(wf_name)) or raise("Workflow not found or could not be loaded: #{ wf_name }")
|
196
196
|
begin
|
@@ -53,7 +53,11 @@ module Workflow
|
|
53
53
|
if file =~ /\.yaml/
|
54
54
|
inputs[input.to_sym] = YAML.load(Open.read(file))
|
55
55
|
else
|
56
|
-
|
56
|
+
if File.symlink?(file)
|
57
|
+
inputs[input.to_sym] = File.readlink(file)
|
58
|
+
else
|
59
|
+
inputs[input.to_sym] = Open.realpath(file)
|
60
|
+
end
|
57
61
|
end
|
58
62
|
when :text
|
59
63
|
Log.debug "Reading #{ input } from #{file}"
|
@@ -78,6 +78,7 @@ class Step
|
|
78
78
|
name = info[:name] || File.basename(path)
|
79
79
|
status = :unsync if status == :done and not Open.exist?(path)
|
80
80
|
status = :notfound if status == :noinfo and not Open.exist?(path)
|
81
|
+
|
81
82
|
str = " " * offset
|
82
83
|
str << prov_report_msg(status, name, path, info)
|
83
84
|
step.dependencies.reverse.each do |dep|
|
@@ -90,7 +91,7 @@ class Step
|
|
90
91
|
if expand_repeats
|
91
92
|
str << Log.color(:green, Log.uncolor(prov_report(dep, offset+1, task)))
|
92
93
|
else
|
93
|
-
str << Log.color(:green, " " * (offset + 1) + Log.uncolor(prov_report_msg(status, name, path, info)))
|
94
|
+
str << Log.color(:green, " " * (offset + 1) + Log.uncolor(prov_report_msg(dep.status, dep.info[:name], dep.path, dep.info)))
|
94
95
|
end
|
95
96
|
end
|
96
97
|
end if step.dependencies
|
data/share/rbbt_commands/migrate
CHANGED
@@ -71,7 +71,7 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
71
71
|
end
|
72
72
|
|
73
73
|
if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
|
74
|
-
exe = m[1]
|
74
|
+
exe = m[1].sub('step_path=$(','')
|
75
75
|
else
|
76
76
|
exe = nil
|
77
77
|
end
|
@@ -98,15 +98,24 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
98
98
|
if File.exists?(fstatus = File.join(dir, 'job.status'))
|
99
99
|
nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
|
100
100
|
elsif job_nodes[id]
|
101
|
-
nodes = job_nodes[id]
|
101
|
+
nodes = job_nodes[id].reject{|n| n.include? "("}
|
102
102
|
else
|
103
103
|
nodes = []
|
104
104
|
end
|
105
105
|
|
106
|
+
if File.exists?(File.join(dir, 'exit.status'))
|
107
|
+
now = File.ctime(File.join(dir, 'exit.status'))
|
108
|
+
else
|
109
|
+
now = Time.now
|
110
|
+
end
|
111
|
+
|
106
112
|
if File.exists?(File.join(dir, 'std.out'))
|
113
|
+
cerrt = File.ctime File.join(dir, 'std.err')
|
114
|
+
coutt = File.ctime File.join(dir, 'std.out')
|
107
115
|
outt = File.mtime File.join(dir, 'std.out')
|
108
116
|
errt = File.mtime File.join(dir, 'std.err')
|
109
|
-
time_diff =
|
117
|
+
time_diff = now - [outt, errt].max
|
118
|
+
time_elapsed = now - [cerrt, coutt].min
|
110
119
|
end
|
111
120
|
|
112
121
|
fdep = File.join(dir, 'dependencies.list')
|
@@ -115,14 +124,19 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
115
124
|
fcadep = File.join(dir, 'canfail_dependencies.list')
|
116
125
|
cadeps = Open.read(fcadep).split("\n") if File.exists?(fcadep)
|
117
126
|
|
118
|
-
if done || error || aborted || running || queued || jobid
|
127
|
+
if done || error || aborted || running || queued || jobid
|
119
128
|
select = false
|
120
129
|
select = true if done && exit_status == 0
|
121
130
|
select = true if error && exit_status && exit_status != 0
|
122
131
|
select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
|
123
|
-
|
124
|
-
select = true if
|
132
|
+
is_running = exit_status.nil? && running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)
|
133
|
+
select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
|
134
|
+
select = true if running && nodes.any? && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
|
125
135
|
select = true if jobid && jobid.split(",").include?(id)
|
136
|
+
select = select && cmd.match(/#{search}/) if search
|
137
|
+
next unless select
|
138
|
+
elsif search
|
139
|
+
select = false
|
126
140
|
select = true if search && cmd.match(/#{search}/)
|
127
141
|
next unless select
|
128
142
|
end
|
@@ -130,6 +144,7 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
130
144
|
|
131
145
|
puts Log.color :blue, dir
|
132
146
|
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
|
147
|
+
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err'))
|
133
148
|
puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
|
134
149
|
puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
|
135
150
|
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
|
@@ -138,7 +153,8 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
138
153
|
puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
|
139
154
|
puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
|
140
155
|
puts Log.color(:magenta, "Nodes: ") << nodes * ", "
|
141
|
-
puts Log.color(:magenta, "
|
156
|
+
puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
|
157
|
+
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
|
142
158
|
|
143
159
|
if options[:sbatch_parameters]
|
144
160
|
puts Log.color(:magenta, "SBATCH parameters: ")
|
@@ -180,8 +196,11 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
180
196
|
start = rss_average.keys.sort.first
|
181
197
|
eend = rss_average.keys.sort.last
|
182
198
|
time_elapsed = eend - start
|
199
|
+
ticks = 1 if ticks == 0
|
200
|
+
time_elapsed = 1 if time_elapsed == 0
|
183
201
|
puts Log.color(:yellow, "CPU average: ") + "%.2f" % ( ticks / clock_ticks / time_elapsed * 100).to_s
|
184
202
|
puts Log.color(:yellow, "RSS average: ") + "%.2f GB" % Misc.mean(rss_average.collect{|t,l| Misc.sum(l) / (1024 * 1024 * 1024)}).to_s
|
203
|
+
puts Log.color(:yellow, "Time: ") + Misc.format_seconds((eend - start))
|
185
204
|
|
186
205
|
end
|
187
206
|
|
@@ -410,10 +410,20 @@ begin
|
|
410
410
|
|
411
411
|
if options[:procpath_performance]
|
412
412
|
require 'rbbt/util/procpath'
|
413
|
+
current_pid = job.info[:pid]
|
413
414
|
job.fork
|
414
415
|
job.soft_grace
|
415
|
-
|
416
|
-
|
416
|
+
sleep 2 if job.info[:pid] == current_pid
|
417
|
+
if job.info[:pid] != current_pid
|
418
|
+
pid = job.info[:pid]
|
419
|
+
begin
|
420
|
+
ProcPath.monitor(pid, options[:procpath_performance])
|
421
|
+
rescue Errno::ECHILD
|
422
|
+
Log.warn "Procpath didn't find process #{pid} to monitor. Maybe it finished already"
|
423
|
+
rescue
|
424
|
+
Log.warn "Procpath failed: #{$!.message}"
|
425
|
+
end
|
426
|
+
end
|
417
427
|
end
|
418
428
|
|
419
429
|
if do_fork
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.30.0
|
4
|
+
version: 5.30.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-02-
|
11
|
+
date: 2021-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|