rbbt-util 5.28.14 → 5.29.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc.rb +1 -551
- data/lib/rbbt/hpc/orchestrate.rb +23 -0
- data/lib/rbbt/hpc/slurm.rb +570 -0
- data/lib/rbbt/workflow.rb +2 -1
- data/lib/rbbt/workflow/accessor.rb +2 -1
- data/lib/rbbt/workflow/examples.rb +2 -2
- data/lib/rbbt/workflow/step.rb +8 -5
- data/lib/rbbt/workflow/step/accessor.rb +13 -11
- data/lib/rbbt/workflow/util/orchestrator.rb +14 -9
- data/share/rbbt_commands/slurm/list +141 -0
- data/share/rbbt_commands/slurm/orchestrate +47 -0
- data/share/rbbt_commands/{workflow/slurm → slurm/task} +10 -3
- metadata +7 -3
data/lib/rbbt/workflow.rb
CHANGED
@@ -385,7 +385,7 @@ module Workflow
|
|
385
385
|
next if default == v
|
386
386
|
next if (String === default and Symbol === v and v.to_s == default)
|
387
387
|
next if (Symbol === default and String === v and v == default.to_s)
|
388
|
-
real_inputs[k] = v
|
388
|
+
real_inputs[k.to_sym] = v
|
389
389
|
end
|
390
390
|
|
391
391
|
jobname_input_value = inputs[jobname_input] || all_defaults[jobname_input]
|
@@ -410,6 +410,7 @@ module Workflow
|
|
410
410
|
job.workflow = self
|
411
411
|
job.clean_name = jobname
|
412
412
|
job.overriden = overriden
|
413
|
+
job.real_inputs = real_inputs.keys
|
413
414
|
job
|
414
415
|
end
|
415
416
|
|
data/lib/rbbt/workflow/accessor.rb
CHANGED
@@ -302,6 +302,7 @@ module Workflow
|
|
302
302
|
def setup_override_dependency(dep, workflow, task_name)
|
303
303
|
dep = Step === dep ? dep : Workflow.load_step(dep)
|
304
304
|
dep.info[:name] = dep.name
|
305
|
+
dep.original_task_name ||= dep.task_name
|
305
306
|
begin
|
306
307
|
workflow = Kernel.const_get workflow if String === workflow
|
307
308
|
dep.task = workflow.tasks[task_name] if dep.task.nil? && workflow.tasks.include?(task_name)
|
@@ -309,7 +310,7 @@ module Workflow
|
|
309
310
|
Log.exception $!
|
310
311
|
end
|
311
312
|
dep.task_name = task_name
|
312
|
-
dep.overriden =
|
313
|
+
dep.overriden = dep.original_task_name.to_sym
|
313
314
|
dep
|
314
315
|
end
|
315
316
|
|
data/lib/rbbt/workflow/examples.rb
CHANGED
@@ -50,8 +50,8 @@ module Workflow
|
|
50
50
|
case input_types[input]
|
51
51
|
when :file
|
52
52
|
Log.debug "Pointing #{ input } to #{file}"
|
53
|
-
if file =~ /\.
|
54
|
-
inputs[input.to_sym] = Open.read(file)
|
53
|
+
if file =~ /\.yaml/
|
54
|
+
inputs[input.to_sym] = YAML.load(Open.read(file))
|
55
55
|
else
|
56
56
|
inputs[input.to_sym] = Open.realpath(file)
|
57
57
|
end
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -12,6 +12,7 @@ class Step
|
|
12
12
|
attr_accessor :exec
|
13
13
|
attr_accessor :relocated
|
14
14
|
attr_accessor :result, :mutex, :seen
|
15
|
+
attr_accessor :real_inputs, :original_task_name
|
15
16
|
|
16
17
|
RBBT_DEBUG_CLEAN = ENV["RBBT_DEBUG_CLEAN"] == 'true'
|
17
18
|
|
@@ -145,11 +146,13 @@ class Step
|
|
145
146
|
seen = []
|
146
147
|
while path = deps.pop
|
147
148
|
dep_info = archived_info[path]
|
148
|
-
dep_info
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
149
|
+
if dep_info
|
150
|
+
dep_info[:inputs].each do |k,v|
|
151
|
+
all_inputs[k] = v unless all_inputs.include?(k)
|
152
|
+
end if dep_info[:inputs]
|
153
|
+
deps.concat(dep_info[:dependencies].collect{|p| p.last } - seen) if dep_info[:dependencies]
|
154
|
+
deps.concat(dep_info[:archived_dependencies].collect{|p| p.last } - seen) if dep_info[:archived_dependencies]
|
155
|
+
end
|
153
156
|
seen << path
|
154
157
|
end
|
155
158
|
|
data/lib/rbbt/workflow/step/accessor.rb
CHANGED
@@ -93,18 +93,18 @@ class Step
|
|
93
93
|
|
94
94
|
Log.debug "Saving job input #{name} (#{type}) into #{path}"
|
95
95
|
case
|
96
|
-
when
|
97
|
-
Open.
|
98
|
-
when
|
99
|
-
Open.write(path, value)
|
100
|
-
when type == "file"
|
96
|
+
when Step === value
|
97
|
+
Open.link(value.path, path)
|
98
|
+
when type.to_s == "file"
|
101
99
|
if String === value && File.exists?(value)
|
102
100
|
Open.link(value, path)
|
103
101
|
else
|
104
|
-
Open.write(path + '.
|
102
|
+
Open.write(path + '.yaml', value.to_yaml)
|
105
103
|
end
|
106
|
-
when
|
107
|
-
value
|
104
|
+
when Array === value
|
105
|
+
Open.write(path, value * "\n")
|
106
|
+
when IO === value
|
107
|
+
Open.write(path, value)
|
108
108
|
else
|
109
109
|
Open.write(path, value.to_s)
|
110
110
|
end
|
@@ -114,7 +114,7 @@ class Step
|
|
114
114
|
def self.save_job_inputs(job, dir, options = nil)
|
115
115
|
options = IndiferentHash.setup options.dup if options
|
116
116
|
|
117
|
-
task_name = job.task_name
|
117
|
+
task_name = Symbol === job.overriden ? job.overriden : job.task_name
|
118
118
|
workflow = job.workflow
|
119
119
|
workflow = Kernel.const_get workflow if String === workflow
|
120
120
|
task_info = workflow.task_info(task_name)
|
@@ -123,9 +123,11 @@ class Step
|
|
123
123
|
input_defaults = task_info[:input_defaults]
|
124
124
|
|
125
125
|
inputs = {}
|
126
|
+
real_inputs = job.real_inputs || job.info[:real_inputs]
|
126
127
|
job.recursive_inputs.zip(job.recursive_inputs.fields).each do |value,name|
|
127
128
|
next unless task_inputs.include? name.to_sym
|
128
|
-
next
|
129
|
+
next unless real_inputs.include? name.to_sym
|
130
|
+
next if options && ! options.include?(name)
|
129
131
|
next if value.nil?
|
130
132
|
next if input_defaults[name] == value
|
131
133
|
inputs[name] = value
|
@@ -222,7 +224,7 @@ class Step
|
|
222
224
|
def init_info(force = false)
|
223
225
|
return nil if @exec || info_file.nil? || (Open.exists?(info_file) && ! force)
|
224
226
|
Open.lock(info_file, :lock => info_lock) do
|
225
|
-
i = {:status => :waiting, :pid => Process.pid, :path => path}
|
227
|
+
i = {:status => :waiting, :pid => Process.pid, :path => path, :real_inputs => real_inputs}
|
226
228
|
i[:dependencies] = dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]} if dependencies
|
227
229
|
Misc.sensiblewrite(info_file, Step.serialize_info(i), :force => true, :lock => false)
|
228
230
|
@info_cache = IndiferentHash.setup(i)
|
data/lib/rbbt/workflow/util/orchestrator.rb
CHANGED
@@ -26,15 +26,25 @@ module Workflow
|
|
26
26
|
workload
|
27
27
|
end
|
28
28
|
|
29
|
+
def self.workload(jobs)
|
30
|
+
jobs.inject({}) do |acc,job|
|
31
|
+
Orchestrator.job_workload(job).each do |j,d|
|
32
|
+
acc[j] = d unless acc.keys.collect{|k| k.path }.include? j.path
|
33
|
+
end
|
34
|
+
acc
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
29
38
|
def self.job_rules(rules, job)
|
30
39
|
workflow = job.workflow.to_s
|
31
40
|
task_name = job.task_name.to_s
|
41
|
+
defaults = rules["defaults"] || {}
|
32
42
|
|
33
|
-
return IndiferentHash.setup(
|
34
|
-
return IndiferentHash.setup(
|
43
|
+
return IndiferentHash.setup(defaults) unless rules[workflow]
|
44
|
+
return IndiferentHash.setup(defaults) unless rules[workflow][task_name]
|
35
45
|
|
36
46
|
job_rules = IndiferentHash.setup(rules[workflow][task_name])
|
37
|
-
|
47
|
+
defaults.each{|k,v| job_rules[k] = v if job_rules[k].nil? } if defaults
|
38
48
|
job_rules
|
39
49
|
end
|
40
50
|
|
@@ -169,12 +179,7 @@ module Workflow
|
|
169
179
|
def process(rules, jobs)
|
170
180
|
begin
|
171
181
|
|
172
|
-
workload =
|
173
|
-
Orchestrator.job_workload(job).each do |j,d|
|
174
|
-
acc[j] = d unless acc.keys.collect{|k| k.path }.include? j.path
|
175
|
-
end
|
176
|
-
acc
|
177
|
-
end
|
182
|
+
workload = Orchestrator.workload(jobs)
|
178
183
|
all_jobs = workload.keys
|
179
184
|
|
180
185
|
top_level_jobs = jobs.collect{|job| job.path }
|
data/share/rbbt_commands/slurm/list
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rbbt-util'
|
4
|
+
require 'rbbt/util/simpleopt'
|
5
|
+
|
6
|
+
#$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
|
7
|
+
|
8
|
+
options = SOPT.setup <<EOF
|
9
|
+
|
10
|
+
Queue a job in Marenostrum
|
11
|
+
|
12
|
+
$ rbbt mnl [options]
|
13
|
+
|
14
|
+
-h--help Print this help
|
15
|
+
-d--done Done jobs only
|
16
|
+
-e--error Error jobs only
|
17
|
+
-a--aborted SLURM aboted jobs
|
18
|
+
-r--running Running jobs only
|
19
|
+
-q--queued Queued jobs only
|
20
|
+
-j--job* Job ids
|
21
|
+
-s--search* Regular expression
|
22
|
+
-t--tail* Show the last lines of the STDERR
|
23
|
+
EOF
|
24
|
+
|
25
|
+
if options[:help]
|
26
|
+
if defined? rbbt_usage
|
27
|
+
rbbt_usage
|
28
|
+
else
|
29
|
+
puts SOPT.doc
|
30
|
+
end
|
31
|
+
exit 0
|
32
|
+
end
|
33
|
+
|
34
|
+
Log.severity = 4
|
35
|
+
done, error, running, queued, aborted, jobid, search, tail = options.values_at :done, :error, :running, :queued, :aborted, :job, :search, :tail
|
36
|
+
|
37
|
+
workdir = File.expand_path('~/rbbt-slurm')
|
38
|
+
Path.setup(workdir)
|
39
|
+
|
40
|
+
running_jobs = begin
|
41
|
+
CMD.cmd('squeue').read.split("\n").collect{|l| l.to_i.to_s}
|
42
|
+
rescue
|
43
|
+
Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
|
44
|
+
$norunningjobs = true
|
45
|
+
[]
|
46
|
+
end
|
47
|
+
|
48
|
+
count = 0
|
49
|
+
workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
50
|
+
dir = File.dirname(fcmd)
|
51
|
+
|
52
|
+
if m = Open.read(fcmd).match(/#CMD: (.*)/)
|
53
|
+
cmd = m[1]
|
54
|
+
else
|
55
|
+
cmd = nil
|
56
|
+
end
|
57
|
+
|
58
|
+
if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
|
59
|
+
exe = m[1]
|
60
|
+
else
|
61
|
+
exe = nil
|
62
|
+
end
|
63
|
+
|
64
|
+
if m = Open.read(fcmd).match(/^CONTAINER_DIR=(.*)/)
|
65
|
+
container_home = m[1]
|
66
|
+
else
|
67
|
+
container_home = nil
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
if File.exists?(fid = File.join(dir, 'job.id'))
|
72
|
+
id = Open.read(fid).chomp
|
73
|
+
else
|
74
|
+
id = nil
|
75
|
+
end
|
76
|
+
|
77
|
+
if File.exists?(fstatus = File.join(dir, 'exit.status'))
|
78
|
+
exit_status = Open.read(fstatus).to_i
|
79
|
+
else
|
80
|
+
exit_status = nil
|
81
|
+
end
|
82
|
+
|
83
|
+
if File.exists?(fstatus = File.join(dir, 'job.status'))
|
84
|
+
nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
|
85
|
+
else
|
86
|
+
nodes = []
|
87
|
+
end
|
88
|
+
|
89
|
+
if File.exists?(File.join(dir, 'std.out'))
|
90
|
+
outt = File.mtime File.join(dir, 'std.out')
|
91
|
+
errt = File.mtime File.join(dir, 'std.err')
|
92
|
+
time_diff = Time.now - [outt, errt].max
|
93
|
+
end
|
94
|
+
|
95
|
+
fdep = File.join(dir, 'dependencies.list')
|
96
|
+
deps = Open.read(fdep).split("\n") if File.exists?(fdep)
|
97
|
+
|
98
|
+
if done || error || aborted || running || queued || jobid || search
|
99
|
+
select = false
|
100
|
+
select = true if done && exit_status == 0
|
101
|
+
select = true if error && exit_status && exit_status != 0
|
102
|
+
select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
|
103
|
+
select = true if queued && deps && (running_jobs & deps).any?
|
104
|
+
select = true if running && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
|
105
|
+
select = true if jobid && jobid.split(",").include?(id)
|
106
|
+
select = true if search && cmd.match(/#{search}/)
|
107
|
+
next unless select
|
108
|
+
end
|
109
|
+
|
110
|
+
|
111
|
+
puts Log.color :blue, dir
|
112
|
+
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
|
113
|
+
puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
|
114
|
+
puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
|
115
|
+
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
|
116
|
+
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
|
117
|
+
puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
|
118
|
+
puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
|
119
|
+
puts Log.color(:magenta, "Nodes: ") << nodes * ", "
|
120
|
+
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
|
121
|
+
|
122
|
+
if tail && File.exists?(File.join(dir, 'std.err'))
|
123
|
+
if exit_status && exit_status != 0
|
124
|
+
puts Log.color(:magenta, "First error or exception found: ")
|
125
|
+
puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
|
126
|
+
elsif exit_status
|
127
|
+
puts Log.color(:magenta, "Completed jobs: ")
|
128
|
+
puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
|
129
|
+
else
|
130
|
+
puts Log.color(:magenta, "Log tail: ")
|
131
|
+
puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
count += 1
|
136
|
+
|
137
|
+
end
|
138
|
+
|
139
|
+
puts
|
140
|
+
puts "Found #{count} jobs"
|
141
|
+
|
data/share/rbbt_commands/slurm/orchestrate
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rbbt/util/simpleopt'
|
4
|
+
require 'rbbt/workflow'
|
5
|
+
require 'rbbt/workflow/usage'
|
6
|
+
require 'rbbt/hpc'
|
7
|
+
require 'rbbt/hpc/orchestrate'
|
8
|
+
require 'time'
|
9
|
+
|
10
|
+
$slurm_options = SOPT.get <<EOF
|
11
|
+
-dr--dry_run Print only the template
|
12
|
+
-cj--clean_job Clean job
|
13
|
+
--drbbt* Use development version of rbbt
|
14
|
+
-sing--singularity Use Singularity
|
15
|
+
-ug--user_group* Use alternative user group for group project directory
|
16
|
+
-c--contain* Contain in directory (using Singularity)
|
17
|
+
-s--sync* Contain in directory and sync jobs
|
18
|
+
-e--exclusive Make exclusive use of the node
|
19
|
+
-hm--highmem Make use of highmem cores
|
20
|
+
-wc--wipe_container* Wipe the jobs from the contain directory
|
21
|
+
-CS--contain_and_sync Contain and sync to default locations
|
22
|
+
-ci--copy_image When using a container directory, copy image there
|
23
|
+
-t--tail Tail the logs
|
24
|
+
-q--queue* Queue
|
25
|
+
-t--task_cpus* Tasks
|
26
|
+
-W--workflows* Additional workflows
|
27
|
+
-tm--time* Time
|
28
|
+
-R--rules* Orchestration rules
|
29
|
+
-rmb--remove_slurm_basedir Remove the SLURM working directory (command, STDIN, exit status, ...)
|
30
|
+
EOF
|
31
|
+
|
32
|
+
class Step
|
33
|
+
def run(*args)
|
34
|
+
if done?
|
35
|
+
self.load
|
36
|
+
else
|
37
|
+
begin
|
38
|
+
Log.debug "Issuing SLURM job for #{self.path}"
|
39
|
+
HPC::SLURM.orchestrate_job(self, SOPT::GOT_OPTIONS.merge($slurm_options))
|
40
|
+
rescue HPC::SBATCH
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
ARGV.concat ["-W", $slurm_options[:workflows]] if $slurm_options[:workflows]
|
47
|
+
load Rbbt.share.rbbt_commands.workflow.task.find
|
data/share/rbbt_commands/{workflow/slurm → slurm/task}
CHANGED
@@ -9,8 +9,9 @@ require 'time'
|
|
9
9
|
$slurm_options = SOPT.get <<EOF
|
10
10
|
-dr--dry_run Print only the template
|
11
11
|
-cj--clean_job Clean job
|
12
|
-
--drbbt Use development version of rbbt
|
12
|
+
--drbbt* Use development version of rbbt
|
13
13
|
-sing--singularity Use Singularity
|
14
|
+
-ug--user_group* Use alternative user group for group project directory
|
14
15
|
-c--contain* Contain in directory (using Singularity)
|
15
16
|
-s--sync* Contain in directory and sync jobs
|
16
17
|
-e--exclusive Make exclusive use of the node
|
@@ -21,8 +22,9 @@ $slurm_options = SOPT.get <<EOF
|
|
21
22
|
-t--tail Tail the logs
|
22
23
|
-q--queue* Queue
|
23
24
|
-t--task_cpus* Tasks
|
25
|
+
-W--workflows* Additional workflows
|
24
26
|
-tm--time* Time
|
25
|
-
-
|
27
|
+
-rmb--remove_slurm_basedir Remove the SLURM working directory (command, STDIN, exit status, ...)
|
26
28
|
EOF
|
27
29
|
|
28
30
|
class Step
|
@@ -30,9 +32,14 @@ class Step
|
|
30
32
|
if done?
|
31
33
|
self.load
|
32
34
|
else
|
33
|
-
|
35
|
+
begin
|
36
|
+
Log.debug "Issuing SLURM job for #{self.path}"
|
37
|
+
HPC::SLURM.run_job(self, SOPT::GOT_OPTIONS.merge($slurm_options))
|
38
|
+
rescue HPC::SBATCH
|
39
|
+
end
|
34
40
|
end
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
44
|
+
ARGV.concat ["-W", $slurm_options[:workflows]] if $slurm_options[:workflows]
|
38
45
|
load Rbbt.share.rbbt_commands.workflow.task.find
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.29.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -211,6 +211,8 @@ files:
|
|
211
211
|
- lib/rbbt/entity/identifiers.rb
|
212
212
|
- lib/rbbt/fix_width_table.rb
|
213
213
|
- lib/rbbt/hpc.rb
|
214
|
+
- lib/rbbt/hpc/orchestrate.rb
|
215
|
+
- lib/rbbt/hpc/slurm.rb
|
214
216
|
- lib/rbbt/knowledge_base.rb
|
215
217
|
- lib/rbbt/knowledge_base/enrichment.rb
|
216
218
|
- lib/rbbt/knowledge_base/entity.rb
|
@@ -376,6 +378,9 @@ files:
|
|
376
378
|
- share/rbbt_commands/resource/produce
|
377
379
|
- share/rbbt_commands/resource/read
|
378
380
|
- share/rbbt_commands/rsync
|
381
|
+
- share/rbbt_commands/slurm/list
|
382
|
+
- share/rbbt_commands/slurm/orchestrate
|
383
|
+
- share/rbbt_commands/slurm/task
|
379
384
|
- share/rbbt_commands/stat/abs
|
380
385
|
- share/rbbt_commands/stat/boxplot
|
381
386
|
- share/rbbt_commands/stat/compare_lists
|
@@ -430,7 +435,6 @@ files:
|
|
430
435
|
- share/rbbt_commands/workflow/remote/list
|
431
436
|
- share/rbbt_commands/workflow/remote/remove
|
432
437
|
- share/rbbt_commands/workflow/server
|
433
|
-
- share/rbbt_commands/workflow/slurm
|
434
438
|
- share/rbbt_commands/workflow/task
|
435
439
|
- share/rbbt_commands/workflow/trace
|
436
440
|
- share/unicorn.rb
|