rbbt-util 5.33.4 → 5.33.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc/batch.rb +1 -1
- data/lib/rbbt/hpc/orchestrate/chains.rb +1 -89
- data/lib/rbbt/hpc/orchestrate.rb +22 -2
- data/lib/rbbt/hpc/slurm.rb +4 -1
- data/lib/rbbt/util/migrate.rb +6 -0
- data/lib/rbbt/workflow/definition.rb +12 -12
- data/lib/rbbt/workflow/step/run.rb +2 -2
- data/lib/rbbt/workflow/util/orchestrator.rb +3 -3
- data/share/Rlib/plot.R +1 -0
- data/share/rbbt_commands/hpc/list +13 -10
- data/share/rbbt_commands/hpc/tail +22 -2
- data/share/rbbt_commands/lsf/list +13 -10
- data/share/rbbt_commands/lsf/tail +22 -2
- data/share/rbbt_commands/slurm/list +13 -10
- data/share/rbbt_commands/slurm/tail +22 -2
- data/share/rbbt_commands/workflow/prov +1 -1
- data/share/rbbt_commands/workflow/task +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f4272048a5c86e74b051600f0db895ea42f4ad51509a0de9ce88452d27133746
|
|
4
|
+
data.tar.gz: f10d19e028de390beb5e2649c7d2f1e0a7754d3235abe3eeba2790946b347f64
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0667eb9d5783d7722c33f72a66a1c406d330d086a37660aa7682006d10fe15860a9e4cbfcb461f17ed15b146fa294a6f6d8158d937884f14dc7952908f63c357
|
|
7
|
+
data.tar.gz: f3ba25b347bc4a7f7b56ecdbd1a0bef932b646ac030b3d98fe6ddb76f36a6863073f4692747ceef32c42e4344538a7ee19b531a25b16cc2fa208b201887b441e
|
data/lib/rbbt/hpc/batch.rb
CHANGED
|
@@ -544,7 +544,7 @@ env > #{batch_options[:fenv]}
|
|
|
544
544
|
Misc.add_defaults options,
|
|
545
545
|
:batch_dir => batch_dir,
|
|
546
546
|
:inputs_dir => File.join(batch_dir, "inputs_dir"),
|
|
547
|
-
:workflows => workflows_to_load * ","
|
|
547
|
+
:workflows => workflows_to_load.uniq * ","
|
|
548
548
|
|
|
549
549
|
options[:procpath_performance] ||= File.join(batch_dir, "procpath##{procpath.gsub(',', '#')}") if procpath
|
|
550
550
|
|
|
data/lib/rbbt/hpc/orchestrate/chains.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
module HPC
|
|
2
2
|
module Orchestration
|
|
3
3
|
def self.check_chains(chains, job)
|
|
4
|
+
return [] if Symbol === job.overriden
|
|
4
5
|
matches = []
|
|
5
6
|
chains.each do |name, chain|
|
|
6
7
|
next unless chain[:tasks].include?(job.workflow.to_s)
|
|
@@ -36,95 +37,6 @@ module HPC
|
|
|
36
37
|
(job.dependencies + job.input_dependencies).uniq.select{|d| ! d.done? || d.dirty? }
|
|
37
38
|
end
|
|
38
39
|
|
|
39
|
-
#def self.job_workload(job)
|
|
40
|
-
# workload = []
|
|
41
|
-
# heap = []
|
|
42
|
-
# heap << job
|
|
43
|
-
# while job = heap.pop
|
|
44
|
-
# next if job.done?
|
|
45
|
-
# workload << job
|
|
46
|
-
# heap.concat job_dependencies(job)
|
|
47
|
-
# heap.uniq!
|
|
48
|
-
# end
|
|
49
|
-
# workload.uniq
|
|
50
|
-
#end
|
|
51
|
-
|
|
52
|
-
#def self.top_level_job(jobs)
|
|
53
|
-
# top = jobs.select do |job|
|
|
54
|
-
# (jobs - job_workload(job)).empty? &&
|
|
55
|
-
# (job_workload(job) - jobs).select{|j| (job_workload(j) & jobs).any? }.empty?
|
|
56
|
-
# end
|
|
57
|
-
# return nil if top.length != 1
|
|
58
|
-
# top.first
|
|
59
|
-
#end
|
|
60
|
-
|
|
61
|
-
#def self.job_chains(rules, job)
|
|
62
|
-
# workload = job_workload(job)
|
|
63
|
-
# chains = parse_chains(rules)
|
|
64
|
-
|
|
65
|
-
# chain_jobs = {}
|
|
66
|
-
# workload.each do |job|
|
|
67
|
-
# check_chains(chains, job).each do |match|
|
|
68
|
-
# chain_jobs[match] ||= []
|
|
69
|
-
# chain_jobs[match] << job
|
|
70
|
-
# end
|
|
71
|
-
# end
|
|
72
|
-
|
|
73
|
-
# job_chains = []
|
|
74
|
-
|
|
75
|
-
# seen = []
|
|
76
|
-
# chain_jobs.sort_by{|name,jobs| jobs.length }.reverse.each do |name,jobs|
|
|
77
|
-
# remain = jobs - seen
|
|
78
|
-
# next unless remain.length > 1
|
|
79
|
-
# top_level_job = top_level_job(jobs)
|
|
80
|
-
# next if top_level_job.nil?
|
|
81
|
-
# job_chains << {:jobs => remain, :rules => chains[name][:rules], :top_level_job => top_level_job}
|
|
82
|
-
# seen.concat remain
|
|
83
|
-
# end
|
|
84
|
-
|
|
85
|
-
# job_chains
|
|
86
|
-
#end
|
|
87
|
-
|
|
88
|
-
#def self._job_chains(rules, job)
|
|
89
|
-
# workload = job_workload(job)
|
|
90
|
-
# chains = parse_chains(rules)
|
|
91
|
-
|
|
92
|
-
# matches = check_chains(chains, job)
|
|
93
|
-
|
|
94
|
-
# job_chains = {}
|
|
95
|
-
# job.dependencies.each do |dep|
|
|
96
|
-
# dep_chains = _job_chains(rules, dep)
|
|
97
|
-
# matches.each do |match|
|
|
98
|
-
# if dep_chains[match] && dep_chains[match].include?(dep)
|
|
99
|
-
# dep_chains[match].prepend job
|
|
100
|
-
# end
|
|
101
|
-
# end
|
|
102
|
-
# job_chains.merge!(dep_chains)
|
|
103
|
-
# end
|
|
104
|
-
|
|
105
|
-
# matches.each do |match|
|
|
106
|
-
# job_chains[match] ||= [job]
|
|
107
|
-
# end
|
|
108
|
-
|
|
109
|
-
# job_chains
|
|
110
|
-
#end
|
|
111
|
-
|
|
112
|
-
#def self.job_chains(rules, job)
|
|
113
|
-
# job_chains = self._job_chains(rules, job)
|
|
114
|
-
# iif job_chains
|
|
115
|
-
# chains = parse_chains(rules)
|
|
116
|
-
|
|
117
|
-
# seen = []
|
|
118
|
-
# job_chains.collect do |name,jobs|
|
|
119
|
-
# remain = jobs - seen
|
|
120
|
-
# next unless remain.length > 1
|
|
121
|
-
# top_level_job = top_level_job(jobs)
|
|
122
|
-
# next if top_level_job.nil?
|
|
123
|
-
# seen.concat remain
|
|
124
|
-
# {:jobs => remain, :rules => chains[name][:rules], :top_level_job => top_level_job}
|
|
125
|
-
# end.compact
|
|
126
|
-
#end
|
|
127
|
-
|
|
128
40
|
def self.job_chains(rules, job)
|
|
129
41
|
chains = self.parse_chains(rules)
|
|
130
42
|
|
data/lib/rbbt/hpc/orchestrate.rb
CHANGED
|
@@ -5,6 +5,19 @@ require 'rbbt/hpc/orchestrate/batches'
|
|
|
5
5
|
module HPC
|
|
6
6
|
module Orchestration
|
|
7
7
|
|
|
8
|
+
def prepare_for_execution(job)
|
|
9
|
+
rec_dependencies = job.rec_dependencies(true)
|
|
10
|
+
|
|
11
|
+
return if rec_dependencies.empty?
|
|
12
|
+
|
|
13
|
+
all_deps = rec_dependencies + [job]
|
|
14
|
+
|
|
15
|
+
all_deps.each do |dep|
|
|
16
|
+
Step.prepare_for_execution(dep)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
end
|
|
20
|
+
|
|
8
21
|
def orchestrate_job(job, options)
|
|
9
22
|
options.delete "recursive_clean"
|
|
10
23
|
options.delete "clean_task"
|
|
@@ -14,6 +27,9 @@ module HPC
|
|
|
14
27
|
options.delete "detach"
|
|
15
28
|
options.delete "jobname"
|
|
16
29
|
|
|
30
|
+
Log.high "Prepare for exec"
|
|
31
|
+
prepare_for_execution(job)
|
|
32
|
+
|
|
17
33
|
if options[:orchestration_rules]
|
|
18
34
|
rules = YAML.load(Open.read(options[:orchestration_rules]))
|
|
19
35
|
elsif Rbbt.etc.slurm["default.yaml"].exists?
|
|
@@ -24,6 +40,7 @@ module HPC
|
|
|
24
40
|
|
|
25
41
|
IndiferentHash.setup(rules)
|
|
26
42
|
|
|
43
|
+
Log.high "Compute batches"
|
|
27
44
|
batches = HPC::Orchestration.job_batches(rules, job)
|
|
28
45
|
|
|
29
46
|
batch_ids = {}
|
|
@@ -31,7 +48,8 @@ module HPC
|
|
|
31
48
|
top = batches.select{|b| b[:deps].nil? || (b[:deps] - batch_ids.keys).empty? }.first
|
|
32
49
|
raise "No batch without unmet dependencies" if top.nil?
|
|
33
50
|
batches.delete top
|
|
34
|
-
|
|
51
|
+
|
|
52
|
+
job_options = HPC::Orchestration.merge_rules(options, top[:rules])
|
|
35
53
|
|
|
36
54
|
if top[:deps].nil?
|
|
37
55
|
batch_dependencies = []
|
|
@@ -59,7 +77,9 @@ module HPC
|
|
|
59
77
|
|
|
60
78
|
if options[:dry_run]
|
|
61
79
|
puts Log.color(:magenta, "Manifest: ") + Log.color(:blue, job_options[:manifest] * ", ") + " - tasks: #{job_options[:task_cpus] || 1} - time: #{job_options[:time]} - config: #{job_options[:config_keys]}"
|
|
62
|
-
puts Log.color(:
|
|
80
|
+
puts Log.color(:magenta, "Deps: ") + Log.color(:blue, job_options[:batch_dependencies]*", ")
|
|
81
|
+
puts Log.color(:yellow, "Path: ") + top[:top_level].path
|
|
82
|
+
puts Log.color(:yellow, "Options: ") + Misc.fingerprint(job_options)
|
|
63
83
|
batch_ids[top] = top[:top_level].task_signature
|
|
64
84
|
else
|
|
65
85
|
id = run_job(top[:top_level], job_options)
|
data/lib/rbbt/hpc/slurm.rb
CHANGED
|
@@ -32,6 +32,8 @@ export BATCH_SYSTEM=SLURM
|
|
|
32
32
|
constraint = Misc.process_options options, :constraint
|
|
33
33
|
gres = Misc.process_options options, :gres
|
|
34
34
|
|
|
35
|
+
constraint = [constraint, "highmem"].compact * "&" if highmem
|
|
36
|
+
|
|
35
37
|
mem = Misc.process_options options, :mem
|
|
36
38
|
mem_per_cpu = Misc.process_options options, :mem_per_cpu
|
|
37
39
|
|
|
@@ -50,14 +52,15 @@ export BATCH_SYSTEM=SLURM
|
|
|
50
52
|
"cpus-per-task" => task_cpus,
|
|
51
53
|
"nodes" => nodes,
|
|
52
54
|
"time" => time,
|
|
55
|
+
"constraint" => constraint,
|
|
53
56
|
"exclusive" => exclusive,
|
|
54
|
-
"highmem" => highmem,
|
|
55
57
|
"licenses" => licenses,
|
|
56
58
|
"gres" => gres,
|
|
57
59
|
"mem" => mem,
|
|
58
60
|
"mem-per-cpu" => mem_per_cpu,
|
|
59
61
|
}
|
|
60
62
|
|
|
63
|
+
|
|
61
64
|
header =<<-EOF
|
|
62
65
|
#!/bin/bash
|
|
63
66
|
EOF
|
data/lib/rbbt/util/migrate.rb
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
module Rbbt
|
|
2
|
+
|
|
3
|
+
prepare_for_execution(job)
|
|
2
4
|
def self.migrate_source_paths(path, resource = Rbbt, source = nil)
|
|
3
5
|
if source
|
|
4
6
|
lpath, *paths = Misc.ssh_run(source, <<-EOF).split("\n")
|
|
@@ -44,6 +46,8 @@ puts resource[path].find(search_path)
|
|
|
44
46
|
excludes += (options[:exclude] || "").split(/,\s*/)
|
|
45
47
|
excludes_str = excludes.collect{|s| "--exclude '#{s}'" } * " "
|
|
46
48
|
|
|
49
|
+
hard_link = options[:hard_link]
|
|
50
|
+
|
|
47
51
|
other = options[:other] || []
|
|
48
52
|
|
|
49
53
|
test_str = options[:test] ? '-nv' : ''
|
|
@@ -82,6 +86,8 @@ puts resource[path].find(search_path)
|
|
|
82
86
|
# rsync_args = "-avztAXHP --copy-unsafe-links"
|
|
83
87
|
rsync_args = "-avztAHP --copy-unsafe-links"
|
|
84
88
|
|
|
89
|
+
rsync_args << " --link-dest '#{source_path}'" if hard_link && ! options[:source]
|
|
90
|
+
|
|
85
91
|
cmd = "rsync #{rsync_args} #{test_str} #{files_from_str} #{excludes_str} '#{source_path}' #{target_path} #{other * " "}"
|
|
86
92
|
|
|
87
93
|
cmd << " && rm -Rf #{source_path}" if options[:delete] && ! options[:files]
|
|
data/lib/rbbt/workflow/definition.rb
CHANGED
|
@@ -93,22 +93,22 @@ module Workflow
|
|
|
93
93
|
Open.rm_rf self.files_dir if Open.exist? self.files_dir
|
|
94
94
|
FileUtils.cp_r dep.files_dir, self.files_dir if Open.exist?(dep.files_dir)
|
|
95
95
|
|
|
96
|
-
if dep.overriden
|
|
96
|
+
if dep.overriden || ! Workflow.job_path?(dep.path)
|
|
97
97
|
Open.link dep.path, self.tmp_path
|
|
98
98
|
else
|
|
99
99
|
Open.ln_h dep.path, self.tmp_path
|
|
100
|
-
end
|
|
101
100
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
101
|
+
case remove.to_s
|
|
102
|
+
when 'true'
|
|
103
|
+
dep.clean
|
|
104
|
+
when 'recursive'
|
|
105
|
+
(dep.dependencies + dep.rec_dependencies).uniq.each do |d|
|
|
106
|
+
next if d.overriden
|
|
107
|
+
d.clean unless config(:remove_dep, d.task_signature, d.task_name, d.workflow.to_s, :default => true).to_s == 'false'
|
|
108
|
+
end
|
|
109
|
+
dep.clean unless config(:remove_dep, dep.task_signature, dep.task_name, dep.workflow.to_s, :default => true).to_s == 'false'
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
112
|
else
|
|
113
113
|
if Open.exists?(dep.files_dir)
|
|
114
114
|
Open.rm_rf self.files_dir
|
|
data/lib/rbbt/workflow/step/run.rb
CHANGED
|
@@ -181,8 +181,8 @@ class Step
|
|
|
181
181
|
# end
|
|
182
182
|
#end
|
|
183
183
|
|
|
184
|
-
Log.
|
|
185
|
-
Log.
|
|
184
|
+
Log.medium "Some newer files found: #{Misc.fingerprint outdated_time}" if outdated_time.any?
|
|
185
|
+
Log.medium "Some outdated files found: #{Misc.fingerprint outdated_dep}" if outdated_dep.any?
|
|
186
186
|
|
|
187
187
|
outdated_time + outdated_dep
|
|
188
188
|
end
|
|
data/lib/rbbt/workflow/util/orchestrator.rb
CHANGED
|
@@ -5,10 +5,10 @@ module Workflow
|
|
|
5
5
|
|
|
6
6
|
def self.job_workload(job)
|
|
7
7
|
workload = {job => []}
|
|
8
|
-
return workload if job.done?
|
|
8
|
+
return workload if job.done? && ! job.dirty?
|
|
9
9
|
|
|
10
10
|
job.dependencies.each do |dep|
|
|
11
|
-
next if dep.done?
|
|
11
|
+
next if dep.done? && ! job.dirty?
|
|
12
12
|
workload.merge!(job_workload(dep))
|
|
13
13
|
workload[job] += workload[dep]
|
|
14
14
|
workload[job] << dep
|
|
@@ -16,7 +16,7 @@ module Workflow
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
job.input_dependencies.each do |dep|
|
|
19
|
-
next if dep.done?
|
|
19
|
+
next if dep.done? && ! job.dirty?
|
|
20
20
|
workload.merge!(job_workload(dep))
|
|
21
21
|
workload[job] += workload[dep]
|
|
22
22
|
workload[job] << dep
|
data/share/Rlib/plot.R
CHANGED
|
data/share/rbbt_commands/hpc/list
CHANGED
|
@@ -21,6 +21,7 @@ $ rbbt slurm list [options]
|
|
|
21
21
|
-j--job* Job ids
|
|
22
22
|
-s--search* Regular expression
|
|
23
23
|
-t--tail* Show the last lines of the STDERR
|
|
24
|
+
-l--long Show more entries
|
|
24
25
|
-p--progress Report progress of job and the dependencies
|
|
25
26
|
-BP--batch_parameters show batch parameters
|
|
26
27
|
-BPP--batch_procpath show Procpath performance summary
|
|
@@ -40,6 +41,8 @@ end
|
|
|
40
41
|
batch_system = options.delete :batch_system
|
|
41
42
|
batch_system ||= 'auto'
|
|
42
43
|
|
|
44
|
+
long = options.delete :long
|
|
45
|
+
|
|
43
46
|
HPC::BATCH_MODULE = HPC.batch_system batch_system
|
|
44
47
|
|
|
45
48
|
raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
|
|
@@ -183,24 +186,24 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
|
183
186
|
|
|
184
187
|
|
|
185
188
|
puts Log.color :blue, dir
|
|
186
|
-
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
|
|
187
|
-
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err'))
|
|
188
|
-
puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest)
|
|
189
|
+
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s if long
|
|
190
|
+
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err')) && long
|
|
191
|
+
puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest) if long
|
|
189
192
|
puts Log.color(:magenta, "Step path: ") << Log.color(:yellow, step_path)
|
|
190
193
|
puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
|
|
191
|
-
puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
|
|
192
|
-
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
|
|
193
|
-
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
|
|
194
|
+
puts Log.color(:magenta, "Exec: ") << (exe || "Missing") if long
|
|
195
|
+
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing") if long
|
|
196
|
+
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home && long
|
|
194
197
|
if different_system
|
|
195
198
|
puts Log.color(:magenta, "Job ID (#{Log.color(:red, job_batch_system)}): ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id) )
|
|
196
199
|
else
|
|
197
200
|
puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
|
|
198
|
-
end
|
|
201
|
+
end
|
|
199
202
|
puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
|
|
200
203
|
puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
|
|
201
|
-
puts Log.color(:magenta, "Nodes: ") << nodes * ", "
|
|
204
|
+
puts Log.color(:magenta, "Nodes: ") << nodes * ", " if long
|
|
202
205
|
puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
|
|
203
|
-
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
|
|
206
|
+
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)") if long
|
|
204
207
|
|
|
205
208
|
if options[:batch_parameters]
|
|
206
209
|
puts Log.color(:magenta, "BATCH parameters: ")
|
|
@@ -281,7 +284,7 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
|
281
284
|
puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
|
|
282
285
|
else
|
|
283
286
|
puts Log.color(:magenta, "Log tail: ")
|
|
284
|
-
puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | tail -n #{tail.to_i} ").read
|
|
287
|
+
puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | grep -v -e '^[[:space:]]*$' | grep -v \"\\(STDOUT\\|STDERR\\):[[:space:]]*$\" | tail -n #{tail.to_i} ").read
|
|
285
288
|
end
|
|
286
289
|
end
|
|
287
290
|
|
|
data/share/rbbt_commands/hpc/tail
CHANGED
|
@@ -10,7 +10,7 @@ options = SOPT.setup <<EOF
|
|
|
10
10
|
|
|
11
11
|
Queue a job in Marenostrum
|
|
12
12
|
|
|
13
|
-
$ rbbt slurm tail <directory> [options]
|
|
13
|
+
$ rbbt slurm tail <directory|jobid> [options]
|
|
14
14
|
|
|
15
15
|
-h--help Print this help
|
|
16
16
|
EOF
|
|
@@ -35,8 +35,28 @@ directory = ARGV.shift
|
|
|
35
35
|
|
|
36
36
|
raise ParameterException if directory.nil?
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
if directory =~ /^[0-9]*$/
|
|
39
|
+
workdir = File.expand_path('~/rbbt-batch')
|
|
40
|
+
Path.setup(workdir)
|
|
41
|
+
|
|
42
|
+
workdir.glob("**/job.id").each do |file|
|
|
43
|
+
next unless directory == Open.read(file).strip
|
|
44
|
+
directory = File.dirname(file)
|
|
45
|
+
break
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
raise ParameterException, "Could not identify job #{directory}" unless File.exists?(directory)
|
|
39
50
|
|
|
40
51
|
require 'rbbt/hpc/slurm'
|
|
41
52
|
|
|
53
|
+
command_txt = Open.read(File.join(directory, 'command.batch'))
|
|
54
|
+
if m = command_txt.match(/#STEP_PATH: (.*)/)
|
|
55
|
+
step_path = m[1]
|
|
56
|
+
else
|
|
57
|
+
step_path = nil
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
puts Log.color(:magenta, "Step path: ") + step_path if step_path
|
|
61
|
+
|
|
42
62
|
HPC::BATCH_MODULE.follow_job directory, true
|
|
data/share/rbbt_commands/lsf/list
CHANGED
|
@@ -21,6 +21,7 @@ $ rbbt slurm list [options]
|
|
|
21
21
|
-j--job* Job ids
|
|
22
22
|
-s--search* Regular expression
|
|
23
23
|
-t--tail* Show the last lines of the STDERR
|
|
24
|
+
-l--long Show more entries
|
|
24
25
|
-p--progress Report progress of job and the dependencies
|
|
25
26
|
-BP--batch_parameters show batch parameters
|
|
26
27
|
-BPP--batch_procpath show Procpath performance summary
|
|
@@ -40,6 +41,8 @@ end
|
|
|
40
41
|
batch_system = options.delete :batch_system
|
|
41
42
|
batch_system ||= 'auto'
|
|
42
43
|
|
|
44
|
+
long = options.delete :long
|
|
45
|
+
|
|
43
46
|
HPC::BATCH_MODULE = HPC.batch_system batch_system
|
|
44
47
|
|
|
45
48
|
raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
|
|
@@ -183,24 +186,24 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
|
183
186
|
|
|
184
187
|
|
|
185
188
|
puts Log.color :blue, dir
|
|
186
|
-
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
|
|
187
|
-
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err'))
|
|
188
|
-
puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest)
|
|
189
|
+
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s if long
|
|
190
|
+
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err')) && long
|
|
191
|
+
puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest) if long
|
|
189
192
|
puts Log.color(:magenta, "Step path: ") << Log.color(:yellow, step_path)
|
|
190
193
|
puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
|
|
191
|
-
puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
|
|
192
|
-
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
|
|
193
|
-
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
|
|
194
|
+
puts Log.color(:magenta, "Exec: ") << (exe || "Missing") if long
|
|
195
|
+
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing") if long
|
|
196
|
+
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home && long
|
|
194
197
|
if different_system
|
|
195
198
|
puts Log.color(:magenta, "Job ID (#{Log.color(:red, job_batch_system)}): ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id) )
|
|
196
199
|
else
|
|
197
200
|
puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
|
|
198
|
-
end
|
|
201
|
+
end
|
|
199
202
|
puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
|
|
200
203
|
puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
|
|
201
|
-
puts Log.color(:magenta, "Nodes: ") << nodes * ", "
|
|
204
|
+
puts Log.color(:magenta, "Nodes: ") << nodes * ", " if long
|
|
202
205
|
puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
|
|
203
|
-
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
|
|
206
|
+
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)") if long
|
|
204
207
|
|
|
205
208
|
if options[:batch_parameters]
|
|
206
209
|
puts Log.color(:magenta, "BATCH parameters: ")
|
|
@@ -281,7 +284,7 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
|
281
284
|
puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
|
|
282
285
|
else
|
|
283
286
|
puts Log.color(:magenta, "Log tail: ")
|
|
284
|
-
puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | tail -n #{tail.to_i} ").read
|
|
287
|
+
puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | grep -v -e '^[[:space:]]*$' | grep -v \"\\(STDOUT\\|STDERR\\):[[:space:]]*$\" | tail -n #{tail.to_i} ").read
|
|
285
288
|
end
|
|
286
289
|
end
|
|
287
290
|
|
|
data/share/rbbt_commands/lsf/tail
CHANGED
|
@@ -10,7 +10,7 @@ options = SOPT.setup <<EOF
|
|
|
10
10
|
|
|
11
11
|
Queue a job in Marenostrum
|
|
12
12
|
|
|
13
|
-
$ rbbt slurm tail <directory> [options]
|
|
13
|
+
$ rbbt slurm tail <directory|jobid> [options]
|
|
14
14
|
|
|
15
15
|
-h--help Print this help
|
|
16
16
|
EOF
|
|
@@ -35,8 +35,28 @@ directory = ARGV.shift
|
|
|
35
35
|
|
|
36
36
|
raise ParameterException if directory.nil?
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
if directory =~ /^[0-9]*$/
|
|
39
|
+
workdir = File.expand_path('~/rbbt-batch')
|
|
40
|
+
Path.setup(workdir)
|
|
41
|
+
|
|
42
|
+
workdir.glob("**/job.id").each do |file|
|
|
43
|
+
next unless directory == Open.read(file).strip
|
|
44
|
+
directory = File.dirname(file)
|
|
45
|
+
break
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
raise ParameterException, "Could not identify job #{directory}" unless File.exists?(directory)
|
|
39
50
|
|
|
40
51
|
require 'rbbt/hpc/slurm'
|
|
41
52
|
|
|
53
|
+
command_txt = Open.read(File.join(directory, 'command.batch'))
|
|
54
|
+
if m = command_txt.match(/#STEP_PATH: (.*)/)
|
|
55
|
+
step_path = m[1]
|
|
56
|
+
else
|
|
57
|
+
step_path = nil
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
puts Log.color(:magenta, "Step path: ") + step_path if step_path
|
|
61
|
+
|
|
42
62
|
HPC::BATCH_MODULE.follow_job directory, true
|
|
data/share/rbbt_commands/slurm/list
CHANGED
|
@@ -21,6 +21,7 @@ $ rbbt slurm list [options]
|
|
|
21
21
|
-j--job* Job ids
|
|
22
22
|
-s--search* Regular expression
|
|
23
23
|
-t--tail* Show the last lines of the STDERR
|
|
24
|
+
-l--long Show more entries
|
|
24
25
|
-p--progress Report progress of job and the dependencies
|
|
25
26
|
-BP--batch_parameters show batch parameters
|
|
26
27
|
-BPP--batch_procpath show Procpath performance summary
|
|
@@ -40,6 +41,8 @@ end
|
|
|
40
41
|
batch_system = options.delete :batch_system
|
|
41
42
|
batch_system ||= 'auto'
|
|
42
43
|
|
|
44
|
+
long = options.delete :long
|
|
45
|
+
|
|
43
46
|
HPC::BATCH_MODULE = HPC.batch_system batch_system
|
|
44
47
|
|
|
45
48
|
raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
|
|
@@ -183,24 +186,24 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
|
183
186
|
|
|
184
187
|
|
|
185
188
|
puts Log.color :blue, dir
|
|
186
|
-
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
|
|
187
|
-
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err'))
|
|
188
|
-
puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest)
|
|
189
|
+
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s if long
|
|
190
|
+
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err')) && long
|
|
191
|
+
puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest) if long
|
|
189
192
|
puts Log.color(:magenta, "Step path: ") << Log.color(:yellow, step_path)
|
|
190
193
|
puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
|
|
191
|
-
puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
|
|
192
|
-
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
|
|
193
|
-
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
|
|
194
|
+
puts Log.color(:magenta, "Exec: ") << (exe || "Missing") if long
|
|
195
|
+
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing") if long
|
|
196
|
+
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home && long
|
|
194
197
|
if different_system
|
|
195
198
|
puts Log.color(:magenta, "Job ID (#{Log.color(:red, job_batch_system)}): ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id) )
|
|
196
199
|
else
|
|
197
200
|
puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
|
|
198
|
-
end
|
|
201
|
+
end
|
|
199
202
|
puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
|
|
200
203
|
puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
|
|
201
|
-
puts Log.color(:magenta, "Nodes: ") << nodes * ", "
|
|
204
|
+
puts Log.color(:magenta, "Nodes: ") << nodes * ", " if long
|
|
202
205
|
puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
|
|
203
|
-
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
|
|
206
|
+
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)") if long
|
|
204
207
|
|
|
205
208
|
if options[:batch_parameters]
|
|
206
209
|
puts Log.color(:magenta, "BATCH parameters: ")
|
|
@@ -281,7 +284,7 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
|
281
284
|
puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
|
|
282
285
|
else
|
|
283
286
|
puts Log.color(:magenta, "Log tail: ")
|
|
284
|
-
puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | tail -n #{tail.to_i} ").read
|
|
287
|
+
puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | grep -v -e '^[[:space:]]*$' | grep -v \"\\(STDOUT\\|STDERR\\):[[:space:]]*$\" | tail -n #{tail.to_i} ").read
|
|
285
288
|
end
|
|
286
289
|
end
|
|
287
290
|
|
|
data/share/rbbt_commands/slurm/tail
CHANGED
|
@@ -10,7 +10,7 @@ options = SOPT.setup <<EOF
|
|
|
10
10
|
|
|
11
11
|
Queue a job in Marenostrum
|
|
12
12
|
|
|
13
|
-
$ rbbt slurm tail <directory> [options]
|
|
13
|
+
$ rbbt slurm tail <directory|jobid> [options]
|
|
14
14
|
|
|
15
15
|
-h--help Print this help
|
|
16
16
|
EOF
|
|
@@ -35,8 +35,28 @@ directory = ARGV.shift
|
|
|
35
35
|
|
|
36
36
|
raise ParameterException if directory.nil?
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
if directory =~ /^[0-9]*$/
|
|
39
|
+
workdir = File.expand_path('~/rbbt-batch')
|
|
40
|
+
Path.setup(workdir)
|
|
41
|
+
|
|
42
|
+
workdir.glob("**/job.id").each do |file|
|
|
43
|
+
next unless directory == Open.read(file).strip
|
|
44
|
+
directory = File.dirname(file)
|
|
45
|
+
break
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
raise ParameterException, "Could not identify job #{directory}" unless File.exists?(directory)
|
|
39
50
|
|
|
40
51
|
require 'rbbt/hpc/slurm'
|
|
41
52
|
|
|
53
|
+
command_txt = Open.read(File.join(directory, 'command.batch'))
|
|
54
|
+
if m = command_txt.match(/#STEP_PATH: (.*)/)
|
|
55
|
+
step_path = m[1]
|
|
56
|
+
else
|
|
57
|
+
step_path = nil
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
puts Log.color(:magenta, "Step path: ") + step_path if step_path
|
|
61
|
+
|
|
42
62
|
HPC::BATCH_MODULE.follow_job directory, true
|
|
data/share/rbbt_commands/workflow/task
CHANGED
|
@@ -219,8 +219,8 @@ help = !!options.delete(:help)
|
|
|
219
219
|
do_fork = !!options.delete(:fork)
|
|
220
220
|
detach = !!options.delete(:detach)
|
|
221
221
|
do_exec = !!options.delete(:exec)
|
|
222
|
-
clean = !!options.delete(:clean)
|
|
223
222
|
clean_task = options.delete(:clean_task)
|
|
223
|
+
clean = !!options.delete(:clean) || clean_task
|
|
224
224
|
override_deps = options.delete(:override_deps)
|
|
225
225
|
recursive_clean = !!options.delete(:recursive_clean)
|
|
226
226
|
out = options.include?(:output) ? File.open(options[:output], 'wb') : STDOUT
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rbbt-util
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 5.33.4
|
|
4
|
+
version: 5.33.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Miguel Vazquez
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2022-03-
|
|
11
|
+
date: 2022-03-10 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|