rbbt-util 5.33.4 → 5.33.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc/batch.rb +1 -1
- data/lib/rbbt/hpc/orchestrate/chains.rb +1 -89
- data/lib/rbbt/hpc/orchestrate.rb +22 -2
- data/lib/rbbt/hpc/slurm.rb +4 -1
- data/lib/rbbt/util/migrate.rb +6 -0
- data/lib/rbbt/workflow/definition.rb +12 -12
- data/lib/rbbt/workflow/step/run.rb +2 -2
- data/lib/rbbt/workflow/util/orchestrator.rb +3 -3
- data/share/Rlib/plot.R +1 -0
- data/share/rbbt_commands/hpc/list +13 -10
- data/share/rbbt_commands/hpc/tail +22 -2
- data/share/rbbt_commands/lsf/list +13 -10
- data/share/rbbt_commands/lsf/tail +22 -2
- data/share/rbbt_commands/slurm/list +13 -10
- data/share/rbbt_commands/slurm/tail +22 -2
- data/share/rbbt_commands/workflow/prov +1 -1
- data/share/rbbt_commands/workflow/task +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4272048a5c86e74b051600f0db895ea42f4ad51509a0de9ce88452d27133746
|
4
|
+
data.tar.gz: f10d19e028de390beb5e2649c7d2f1e0a7754d3235abe3eeba2790946b347f64
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0667eb9d5783d7722c33f72a66a1c406d330d086a37660aa7682006d10fe15860a9e4cbfcb461f17ed15b146fa294a6f6d8158d937884f14dc7952908f63c357
|
7
|
+
data.tar.gz: f3ba25b347bc4a7f7b56ecdbd1a0bef932b646ac030b3d98fe6ddb76f36a6863073f4692747ceef32c42e4344538a7ee19b531a25b16cc2fa208b201887b441e
|
data/lib/rbbt/hpc/batch.rb
CHANGED
@@ -544,7 +544,7 @@ env > #{batch_options[:fenv]}
|
|
544
544
|
Misc.add_defaults options,
|
545
545
|
:batch_dir => batch_dir,
|
546
546
|
:inputs_dir => File.join(batch_dir, "inputs_dir"),
|
547
|
-
:workflows => workflows_to_load * ","
|
547
|
+
:workflows => workflows_to_load.uniq * ","
|
548
548
|
|
549
549
|
options[:procpath_performance] ||= File.join(batch_dir, "procpath##{procpath.gsub(',', '#')}") if procpath
|
550
550
|
|
data/lib/rbbt/hpc/orchestrate/chains.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
module HPC
|
2
2
|
module Orchestration
|
3
3
|
def self.check_chains(chains, job)
|
4
|
+
return [] if Symbol === job.overriden
|
4
5
|
matches = []
|
5
6
|
chains.each do |name, chain|
|
6
7
|
next unless chain[:tasks].include?(job.workflow.to_s)
|
@@ -36,95 +37,6 @@ module HPC
|
|
36
37
|
(job.dependencies + job.input_dependencies).uniq.select{|d| ! d.done? || d.dirty? }
|
37
38
|
end
|
38
39
|
|
39
|
-
#def self.job_workload(job)
|
40
|
-
# workload = []
|
41
|
-
# heap = []
|
42
|
-
# heap << job
|
43
|
-
# while job = heap.pop
|
44
|
-
# next if job.done?
|
45
|
-
# workload << job
|
46
|
-
# heap.concat job_dependencies(job)
|
47
|
-
# heap.uniq!
|
48
|
-
# end
|
49
|
-
# workload.uniq
|
50
|
-
#end
|
51
|
-
|
52
|
-
#def self.top_level_job(jobs)
|
53
|
-
# top = jobs.select do |job|
|
54
|
-
# (jobs - job_workload(job)).empty? &&
|
55
|
-
# (job_workload(job) - jobs).select{|j| (job_workload(j) & jobs).any? }.empty?
|
56
|
-
# end
|
57
|
-
# return nil if top.length != 1
|
58
|
-
# top.first
|
59
|
-
#end
|
60
|
-
|
61
|
-
#def self.job_chains(rules, job)
|
62
|
-
# workload = job_workload(job)
|
63
|
-
# chains = parse_chains(rules)
|
64
|
-
|
65
|
-
# chain_jobs = {}
|
66
|
-
# workload.each do |job|
|
67
|
-
# check_chains(chains, job).each do |match|
|
68
|
-
# chain_jobs[match] ||= []
|
69
|
-
# chain_jobs[match] << job
|
70
|
-
# end
|
71
|
-
# end
|
72
|
-
|
73
|
-
# job_chains = []
|
74
|
-
|
75
|
-
# seen = []
|
76
|
-
# chain_jobs.sort_by{|name,jobs| jobs.length }.reverse.each do |name,jobs|
|
77
|
-
# remain = jobs - seen
|
78
|
-
# next unless remain.length > 1
|
79
|
-
# top_level_job = top_level_job(jobs)
|
80
|
-
# next if top_level_job.nil?
|
81
|
-
# job_chains << {:jobs => remain, :rules => chains[name][:rules], :top_level_job => top_level_job}
|
82
|
-
# seen.concat remain
|
83
|
-
# end
|
84
|
-
|
85
|
-
# job_chains
|
86
|
-
#end
|
87
|
-
|
88
|
-
#def self._job_chains(rules, job)
|
89
|
-
# workload = job_workload(job)
|
90
|
-
# chains = parse_chains(rules)
|
91
|
-
|
92
|
-
# matches = check_chains(chains, job)
|
93
|
-
|
94
|
-
# job_chains = {}
|
95
|
-
# job.dependencies.each do |dep|
|
96
|
-
# dep_chains = _job_chains(rules, dep)
|
97
|
-
# matches.each do |match|
|
98
|
-
# if dep_chains[match] && dep_chains[match].include?(dep)
|
99
|
-
# dep_chains[match].prepend job
|
100
|
-
# end
|
101
|
-
# end
|
102
|
-
# job_chains.merge!(dep_chains)
|
103
|
-
# end
|
104
|
-
|
105
|
-
# matches.each do |match|
|
106
|
-
# job_chains[match] ||= [job]
|
107
|
-
# end
|
108
|
-
|
109
|
-
# job_chains
|
110
|
-
#end
|
111
|
-
|
112
|
-
#def self.job_chains(rules, job)
|
113
|
-
# job_chains = self._job_chains(rules, job)
|
114
|
-
# iif job_chains
|
115
|
-
# chains = parse_chains(rules)
|
116
|
-
|
117
|
-
# seen = []
|
118
|
-
# job_chains.collect do |name,jobs|
|
119
|
-
# remain = jobs - seen
|
120
|
-
# next unless remain.length > 1
|
121
|
-
# top_level_job = top_level_job(jobs)
|
122
|
-
# next if top_level_job.nil?
|
123
|
-
# seen.concat remain
|
124
|
-
# {:jobs => remain, :rules => chains[name][:rules], :top_level_job => top_level_job}
|
125
|
-
# end.compact
|
126
|
-
#end
|
127
|
-
|
128
40
|
def self.job_chains(rules, job)
|
129
41
|
chains = self.parse_chains(rules)
|
130
42
|
|
data/lib/rbbt/hpc/orchestrate.rb
CHANGED
@@ -5,6 +5,19 @@ require 'rbbt/hpc/orchestrate/batches'
|
|
5
5
|
module HPC
|
6
6
|
module Orchestration
|
7
7
|
|
8
|
+
def prepare_for_execution(job)
|
9
|
+
rec_dependencies = job.rec_dependencies(true)
|
10
|
+
|
11
|
+
return if rec_dependencies.empty?
|
12
|
+
|
13
|
+
all_deps = rec_dependencies + [job]
|
14
|
+
|
15
|
+
all_deps.each do |dep|
|
16
|
+
Step.prepare_for_execution(dep)
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
8
21
|
def orchestrate_job(job, options)
|
9
22
|
options.delete "recursive_clean"
|
10
23
|
options.delete "clean_task"
|
@@ -14,6 +27,9 @@ module HPC
|
|
14
27
|
options.delete "detach"
|
15
28
|
options.delete "jobname"
|
16
29
|
|
30
|
+
Log.high "Prepare for exec"
|
31
|
+
prepare_for_execution(job)
|
32
|
+
|
17
33
|
if options[:orchestration_rules]
|
18
34
|
rules = YAML.load(Open.read(options[:orchestration_rules]))
|
19
35
|
elsif Rbbt.etc.slurm["default.yaml"].exists?
|
@@ -24,6 +40,7 @@ module HPC
|
|
24
40
|
|
25
41
|
IndiferentHash.setup(rules)
|
26
42
|
|
43
|
+
Log.high "Compute batches"
|
27
44
|
batches = HPC::Orchestration.job_batches(rules, job)
|
28
45
|
|
29
46
|
batch_ids = {}
|
@@ -31,7 +48,8 @@ module HPC
|
|
31
48
|
top = batches.select{|b| b[:deps].nil? || (b[:deps] - batch_ids.keys).empty? }.first
|
32
49
|
raise "No batch without unmet dependencies" if top.nil?
|
33
50
|
batches.delete top
|
34
|
-
|
51
|
+
|
52
|
+
job_options = HPC::Orchestration.merge_rules(options, top[:rules])
|
35
53
|
|
36
54
|
if top[:deps].nil?
|
37
55
|
batch_dependencies = []
|
@@ -59,7 +77,9 @@ module HPC
|
|
59
77
|
|
60
78
|
if options[:dry_run]
|
61
79
|
puts Log.color(:magenta, "Manifest: ") + Log.color(:blue, job_options[:manifest] * ", ") + " - tasks: #{job_options[:task_cpus] || 1} - time: #{job_options[:time]} - config: #{job_options[:config_keys]}"
|
62
|
-
puts Log.color(:
|
80
|
+
puts Log.color(:magenta, "Deps: ") + Log.color(:blue, job_options[:batch_dependencies]*", ")
|
81
|
+
puts Log.color(:yellow, "Path: ") + top[:top_level].path
|
82
|
+
puts Log.color(:yellow, "Options: ") + Misc.fingerprint(job_options)
|
63
83
|
batch_ids[top] = top[:top_level].task_signature
|
64
84
|
else
|
65
85
|
id = run_job(top[:top_level], job_options)
|
data/lib/rbbt/hpc/slurm.rb
CHANGED
@@ -32,6 +32,8 @@ export BATCH_SYSTEM=SLURM
|
|
32
32
|
constraint = Misc.process_options options, :constraint
|
33
33
|
gres = Misc.process_options options, :gres
|
34
34
|
|
35
|
+
constraint = [constraint, "highmem"].compact * "&" if highmem
|
36
|
+
|
35
37
|
mem = Misc.process_options options, :mem
|
36
38
|
mem_per_cpu = Misc.process_options options, :mem_per_cpu
|
37
39
|
|
@@ -50,14 +52,15 @@ export BATCH_SYSTEM=SLURM
|
|
50
52
|
"cpus-per-task" => task_cpus,
|
51
53
|
"nodes" => nodes,
|
52
54
|
"time" => time,
|
55
|
+
"constraint" => constraint,
|
53
56
|
"exclusive" => exclusive,
|
54
|
-
"highmem" => highmem,
|
55
57
|
"licenses" => licenses,
|
56
58
|
"gres" => gres,
|
57
59
|
"mem" => mem,
|
58
60
|
"mem-per-cpu" => mem_per_cpu,
|
59
61
|
}
|
60
62
|
|
63
|
+
|
61
64
|
header =<<-EOF
|
62
65
|
#!/bin/bash
|
63
66
|
EOF
|
data/lib/rbbt/util/migrate.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
module Rbbt
|
2
|
+
|
3
|
+
prepare_for_execution(job)
|
2
4
|
def self.migrate_source_paths(path, resource = Rbbt, source = nil)
|
3
5
|
if source
|
4
6
|
lpath, *paths = Misc.ssh_run(source, <<-EOF).split("\n")
|
@@ -44,6 +46,8 @@ puts resource[path].find(search_path)
|
|
44
46
|
excludes += (options[:exclude] || "").split(/,\s*/)
|
45
47
|
excludes_str = excludes.collect{|s| "--exclude '#{s}'" } * " "
|
46
48
|
|
49
|
+
hard_link = options[:hard_link]
|
50
|
+
|
47
51
|
other = options[:other] || []
|
48
52
|
|
49
53
|
test_str = options[:test] ? '-nv' : ''
|
@@ -82,6 +86,8 @@ puts resource[path].find(search_path)
|
|
82
86
|
# rsync_args = "-avztAXHP --copy-unsafe-links"
|
83
87
|
rsync_args = "-avztAHP --copy-unsafe-links"
|
84
88
|
|
89
|
+
rsync_args << " --link-dest '#{source_path}'" if hard_link && ! options[:source]
|
90
|
+
|
85
91
|
cmd = "rsync #{rsync_args} #{test_str} #{files_from_str} #{excludes_str} '#{source_path}' #{target_path} #{other * " "}"
|
86
92
|
|
87
93
|
cmd << " && rm -Rf #{source_path}" if options[:delete] && ! options[:files]
|
data/lib/rbbt/workflow/definition.rb
CHANGED
@@ -93,22 +93,22 @@ module Workflow
|
|
93
93
|
Open.rm_rf self.files_dir if Open.exist? self.files_dir
|
94
94
|
FileUtils.cp_r dep.files_dir, self.files_dir if Open.exist?(dep.files_dir)
|
95
95
|
|
96
|
-
if dep.overriden
|
96
|
+
if dep.overriden || ! Workflow.job_path?(dep.path)
|
97
97
|
Open.link dep.path, self.tmp_path
|
98
98
|
else
|
99
99
|
Open.ln_h dep.path, self.tmp_path
|
100
|
-
end
|
101
100
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
101
|
+
case remove.to_s
|
102
|
+
when 'true'
|
103
|
+
dep.clean
|
104
|
+
when 'recursive'
|
105
|
+
(dep.dependencies + dep.rec_dependencies).uniq.each do |d|
|
106
|
+
next if d.overriden
|
107
|
+
d.clean unless config(:remove_dep, d.task_signature, d.task_name, d.workflow.to_s, :default => true).to_s == 'false'
|
108
|
+
end
|
109
|
+
dep.clean unless config(:remove_dep, dep.task_signature, dep.task_name, dep.workflow.to_s, :default => true).to_s == 'false'
|
110
|
+
end
|
111
|
+
end
|
112
112
|
else
|
113
113
|
if Open.exists?(dep.files_dir)
|
114
114
|
Open.rm_rf self.files_dir
|
data/lib/rbbt/workflow/step/run.rb
CHANGED
@@ -181,8 +181,8 @@ class Step
|
|
181
181
|
# end
|
182
182
|
#end
|
183
183
|
|
184
|
-
Log.
|
185
|
-
Log.
|
184
|
+
Log.medium "Some newer files found: #{Misc.fingerprint outdated_time}" if outdated_time.any?
|
185
|
+
Log.medium "Some outdated files found: #{Misc.fingerprint outdated_dep}" if outdated_dep.any?
|
186
186
|
|
187
187
|
outdated_time + outdated_dep
|
188
188
|
end
|
data/lib/rbbt/workflow/util/orchestrator.rb
CHANGED
@@ -5,10 +5,10 @@ module Workflow
|
|
5
5
|
|
6
6
|
def self.job_workload(job)
|
7
7
|
workload = {job => []}
|
8
|
-
return workload if job.done?
|
8
|
+
return workload if job.done? && ! job.dirty?
|
9
9
|
|
10
10
|
job.dependencies.each do |dep|
|
11
|
-
next if dep.done?
|
11
|
+
next if dep.done? && ! job.dirty?
|
12
12
|
workload.merge!(job_workload(dep))
|
13
13
|
workload[job] += workload[dep]
|
14
14
|
workload[job] << dep
|
@@ -16,7 +16,7 @@ module Workflow
|
|
16
16
|
end
|
17
17
|
|
18
18
|
job.input_dependencies.each do |dep|
|
19
|
-
next if dep.done?
|
19
|
+
next if dep.done? && ! job.dirty?
|
20
20
|
workload.merge!(job_workload(dep))
|
21
21
|
workload[job] += workload[dep]
|
22
22
|
workload[job] << dep
|
data/share/Rlib/plot.R
CHANGED
data/share/rbbt_commands/hpc/list
CHANGED
@@ -21,6 +21,7 @@ $ rbbt slurm list [options]
|
|
21
21
|
-j--job* Job ids
|
22
22
|
-s--search* Regular expression
|
23
23
|
-t--tail* Show the last lines of the STDERR
|
24
|
+
-l--long Show more entries
|
24
25
|
-p--progress Report progress of job and the dependencies
|
25
26
|
-BP--batch_parameters show batch parameters
|
26
27
|
-BPP--batch_procpath show Procpath performance summary
|
@@ -40,6 +41,8 @@ end
|
|
40
41
|
batch_system = options.delete :batch_system
|
41
42
|
batch_system ||= 'auto'
|
42
43
|
|
44
|
+
long = options.delete :long
|
45
|
+
|
43
46
|
HPC::BATCH_MODULE = HPC.batch_system batch_system
|
44
47
|
|
45
48
|
raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
|
@@ -183,24 +186,24 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
183
186
|
|
184
187
|
|
185
188
|
puts Log.color :blue, dir
|
186
|
-
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
|
187
|
-
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err'))
|
188
|
-
puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest)
|
189
|
+
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s if long
|
190
|
+
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err')) && long
|
191
|
+
puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest) if long
|
189
192
|
puts Log.color(:magenta, "Step path: ") << Log.color(:yellow, step_path)
|
190
193
|
puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
|
191
|
-
puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
|
192
|
-
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
|
193
|
-
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
|
194
|
+
puts Log.color(:magenta, "Exec: ") << (exe || "Missing") if long
|
195
|
+
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing") if long
|
196
|
+
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home && long
|
194
197
|
if different_system
|
195
198
|
puts Log.color(:magenta, "Job ID (#{Log.color(:red, job_batch_system)}): ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id) )
|
196
199
|
else
|
197
200
|
puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
|
198
|
-
end
|
201
|
+
end
|
199
202
|
puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
|
200
203
|
puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
|
201
|
-
puts Log.color(:magenta, "Nodes: ") << nodes * ", "
|
204
|
+
puts Log.color(:magenta, "Nodes: ") << nodes * ", " if long
|
202
205
|
puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
|
203
|
-
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
|
206
|
+
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)") if long
|
204
207
|
|
205
208
|
if options[:batch_parameters]
|
206
209
|
puts Log.color(:magenta, "BATCH parameters: ")
|
@@ -281,7 +284,7 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
281
284
|
puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
|
282
285
|
else
|
283
286
|
puts Log.color(:magenta, "Log tail: ")
|
284
|
-
puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | tail -n #{tail.to_i} ").read
|
287
|
+
puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | grep -v -e '^[[:space:]]*$' | grep -v \"\\(STDOUT\\|STDERR\\):[[:space:]]*$\" | tail -n #{tail.to_i} ").read
|
285
288
|
end
|
286
289
|
end
|
287
290
|
|
data/share/rbbt_commands/hpc/tail
CHANGED
@@ -10,7 +10,7 @@ options = SOPT.setup <<EOF
|
|
10
10
|
|
11
11
|
Queue a job in Marenostrum
|
12
12
|
|
13
|
-
$ rbbt slurm tail <directory> [options]
|
13
|
+
$ rbbt slurm tail <directory|jobid> [options]
|
14
14
|
|
15
15
|
-h--help Print this help
|
16
16
|
EOF
|
@@ -35,8 +35,28 @@ directory = ARGV.shift
|
|
35
35
|
|
36
36
|
raise ParameterException if directory.nil?
|
37
37
|
|
38
|
-
|
38
|
+
if directory =~ /^[0-9]*$/
|
39
|
+
workdir = File.expand_path('~/rbbt-batch')
|
40
|
+
Path.setup(workdir)
|
41
|
+
|
42
|
+
workdir.glob("**/job.id").each do |file|
|
43
|
+
next unless directory == Open.read(file).strip
|
44
|
+
directory = File.dirname(file)
|
45
|
+
break
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
raise ParameterException, "Could not identify job #{directory}" unless File.exists?(directory)
|
39
50
|
|
40
51
|
require 'rbbt/hpc/slurm'
|
41
52
|
|
53
|
+
command_txt = Open.read(File.join(directory, 'command.batch'))
|
54
|
+
if m = command_txt.match(/#STEP_PATH: (.*)/)
|
55
|
+
step_path = m[1]
|
56
|
+
else
|
57
|
+
step_path = nil
|
58
|
+
end
|
59
|
+
|
60
|
+
puts Log.color(:magenta, "Step path: ") + step_path if step_path
|
61
|
+
|
42
62
|
HPC::BATCH_MODULE.follow_job directory, true
|
data/share/rbbt_commands/lsf/list
CHANGED
@@ -21,6 +21,7 @@ $ rbbt slurm list [options]
|
|
21
21
|
-j--job* Job ids
|
22
22
|
-s--search* Regular expression
|
23
23
|
-t--tail* Show the last lines of the STDERR
|
24
|
+
-l--long Show more entries
|
24
25
|
-p--progress Report progress of job and the dependencies
|
25
26
|
-BP--batch_parameters show batch parameters
|
26
27
|
-BPP--batch_procpath show Procpath performance summary
|
@@ -40,6 +41,8 @@ end
|
|
40
41
|
batch_system = options.delete :batch_system
|
41
42
|
batch_system ||= 'auto'
|
42
43
|
|
44
|
+
long = options.delete :long
|
45
|
+
|
43
46
|
HPC::BATCH_MODULE = HPC.batch_system batch_system
|
44
47
|
|
45
48
|
raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
|
@@ -183,24 +186,24 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
183
186
|
|
184
187
|
|
185
188
|
puts Log.color :blue, dir
|
186
|
-
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
|
187
|
-
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err'))
|
188
|
-
puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest)
|
189
|
+
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s if long
|
190
|
+
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err')) && long
|
191
|
+
puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest) if long
|
189
192
|
puts Log.color(:magenta, "Step path: ") << Log.color(:yellow, step_path)
|
190
193
|
puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
|
191
|
-
puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
|
192
|
-
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
|
193
|
-
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
|
194
|
+
puts Log.color(:magenta, "Exec: ") << (exe || "Missing") if long
|
195
|
+
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing") if long
|
196
|
+
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home && long
|
194
197
|
if different_system
|
195
198
|
puts Log.color(:magenta, "Job ID (#{Log.color(:red, job_batch_system)}): ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id) )
|
196
199
|
else
|
197
200
|
puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
|
198
|
-
end
|
201
|
+
end
|
199
202
|
puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
|
200
203
|
puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
|
201
|
-
puts Log.color(:magenta, "Nodes: ") << nodes * ", "
|
204
|
+
puts Log.color(:magenta, "Nodes: ") << nodes * ", " if long
|
202
205
|
puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
|
203
|
-
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
|
206
|
+
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)") if long
|
204
207
|
|
205
208
|
if options[:batch_parameters]
|
206
209
|
puts Log.color(:magenta, "BATCH parameters: ")
|
@@ -281,7 +284,7 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
281
284
|
puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
|
282
285
|
else
|
283
286
|
puts Log.color(:magenta, "Log tail: ")
|
284
|
-
puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | tail -n #{tail.to_i} ").read
|
287
|
+
puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | grep -v -e '^[[:space:]]*$' | grep -v \"\\(STDOUT\\|STDERR\\):[[:space:]]*$\" | tail -n #{tail.to_i} ").read
|
285
288
|
end
|
286
289
|
end
|
287
290
|
|
data/share/rbbt_commands/lsf/tail
CHANGED
@@ -10,7 +10,7 @@ options = SOPT.setup <<EOF
|
|
10
10
|
|
11
11
|
Queue a job in Marenostrum
|
12
12
|
|
13
|
-
$ rbbt slurm tail <directory> [options]
|
13
|
+
$ rbbt slurm tail <directory|jobid> [options]
|
14
14
|
|
15
15
|
-h--help Print this help
|
16
16
|
EOF
|
@@ -35,8 +35,28 @@ directory = ARGV.shift
|
|
35
35
|
|
36
36
|
raise ParameterException if directory.nil?
|
37
37
|
|
38
|
-
|
38
|
+
if directory =~ /^[0-9]*$/
|
39
|
+
workdir = File.expand_path('~/rbbt-batch')
|
40
|
+
Path.setup(workdir)
|
41
|
+
|
42
|
+
workdir.glob("**/job.id").each do |file|
|
43
|
+
next unless directory == Open.read(file).strip
|
44
|
+
directory = File.dirname(file)
|
45
|
+
break
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
raise ParameterException, "Could not identify job #{directory}" unless File.exists?(directory)
|
39
50
|
|
40
51
|
require 'rbbt/hpc/slurm'
|
41
52
|
|
53
|
+
command_txt = Open.read(File.join(directory, 'command.batch'))
|
54
|
+
if m = command_txt.match(/#STEP_PATH: (.*)/)
|
55
|
+
step_path = m[1]
|
56
|
+
else
|
57
|
+
step_path = nil
|
58
|
+
end
|
59
|
+
|
60
|
+
puts Log.color(:magenta, "Step path: ") + step_path if step_path
|
61
|
+
|
42
62
|
HPC::BATCH_MODULE.follow_job directory, true
|
data/share/rbbt_commands/slurm/list
CHANGED
@@ -21,6 +21,7 @@ $ rbbt slurm list [options]
|
|
21
21
|
-j--job* Job ids
|
22
22
|
-s--search* Regular expression
|
23
23
|
-t--tail* Show the last lines of the STDERR
|
24
|
+
-l--long Show more entries
|
24
25
|
-p--progress Report progress of job and the dependencies
|
25
26
|
-BP--batch_parameters show batch parameters
|
26
27
|
-BPP--batch_procpath show Procpath performance summary
|
@@ -40,6 +41,8 @@ end
|
|
40
41
|
batch_system = options.delete :batch_system
|
41
42
|
batch_system ||= 'auto'
|
42
43
|
|
44
|
+
long = options.delete :long
|
45
|
+
|
43
46
|
HPC::BATCH_MODULE = HPC.batch_system batch_system
|
44
47
|
|
45
48
|
raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
|
@@ -183,24 +186,24 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
183
186
|
|
184
187
|
|
185
188
|
puts Log.color :blue, dir
|
186
|
-
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
|
187
|
-
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err'))
|
188
|
-
puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest)
|
189
|
+
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s if long
|
190
|
+
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err')) && long
|
191
|
+
puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest) if long
|
189
192
|
puts Log.color(:magenta, "Step path: ") << Log.color(:yellow, step_path)
|
190
193
|
puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
|
191
|
-
puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
|
192
|
-
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
|
193
|
-
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
|
194
|
+
puts Log.color(:magenta, "Exec: ") << (exe || "Missing") if long
|
195
|
+
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing") if long
|
196
|
+
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home && long
|
194
197
|
if different_system
|
195
198
|
puts Log.color(:magenta, "Job ID (#{Log.color(:red, job_batch_system)}): ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id) )
|
196
199
|
else
|
197
200
|
puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
|
198
|
-
end
|
201
|
+
end
|
199
202
|
puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
|
200
203
|
puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
|
201
|
-
puts Log.color(:magenta, "Nodes: ") << nodes * ", "
|
204
|
+
puts Log.color(:magenta, "Nodes: ") << nodes * ", " if long
|
202
205
|
puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
|
203
|
-
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
|
206
|
+
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)") if long
|
204
207
|
|
205
208
|
if options[:batch_parameters]
|
206
209
|
puts Log.color(:magenta, "BATCH parameters: ")
|
@@ -281,7 +284,7 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
281
284
|
puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
|
282
285
|
else
|
283
286
|
puts Log.color(:magenta, "Log tail: ")
|
284
|
-
puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | tail -n #{tail.to_i} ").read
|
287
|
+
puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | grep -v -e '^[[:space:]]*$' | grep -v \"\\(STDOUT\\|STDERR\\):[[:space:]]*$\" | tail -n #{tail.to_i} ").read
|
285
288
|
end
|
286
289
|
end
|
287
290
|
|
data/share/rbbt_commands/slurm/tail
CHANGED
@@ -10,7 +10,7 @@ options = SOPT.setup <<EOF
|
|
10
10
|
|
11
11
|
Queue a job in Marenostrum
|
12
12
|
|
13
|
-
$ rbbt slurm tail <directory> [options]
|
13
|
+
$ rbbt slurm tail <directory|jobid> [options]
|
14
14
|
|
15
15
|
-h--help Print this help
|
16
16
|
EOF
|
@@ -35,8 +35,28 @@ directory = ARGV.shift
|
|
35
35
|
|
36
36
|
raise ParameterException if directory.nil?
|
37
37
|
|
38
|
-
|
38
|
+
if directory =~ /^[0-9]*$/
|
39
|
+
workdir = File.expand_path('~/rbbt-batch')
|
40
|
+
Path.setup(workdir)
|
41
|
+
|
42
|
+
workdir.glob("**/job.id").each do |file|
|
43
|
+
next unless directory == Open.read(file).strip
|
44
|
+
directory = File.dirname(file)
|
45
|
+
break
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
raise ParameterException, "Could not identify job #{directory}" unless File.exists?(directory)
|
39
50
|
|
40
51
|
require 'rbbt/hpc/slurm'
|
41
52
|
|
53
|
+
command_txt = Open.read(File.join(directory, 'command.batch'))
|
54
|
+
if m = command_txt.match(/#STEP_PATH: (.*)/)
|
55
|
+
step_path = m[1]
|
56
|
+
else
|
57
|
+
step_path = nil
|
58
|
+
end
|
59
|
+
|
60
|
+
puts Log.color(:magenta, "Step path: ") + step_path if step_path
|
61
|
+
|
42
62
|
HPC::BATCH_MODULE.follow_job directory, true
|
data/share/rbbt_commands/workflow/task
CHANGED
@@ -219,8 +219,8 @@ help = !!options.delete(:help)
|
|
219
219
|
do_fork = !!options.delete(:fork)
|
220
220
|
detach = !!options.delete(:detach)
|
221
221
|
do_exec = !!options.delete(:exec)
|
222
|
-
clean = !!options.delete(:clean)
|
223
222
|
clean_task = options.delete(:clean_task)
|
223
|
+
clean = !!options.delete(:clean) || clean_task
|
224
224
|
override_deps = options.delete(:override_deps)
|
225
225
|
recursive_clean = !!options.delete(:recursive_clean)
|
226
226
|
out = options.include?(:output) ? File.open(options[:output], 'wb') : STDOUT
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.33.4
|
4
|
+
version: 5.33.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-03-
|
11
|
+
date: 2022-03-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|