rbbt-util 5.32.4 → 5.32.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc/slurm.rb +4 -0
- data/lib/rbbt/persist/tsv/adapter.rb +1 -5
- data/lib/rbbt/resource.rb +58 -40
- data/lib/rbbt/util/cmd.rb +14 -4
- data/lib/rbbt/util/migrate.rb +118 -0
- data/lib/rbbt/workflow.rb +11 -0
- data/lib/rbbt/workflow/util/archive.rb +31 -102
- data/share/rbbt_commands/hpc/orchestrate +3 -1
- data/share/rbbt_commands/hpc/task +3 -1
- data/share/rbbt_commands/lsf/clean +212 -0
- data/share/rbbt_commands/lsf/list +311 -0
- data/share/rbbt_commands/lsf/orchestrate +58 -0
- data/share/rbbt_commands/lsf/tail +55 -0
- data/share/rbbt_commands/lsf/task +57 -0
- data/share/rbbt_commands/migrate +3 -76
- data/share/rbbt_commands/slurm/clean +212 -0
- data/share/rbbt_commands/slurm/list +311 -0
- data/share/rbbt_commands/slurm/orchestrate +58 -0
- data/share/rbbt_commands/slurm/tail +55 -0
- data/share/rbbt_commands/slurm/task +57 -0
- data/test/rbbt/util/test_migrate.rb +36 -0
- data/test/rbbt/workflow/util/test_archive.rb +31 -0
- metadata +18 -3
@@ -0,0 +1,58 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'rbbt/util/simpleopt'
require 'rbbt/workflow'
require 'rbbt/workflow/usage'
require 'rbbt/hpc'
require 'rbbt/hpc/orchestrate'
require 'time'

# Batch-related command line options. Whatever SOPT.get returns is kept in a
# global because the Step#run override below merges it into the job options.
#
# --batch_system is declared here because the script reads :batch_system from
# the parsed options right after; without the declaration the value could
# never be set from the command line.
#
# NOTE(review): the short flag -t is declared for both --tail and
# --task_cpus; confirm how SOPT resolves the clash.
$slurm_options = SOPT.get <<EOF
-dr--dry_run Print only the template
-cj--clean_job Clean job
--drbbt* Use development version of rbbt
-sing--singularity Use Singularity
-si--singularity_img* Singularity image to use
-ug--user_group* Use alternative user group for group project directory
-c--contain* Contain in directory (using Singularity)
-s--sync* Contain in directory and sync jobs
-e--exclusive Make exclusive use of the node
-hm--highmem Make use of highmem cores
-wc--wipe_container* Wipe the jobs from the contain directory
-CS--contain_and_sync Contain and sync to default locations
-ci--copy_image When using a container directory, copy image there
-t--tail Tail the logs
-BPP--batch_procpath* Save Procpath performance for batch job; specify only options
-q--queue* Queue
-t--task_cpus* Tasks
-tm--time* Time
-lin--licenses* SLURM licenses
-cons--constraint* SLURM constraint
-W--workflows* Additional workflows
-OR--orchestration_rules* Orchestration rules
-rmb--remove_batch_basedir Remove the SLURM working directory (command, STDIN, exit status, ...)
-bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
EOF

# The batch system is not a job option: take it out of the hash and fall back
# to 'auto' when it was not given.
batch_system = $slurm_options.delete :batch_system
batch_system ||= 'auto'

HPC::BATCH_MODULE = HPC.batch_system batch_system

raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?

# Reopen Step so that #run hands unfinished jobs to the selected batch module
# through orchestrate_job.
class Step
  # Loads the result when the step is already done; otherwise issues the job
  # through HPC::BATCH_MODULE.orchestrate_job with the command line options.
  #
  # Returns whatever #load or orchestrate_job return, or nil when
  # HPC::SBATCH is raised.
  def run(*args)
    return self.load if done?

    begin
      # The batch module may not be SLURM, so the message stays generic.
      Log.debug "Issuing batch job for #{self.path}"
      HPC::BATCH_MODULE.orchestrate_job(self, SOPT::GOT_OPTIONS.merge($slurm_options))
    rescue HPC::SBATCH
      # Deliberately ignored, as in the original script.
      # NOTE(review): presumably raised to stop after printing the template
      # on a dry run -- confirm.
    end
  end
end

# Pass the additional workflows (plus --detach) on to the regular workflow
# task command, which is loaded with the Step#run override in place.
ARGV.concat ["-W", $slurm_options[:workflows], '--detach'] if $slurm_options[:workflows]
load Rbbt.share.rbbt_commands.workflow.task.find
|
@@ -0,0 +1,55 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'rbbt-util'
require 'rbbt/util/simpleopt'
require 'rbbt/hpc'

#$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands

# NOTE(review): most of the filter options below are parsed but not used by
# this script, which only consumes --help and --batch_system. The long name
# --sacct_peformance is misspelled; it is kept so existing invocations still
# parse.
options = SOPT.setup <<EOF

Follow the logs of a batch job

$ rbbt slurm tail <directory> [options]

-h--help Print this help
-d--done Done jobs only
-e--error Error jobs only
-a--aborted SLURM aborted jobs
-r--running Running jobs only
-q--queued Queued jobs only
-j--job* Job ids
-s--search* Regular expression
-t--tail* Show the last lines of the STDERR
-p--progress Report progress of job and the dependencies
-SBP--sbatch_parameters show sbatch parameters
-PERF--procpath_performance show Procpath performance summary
-sacct--sacct_peformance show sacct performance summary
-bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
EOF

if options[:help]
  if defined? rbbt_usage
    rbbt_usage
  else
    puts SOPT.doc
  end
  exit 0
end

# Use the requested batch system, or 'auto' when none was given.
batch_system = options.delete :batch_system
batch_system ||= 'auto'

HPC::BATCH_MODULE = HPC.batch_system batch_system

raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?

directory = ARGV.shift

raise ParameterException.new("No batch job directory specified") if directory.nil?

# A path that is not a directory (e.g. a file inside the job directory) is
# replaced by its parent directory.
directory = File.dirname(directory) unless File.directory?(directory)

require 'rbbt/hpc/slurm'

HPC::BATCH_MODULE.follow_job directory, true
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'rbbt/util/simpleopt'
require 'rbbt/workflow'
require 'rbbt/workflow/usage'
require 'rbbt/hpc'
require 'time'

# Batch-related command line options. Whatever SOPT.get returns is kept in a
# global because the Step#run override below merges it into the job options.
#
# NOTE(review): the short flag -t is declared for both --tail and
# --task_cpus; confirm how SOPT resolves the clash.
$slurm_options = SOPT.get <<EOF
-dr--dry_run Print only the template
-cj--clean_job Clean job
--drbbt* Use development version of rbbt
-sing--singularity Use Singularity
-si--singularity_img* Singularity image to use
-ug--user_group* Use alternative user group for group project directory
-c--contain* Contain in directory (using Singularity)
-s--sync* Contain in directory and sync jobs
-e--exclusive Make exclusive use of the node
-hm--highmem Make use of highmem cores
-wc--wipe_container* Wipe the jobs from the contain directory
-CS--contain_and_sync Contain and sync to default locations
-ci--copy_image When using a container directory, copy image there
-t--tail Tail the logs
-BPP--batch_procpath* Save Procpath performance for batch job; specify only options
-q--queue* Queue
-t--task_cpus* Tasks
-tm--time* Time
-lin--licenses* SLURM licenses
-cons--constraint* SLURM constraint
-W--workflows* Additional workflows
-rmb--remove_batch_dir Remove the batch working directory (command, STDIN, exit status, ...)
-bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
EOF

# The batch system is not a job option: take it out of the hash and fall back
# to 'auto' when it was not given.
batch_system = $slurm_options.delete :batch_system
batch_system ||= 'auto'

HPC::BATCH_MODULE = HPC.batch_system batch_system

raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?

# Reopen Step so that #run hands unfinished jobs to the selected batch module
# through run_job.
class Step
  # Loads the result when the step is already done; otherwise issues the job
  # through HPC::BATCH_MODULE.run_job with the command line options.
  #
  # Returns whatever #load or run_job return, or nil when HPC::SBATCH is
  # raised.
  def run(*args)
    return self.load if done?

    begin
      # The batch module may not be SLURM, so the message stays generic.
      Log.debug "Issuing batch job for #{self.path}"
      HPC::BATCH_MODULE.run_job(self, SOPT::GOT_OPTIONS.merge($slurm_options))
    rescue HPC::SBATCH
      # Deliberately ignored, as in the original script.
      # NOTE(review): presumably raised to stop after printing the template
      # on a dry run -- confirm.
    end
  end
end

# Pass the additional workflows on to the regular workflow task command,
# which is loaded with the Step#run override in place.
ARGV.concat ["-W", $slurm_options[:workflows]] if $slurm_options[:workflows]
load Rbbt.share.rbbt_commands.workflow.task.find
|
data/share/rbbt_commands/migrate
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'rbbt-util'
|
4
4
|
require 'rbbt/util/simpleopt'
|
5
|
-
require 'rbbt/
|
5
|
+
require 'rbbt/util/migrate'
|
6
6
|
|
7
7
|
$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
|
8
8
|
|
@@ -30,83 +30,10 @@ if options[:help]
|
|
30
30
|
exit 0
|
31
31
|
end
|
32
32
|
|
33
|
-
#excludes = %w(.save .crap .source tmp filecache open-remote workflows apps software jobs PCAWG)
|
34
|
-
excludes = %w(.save .crap .source tmp filecache open-remote)
|
35
|
-
excludes += (options[:exclude] || "").split(/,\s*/)
|
36
|
-
excludes_str = excludes.collect{|s| "--exclude '#{s}'" } * " "
|
37
|
-
|
38
|
-
test_str = options[:test] ? '-nv' : ''
|
39
|
-
|
40
33
|
path, search_path, _sep, *other = ARGV
|
41
34
|
|
42
35
|
search_path = 'user' if search_path.nil?
|
43
|
-
resource = Rbbt
|
44
|
-
|
45
|
-
path, real_paths, lpath = if options[:source]
|
46
|
-
lpath, *paths = Misc.ssh_run(options[:source], <<-EOF).split("\n")
|
47
|
-
require 'rbbt-util'
|
48
|
-
path = "#{path}"
|
49
|
-
if Open.exists?(path)
|
50
|
-
path = #{resource.to_s}.identify(path)
|
51
|
-
else
|
52
|
-
path = Path.setup(path)
|
53
|
-
end
|
54
|
-
puts path
|
55
|
-
puts path.glob_all.collect{|p| File.directory?(p) ? p + "/" : p } * "\n"
|
56
|
-
EOF
|
57
|
-
[path, paths.collect{|p| [options[:source], p] * ":"}, lpath]
|
58
|
-
else
|
59
|
-
if File.exists?(path)
|
60
|
-
|
61
|
-
path = resource.identify(path)
|
62
|
-
else
|
63
|
-
path = Path.setup(path)
|
64
|
-
end
|
65
|
-
[path, path.glob_all, path]
|
66
|
-
end
|
67
|
-
|
68
|
-
target = if options[:target]
|
69
|
-
target = Misc.ssh_run(options[:target], <<-EOF).split("\n").first
|
70
|
-
require 'rbbt-util'
|
71
|
-
path = "#{path}"
|
72
|
-
resource = #{resource.to_s}
|
73
|
-
search_path = "#{search_path}"
|
74
|
-
puts resource[path].find(search_path)
|
75
|
-
EOF
|
76
|
-
else
|
77
|
-
resource[lpath].find(search_path)
|
78
|
-
end
|
79
|
-
|
80
|
-
real_paths.each do |source|
|
81
|
-
|
82
|
-
|
83
|
-
if File.directory?(source) || source =~ /\/$/
|
84
|
-
source += "/" unless source[-1] == "/"
|
85
|
-
target += "/" unless target[-1] == "/"
|
86
|
-
end
|
87
36
|
|
88
|
-
|
37
|
+
options[:other] = other
|
89
38
|
|
90
|
-
|
91
|
-
CMD.cmd("ssh #{options[:target]} mkdir -p '#{File.dirname(target)}'")
|
92
|
-
else
|
93
|
-
Open.mkdir File.dirname(target)
|
94
|
-
end
|
95
|
-
|
96
|
-
if options[:target]
|
97
|
-
target_path = [options[:target], target] * ":"
|
98
|
-
else
|
99
|
-
target_path = target
|
100
|
-
end
|
101
|
-
|
102
|
-
cmd = "rsync -avztAXHP --copy-unsafe-links #{test_str} #{excludes_str} #{source} #{target_path} #{other * " "}"
|
103
|
-
|
104
|
-
cmd << " && rm -Rf #{source}" if options[:delete]
|
105
|
-
|
106
|
-
if options[:print]
|
107
|
-
puts cmd
|
108
|
-
exit 0
|
109
|
-
else
|
110
|
-
CMD.cmd_log(cmd, :log => Log::INFO)
|
111
|
-
end
|
112
|
-
end
|
39
|
+
Rbbt.migrate(path, search_path, options)
|
@@ -0,0 +1,212 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'rbbt-util'
require 'rbbt/util/simpleopt'
require 'rbbt/hpc'

#$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands

# --batch_system is declared here because the script reads :batch_system from
# the parsed options below.
options = SOPT.setup <<EOF

Clean error or aborted jobs

$ rbbt slurm clean [options]

-h--help Print this help
-d--done Done jobs only
-e--error Error jobs only
-a--aborted SLURM aborted jobs
-q--queued Queued jobs only
-j--job* Job ids
-s--search* Regular expression
-t--tail* Show the last lines of the STDERR
-BP--batch_parameters show batch parameters
-dr--dry_run Do not erase anything
-bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
EOF

if options[:help]
  if defined? rbbt_usage
    rbbt_usage
  else
    puts SOPT.doc
  end
  exit 0
end

# Use the requested batch system, or 'auto' when none was given.
batch_system = options.delete :batch_system
batch_system ||= 'auto'

HPC::BATCH_MODULE = HPC.batch_system batch_system

raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?

Log.severity = 4
done, error, aborted, queued, jobid, search, tail, batch_parameters, dry_run = options.values_at :done, :error, :aborted, :queued, :job, :search, :tail, :batch_parameters, :dry_run

# When neither --error nor --aborted is given, both are selected by default.
# NOTE(review): this also applies when --done, --queued or --job are given,
# so those flags add to the error/aborted selection rather than replace it.
# It also means the `elsif search` branch below is never reached. Behaviour
# kept from the original; confirm the intent.
aborted = error = true if aborted.nil? && error.nil?

workdir = File.expand_path('~/rbbt-batch')
Path.setup(workdir)

# Ids of the jobs currently known to the batch system, taken from the leading
# integer of each line of the status listing.
running_jobs = begin
                 squeue_txt = HPC::BATCH_MODULE.job_status
                 squeue_txt.split("\n").collect{|l| l.to_i.to_s}
               rescue
                 Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
                 squeue_txt = nil
                 $norunningjobs = true
                 []
               end

# First column of the status listing => nodes listed in the last column.
# Stays empty (rather than nil) when the listing is unavailable, so the
# lookups below never fail.
job_nodes = {}
if squeue_txt
  squeue_txt.split("\n").each do |line|
    parts = line.strip.split(/\s+/)
    next if parts.empty?
    job_nodes[parts.first] = parts.last.split(",")
  end
end

count = 0
workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
  dir = File.dirname(fcmd)
  command_txt = Open.read(fcmd)

  # Fields recorded in the batch script; each is nil when the line is absent.
  cmd = command_txt[/#CMD: (.*)/, 1]
  exe = command_txt[/# Run command\n(.*?)\n/im, 1]
  container_home = command_txt[/^CONTAINER_DIR=(.*)/, 1]
  job_batch_system = command_txt[/^BATCH_SYSTEM=(.*)/, 1]
  job_batch_system = job_batch_system.downcase if job_batch_system

  # NOTE(review): with the default batch_system of 'auto' this comparison is
  # true for every job that records a concrete system -- confirm that is the
  # intended meaning of "different system".
  different_system = job_batch_system != batch_system

  fid = File.join(dir, 'job.id')
  id = File.exist?(fid) ? Open.read(fid).chomp : nil

  fexit = File.join(dir, 'exit.status')
  exit_status = File.exist?(fexit) ? Open.read(fexit).to_i : nil

  # Nodes come from the last line of job.status when present (6th column for
  # LSF, last column otherwise), else from the live status listing.
  fstatus = File.join(dir, 'job.status')
  if File.exist?(fstatus)
    begin
      fields = Open.read(fstatus).split("\n").last.split(/\s+/)
      nodes = (job_batch_system == "lsf" ? fields[5] : fields.last).split(",")
    rescue
      nodes = []
    end
  elsif job_nodes[id]
    nodes = job_nodes[id]
  else
    nodes = []
  end

  # Seconds since the most recent write to std.out / std.err; nil when
  # std.out does not exist. std.err is only consulted when it exists.
  fout = File.join(dir, 'std.out')
  ferr = File.join(dir, 'std.err')
  time_diff = nil
  if File.exist?(fout)
    last_update = [fout, ferr].select{|f| File.exist?(f) }.collect{|f| File.mtime(f) }.max
    time_diff = Time.now - last_update
  end

  fdep = File.join(dir, 'dependencies.list')
  deps = Open.read(fdep).split("\n") if File.exist?(fdep)

  fcadep = File.join(dir, 'canfail_dependencies.list')
  cadeps = Open.read(fcadep).split("\n") if File.exist?(fcadep)

  if done || error || aborted || queued || jobid
    select = false
    select = true if done && exit_status == 0
    select = true if error && exit_status && exit_status != 0
    # Without a status listing we cannot tell aborted jobs from live ones;
    # treat them as alive (as the warning above announces) instead of
    # erasing them.
    select = true if aborted && exit_status.nil? && ! $norunningjobs && ! running_jobs.include?(id)
    is_running = exit_status.nil? && ( (running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)) || different_system )
    select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
    select = true if jobid && jobid.split(",").include?(id)
    # Jobs without a recorded command never match a search.
    select = select && cmd && cmd.match(/#{search}/) if search
    next unless select
  elsif search
    next unless cmd && cmd.match(/#{search}/)
  end

  puts Log.color(:yellow, "**ERASING**")
  puts Log.color :blue, dir
  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
  puts Log.color(:magenta, "Done: ") << File.mtime(fexit).to_s if File.exist?(fexit)
  puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
  puts Log.color(:magenta, "CMD: ") << (cmd ? Log.color(:yellow, cmd) : "Missing")
  puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
  puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
  puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
  puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
  puts Log.color(:magenta, "Nodes: ") << nodes * ", "

  output_line = Log.color(:magenta, "Output: ") << File.exist?(fout).to_s
  # The age is only reported when there is a job id and std.out exists.
  output_line << " (last update " + Misc.format_seconds(time_diff) + " ago)" unless id.nil? || time_diff.nil?
  puts output_line

  if batch_parameters
    puts Log.color(:magenta, "BATCH parameters: ")
    case job_batch_system
    when 'slurm'
      puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => command_txt).read.strip
    when 'lsf'
      puts Log.color :blue, CMD.cmd('grep "^#BSUB" |tail -n +6', :in => command_txt).read.strip
    end
  end

  if tail && File.exist?(ferr)
    if exit_status && exit_status != 0
      puts Log.color(:magenta, "First error or exception found: ")
      puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{ferr} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
    elsif exit_status
      puts Log.color(:magenta, "Completed jobs: ")
      puts CMD.cmd("grep -i -w 'Completed step' #{ferr} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
    else
      puts Log.color(:magenta, "Log tail: ")
      puts CMD.cmd("tail -n #{tail.to_i} #{ferr}").read
    end
  end

  count += 1

  Open.rm_rf dir unless dry_run
end

puts
puts "Found #{count} jobs"
|
212
|
+
|