rbbt-util 5.29.2 → 5.30.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,49 @@
1
+ require 'rbbt/util/cmd'
2
# Thin wrapper around the `procpath` command-line tool: records the resource
# usage of a process into a SQLite database and renders plots from it.
module ProcPath
  # Registers the :procpath tool with CMD; the block returns the command
  # used to install it.
  CMD.tool :procpath do
    'pip install procpath'
  end

  # Records information about process +pid+ into the database at +path+
  # until that process finishes.
  #
  # pid     - process id to follow; it is waited on with Process.wait, which
  #           only works for children of the current process.
  # path    - database file handed to `procpath record --database-file`.
  # options - Hash; only "interval" (default 30), "recnum" and "reevalnum"
  #           are forwarded to procpath.
  #
  # Blocks until +pid+ exits, then raises Interrupt in the recording thread.
  def self.record(pid, path, options = {})
    IndiferentHash.setup(options)
    options = Misc.add_defaults options, "interval" => 30

    # Keys without a value are still forwarded (as nil), as CMD receives them.
    cmd_options = %w(interval recnum reevalnum).each_with_object({}) { |key, acc| acc[key] = options[key] }

    Log.debug "ProcPath recording #{pid} in #{path} (#{Misc.fingerprint options})"
    procpath_thread = Thread.new do
      # Stays nil when the exception arrives before CMD.cmd_pid has returned.
      # NOTE(review): presumably CMD.cmd_pid returns the pid of the spawned
      # command -- confirm against CMD.
      procpath_pid = nil
      begin
        procpath_pid = CMD.cmd_pid(:procpath, "record --database-file '#{path}' '$..children[?(@.stat.pid == #{pid})]'", cmd_options.merge(:nofail => true, :add_option_dashes => true))
      rescue Exception
        # Exception (not StandardError) is rescued on purpose: the Interrupt
        # raised below to stop the recording is not a StandardError.
        Log.exception $!
        Process.kill "INT", procpath_pid if procpath_pid
      end
    end

    procpath_thread.report_on_exception = false

    Process.wait pid.to_i
    procpath_thread.raise Interrupt
  end

  # Renders a plot from the database at +path+ into the file +output+.
  #
  # options - Hash of `procpath plot` options. Defaults: "query-name" => 'rss',
  #           'epsilon' => 0.5, "moving-average-window" => 10. Only the keys
  #           listed below are forwarded to procpath.
  def self.plot(path, output, options = {})
    IndiferentHash.setup(options)
    options = Misc.add_defaults options, "query-name" => 'rss', 'epsilon' => 0.5, "moving-average-window" => 10

    # The forwarded key must be "moving-average-window" so that it matches
    # the default set above.
    plot_keys = %w(query-name epsilon moving-average-window title logarithmic after before custom-query-file custom-value-expr)
    cmd_options = plot_keys.each_with_object({}) { |key, acc| acc[key] = options[key] }
    CMD.cmd_log(:procpath, "plot --database-file '#{path}' --plot-file '#{output}' ", cmd_options.merge(:nofail => true, :add_option_dashes => true))
  end

  # Records process +pid+ until it finishes and then plots its CPU and RSS.
  #
  # path - "<database>" or "<database>#<options>"; the text after "#" is
  #        parsed with Misc.string2hash and passed as options to both record
  #        and plot. Output files are <database>.sqlite3, <database>.cpu.svg
  #        and <database>.rss.svg, with <database> expanded to a full path.
  def self.monitor(pid, path)
    database, options_str = path.split("#")
    options = options_str.nil? ? {} : Misc.string2hash(options_str)

    database = File.expand_path database
    Log.low "ProcPath monitor #{pid} in #{database} (#{Misc.fingerprint options})"

    ProcPath.record(pid, database + '.sqlite3', options)
    ProcPath.plot(database + '.sqlite3', database + '.cpu.svg', options.merge("query-name" => 'cpu'))
    ProcPath.plot(database + '.sqlite3', database + '.rss.svg', options.merge("query-name" => 'rss'))
  end
end
49
+
@@ -505,8 +505,8 @@ class Step
505
505
 
506
506
  def running?
507
507
  return false if ! (started? || status == :ending)
508
- pid = info[:pid]
509
- return nil if pid.nil?
508
+ return nil unless Open.exist?(self.pid_file)
509
+ pid = Open.read(self.pid_file).to_i
510
510
 
511
511
  return false if done? or error? or aborted?
512
512
 
@@ -530,8 +530,7 @@ class Step
530
530
  end
531
531
 
532
532
  def nopid?
533
- pid = info[:pid] || Open.exists?(pid_file)
534
- ! pid && ! (status.nil? || status == :aborted || status == :done || status == :error || status == :cleaned)
533
+ ! Open.exists?(pid_file) && ! (status.nil? || status == :aborted || status == :done || status == :error || status == :cleaned)
535
534
  end
536
535
 
537
536
  def aborted?
@@ -373,7 +373,6 @@ class Step
373
373
  Log.exception $!
374
374
  ensure
375
375
  Step.purge_stream_cache
376
- set_info :pid, nil
377
376
  Open.rm pid_file if Open.exist?(pid_file)
378
377
  end
379
378
  end
@@ -388,7 +387,6 @@ class Step
388
387
  _clean_finished
389
388
  rescue
390
389
  stop_dependencies
391
- set_info :pid, nil
392
390
  Open.rm pid_file if Open.exist?(pid_file)
393
391
  end
394
392
  end
@@ -449,7 +447,7 @@ class Step
449
447
  ensure
450
448
  no_load = false unless IO === result
451
449
  Open.rm pid_file if Open.exist?(pid_file) unless no_load
452
- set_info :pid, nil unless no_load
450
+ #set_info :pid, nil unless no_load
453
451
  end
454
452
  end
455
453
 
@@ -559,7 +557,7 @@ class Step
559
557
  RbbtSemaphore.post_semaphore(semaphore) if semaphore
560
558
  Kernel.exit! -1
561
559
  end
562
- set_info :pid, nil
560
+ #set_info :pid, nil
563
561
  ensure
564
562
  RbbtSemaphore.post_semaphore(semaphore) if semaphore
565
563
  end
@@ -57,7 +57,7 @@ module Task
57
57
  puts Log.color(:magenta, "Input select options")
58
58
  puts
59
59
  selects.collect{|p| p}.uniq.each do |input,options|
60
- puts Log.color(:blue, input.to_s + ": ") << Misc.format_paragraph(options.collect{|o| o.to_s} * ", ") << "\n"
60
+ puts Log.color(:blue, input.to_s + ": ") << Misc.format_paragraph(options.collect{|o| Array === o ? o.first.to_s : o.to_s} * ", ") << "\n"
61
61
  puts unless Log.compact
62
62
  end
63
63
  puts
@@ -0,0 +1,165 @@
1
#!/usr/bin/env ruby

# Erases the working directories of SLURM jobs found under ~/rbbt-slurm.
# For every selected directory a summary is printed and, unless --dry_run
# is given, the directory is removed.

require 'rbbt-util'
require 'rbbt/util/simpleopt'

#$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands

options = SOPT.setup <<EOF

Clean error or aborted jobs

$ rbbt mnl [options]

-h--help Print this help
-d--done Done jobs only
-e--error Error jobs only
-a--aborted SLURM aboted jobs
-j--job* Job ids
-s--search* Regular expression
-t--tail* Show the last lines of the STDERR
-SBP--sbatch_parameters show sbatch parameters
-dr--dry_run Do not erase anything
EOF

if options[:help]
  if defined? rbbt_usage
    rbbt_usage
  else
    puts SOPT.doc
  end
  exit 0
end

Log.severity = 4
done, error, aborted, jobid, search, tail, sbatch_parameters, dry_run = options.values_at :done, :error, :aborted, :job, :search, :tail, :sbatch_parameters, :dry_run

# With neither --error nor --aborted given, both kinds of job are selected.
# NOTE(review): this also applies when only --done is passed -- confirm that
# is the intended behavior.
aborted = error = true if aborted.nil? && error.nil?

workdir = File.expand_path('~/rbbt-slurm')
Path.setup(workdir)

# Ids taken from the first column of each squeue line (the header line
# becomes "0"). Empty when squeue cannot be run.
running_jobs = begin
                 squeue_txt = CMD.cmd('squeue').read
                 squeue_txt.split("\n").collect{|l| l.to_i.to_s}
               rescue
                 Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
                 squeue_txt = nil
                 $norunningjobs = true
                 []
               end

# First squeue column => comma-split last column. Kept as an empty Hash when
# squeue is unavailable so the lookups below never run against nil.
job_nodes = {}
if squeue_txt
  squeue_txt.split("\n").each do |line|
    parts = line.strip.split(/\s+/)
    next if parts.empty?
    job_nodes[parts.first] = parts.last.split(",")
  end
end

count = 0
workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
  dir = File.dirname(fcmd)

  # Read the batch script once; all three fields below are extracted from it.
  script = Open.read(fcmd)
  cmd = script[/#CMD: (.*)/, 1]
  exe = script[/# Run command\n(.*?)\n/im, 1]
  container_home = script[/^CONTAINER_DIR=(.*)/, 1]

  fid = File.join(dir, 'job.id')
  id = File.exist?(fid) ? Open.read(fid).chomp : nil

  fexit = File.join(dir, 'exit.status')
  exit_status = File.exist?(fexit) ? Open.read(fexit).to_i : nil

  # Nodes come from the last field of the last line of job.status when that
  # file exists, otherwise from the squeue listing.
  fstatus = File.join(dir, 'job.status')
  nodes = if File.exist?(fstatus)
            Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
          else
            job_nodes[id] || []
          end

  # Seconds since the most recent write to std.out / std.err; stays nil when
  # there is no std.out. std.err is only consulted if it exists.
  fout = File.join(dir, 'std.out')
  ferr = File.join(dir, 'std.err')
  time_diff = nil
  if File.exist?(fout)
    mtimes = [fout, ferr].select{|f| File.exist?(f)}.collect{|f| File.mtime(f)}
    time_diff = Time.now - mtimes.max
  end

  fdep = File.join(dir, 'dependencies.list')
  deps = Open.read(fdep).split("\n") if File.exist?(fdep)

  fcadep = File.join(dir, 'canfail_dependencies.list')
  cadeps = Open.read(fcadep).split("\n") if File.exist?(fcadep)

  # The selection is always applied: a directory is erased only when it
  # matches one of the requested states. --job and --search narrow it further.
  selected = false
  selected = true if done && exit_status && exit_status.to_i == 0
  selected = true if error && exit_status && exit_status.to_i != 0
  # When squeue is unavailable jobs are treated as alive (see the warning
  # above), so they are never considered aborted.
  selected = true if aborted && exit_status.nil? && ! $norunningjobs && ! running_jobs.include?(id)
  selected = selected && jobid.split(",").include?(id) if jobid
  # A directory whose script has no "#CMD:" line never matches a search.
  selected = selected && cmd && cmd.match(/#{search}/) if search
  next unless selected

  puts Log.color(:yellow, "**ERASING**")
  puts Log.color :blue, dir
  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
  puts Log.color(:magenta, "Done: ") << File.mtime(fexit).to_s if File.exist?(fexit)
  puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
  # Fall back explicitly instead of relying on Log.color returning nil for a
  # nil string.
  puts Log.color(:magenta, "CMD: ") << (cmd ? Log.color(:yellow, cmd) : "Missing")
  puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
  puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
  puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
  puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
  puts Log.color(:magenta, "Nodes: ") << nodes * ", "
  puts Log.color(:magenta, "Output: ") << File.exist?(fout).to_s << ((id.nil? || time_diff.nil?) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")

  if sbatch_parameters
    puts Log.color(:magenta, "SBATCH parameters: ")
    puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => script).read.strip
  end

  if tail && File.exist?(ferr)
    if exit_status && exit_status != 0
      puts Log.color(:magenta, "First error or exception found: ")
      puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{ferr} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
    elsif exit_status
      puts Log.color(:magenta, "Completed jobs: ")
      puts CMD.cmd("grep -i -w 'Completed step' #{ferr} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
    else
      puts Log.color(:magenta, "Log tail: ")
      puts CMD.cmd("tail -n #{tail.to_i} #{ferr}").read
    end
  end

  count += 1

  Open.rm_rf dir unless dry_run
end

puts
puts "Found #{count} jobs"
165
+
@@ -20,15 +20,18 @@ $ rbbt mnl [options]
20
20
  -j--job* Job ids
21
21
  -s--search* Regular expression
22
22
  -t--tail* Show the last lines of the STDERR
23
+ -SBP--sbatch_parameters show sbatch parameters
24
+ -PERF--procpath_performance show Procpath performance summary
25
+ -sacct--sacct_peformance show sacct performance summary
23
26
  EOF
24
27
 
25
28
  if options[:help]
26
- if defined? rbbt_usage
27
- rbbt_usage
28
- else
29
- puts SOPT.doc
30
- end
31
- exit 0
29
+ if defined? rbbt_usage
30
+ rbbt_usage
31
+ else
32
+ puts SOPT.doc
33
+ end
34
+ exit 0
32
35
  end
33
36
 
34
37
  Log.severity = 4
@@ -38,101 +41,177 @@ workdir = File.expand_path('~/rbbt-slurm')
38
41
  Path.setup(workdir)
39
42
 
40
43
  running_jobs = begin
41
- CMD.cmd('squeue').read.split("\n").collect{|l| l.to_i.to_s}
44
+ squeue_txt = CMD.cmd('squeue').read
45
+ squeue_txt.split("\n").collect{|l| l.to_i.to_s}
42
46
  rescue
43
- Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
44
- $norunningjobs = true
45
- []
47
+ Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
48
+ squeue_txt = nil
49
+ $norunningjobs = true
50
+ []
46
51
  end
47
52
 
53
+ if squeue_txt
54
+ job_nodes = {}
55
+ squeue_txt.split("\n").each do |line|
56
+ parts = line.strip.split(/\s+/)
57
+ job_nodes[parts.first] = parts.last.split(",")
58
+ end
59
+ else
60
+ job_nodes = nil
61
+ end
62
+
48
63
  count = 0
49
64
  workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
50
- dir = File.dirname(fcmd)
51
-
52
- if m = Open.read(fcmd).match(/#CMD: (.*)/)
53
- cmd = m[1]
54
- else
55
- cmd = nil
65
+ dir = File.dirname(fcmd)
66
+
67
+ if m = Open.read(fcmd).match(/#CMD: (.*)/)
68
+ cmd = m[1]
69
+ else
70
+ cmd = nil
71
+ end
72
+
73
+ if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
74
+ exe = m[1].sub('step_path=$(','')
75
+ else
76
+ exe = nil
77
+ end
78
+
79
+ if m = Open.read(fcmd).match(/^CONTAINER_DIR=(.*)/)
80
+ container_home = m[1]
81
+ else
82
+ container_home = nil
83
+ end
84
+
85
+
86
+ if File.exists?(fid = File.join(dir, 'job.id'))
87
+ id = Open.read(fid).chomp
88
+ else
89
+ id = nil
90
+ end
91
+
92
+ if File.exists?(fstatus = File.join(dir, 'exit.status'))
93
+ exit_status = Open.read(fstatus).to_i
94
+ else
95
+ exit_status = nil
96
+ end
97
+
98
+ if File.exists?(fstatus = File.join(dir, 'job.status'))
99
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
100
+ elsif job_nodes[id]
101
+ nodes = job_nodes[id]
102
+ else
103
+ nodes = []
104
+ end
105
+
106
+ if File.exists?(File.join(dir, 'std.out'))
107
+ outt = File.mtime File.join(dir, 'std.out')
108
+ errt = File.mtime File.join(dir, 'std.err')
109
+ time_diff = Time.now - [outt, errt].max
110
+ end
111
+
112
+ fdep = File.join(dir, 'dependencies.list')
113
+ deps = Open.read(fdep).split("\n") if File.exists?(fdep)
114
+
115
+ fcadep = File.join(dir, 'canfail_dependencies.list')
116
+ cadeps = Open.read(fcadep).split("\n") if File.exists?(fcadep)
117
+
118
+ if done || error || aborted || running || queued || jobid || search
119
+ select = false
120
+ select = true if done && exit_status == 0
121
+ select = true if error && exit_status && exit_status != 0
122
+ select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
123
+ select = true if queued && deps && (running_jobs & deps).any?
124
+ select = true if running && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
125
+ select = true if jobid && jobid.split(",").include?(id)
126
+ select = true if search && cmd.match(/#{search}/)
127
+ next unless select
128
+ end
129
+
130
+
131
+ puts Log.color :blue, dir
132
+ puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
133
+ puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
134
+ puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
135
+ puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
136
+ puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
137
+ puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
138
+ puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
139
+ puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
140
+ puts Log.color(:magenta, "Nodes: ") << nodes * ", "
141
+ puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
142
+
143
+ if options[:sbatch_parameters]
144
+ puts Log.color(:magenta, "SBATCH parameters: ")
145
+ text = CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
146
+ lines = text.split("\n").collect{|line| header, _sep, value = line.partition(/\s+/); Log.color(:yellow, header + ": ") + value}
147
+ puts Log.color :yellow, lines * "\n"
148
+ end
149
+
150
+ fprocpath = File.join(dir, 'procpath.sqlite3')
151
+ if options[:procpath_performance] && Open.exists?(fprocpath)
152
+ puts Log.color(:magenta, "Procpath summary: ")
153
+ require 'rbbt/tsv/csv'
154
+ meta = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from meta;' "))
155
+ perf = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from record;' "))
156
+
157
+ page_size = meta["page_size"].first.to_f
158
+ clock_ticks = meta["clock_ticks"].first.to_f
159
+
160
+ cpu_average = {}
161
+ rss_average = {}
162
+ perf.through :key, ["ts", 'stat_pid', "stat_utime", "stat_stime", "stat_cutime", "stat_cstime", "stat_rss"] do |k, values|
163
+ time, stat_pid, ucpu, scpu, ccpu, cscpu, rss = values
164
+ time = time.to_f
165
+
166
+ cpu = Misc.sum([ucpu, scpu].collect{|v| v.to_f})
167
+ cpu_average[stat_pid] ||= {}
168
+ cpu_average[stat_pid][time] ||= []
169
+ cpu_average[stat_pid][time] << cpu.to_f
170
+ rss_average[time] ||= []
171
+ rss_average[time] << rss.to_f * page_size
172
+ end
173
+
174
+ ticks = 0
175
+ cpu_average.each do |stat_pid, cpu_average_pid|
176
+ start = cpu_average_pid.keys.sort.first
177
+ eend = cpu_average_pid.keys.sort.last
178
+ ticks += Misc.sum(cpu_average_pid[eend]) - Misc.sum(cpu_average_pid[start])
179
+ end
180
+ start = rss_average.keys.sort.first
181
+ eend = rss_average.keys.sort.last
182
+ time_elapsed = eend - start
183
+ puts Log.color(:yellow, "CPU average: ") + "%.2f" % ( ticks / clock_ticks / time_elapsed * 100).to_s
184
+ puts Log.color(:yellow, "RSS average: ") + "%.2f GB" % Misc.mean(rss_average.collect{|t,l| Misc.sum(l) / (1024 * 1024 * 1024)}).to_s
185
+
186
+ end
187
+
188
+ if options[:sacct_peformance]
189
+ begin
190
+ tsv = TSV.open(CMD.cmd("sacct -j #{id} -o 'jobid,AveRSS,MaxRSS,MaxDiskRead,MaxDiskWrite' -P|grep 'JobID\\|\.batch'"), :header_hash => '', :sep => "|", :type => :list)
191
+ values = tsv[tsv.keys.first]
192
+ if values.compact.any?
193
+ puts Log.color(:magenta, "SACCT performance: ")
194
+ puts values.zip(values.fields).collect{|v,t| Log.color(:yellow, t + ": ") + v.to_s } * "\n"
56
195
  end
57
-
58
- if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
59
- exe = m[1]
60
- else
61
- exe = nil
62
- end
63
-
64
- if m = Open.read(fcmd).match(/^CONTAINER_DIR=(.*)/)
65
- container_home = m[1]
66
- else
67
- container_home = nil
68
- end
69
-
70
-
71
- if File.exists?(fid = File.join(dir, 'job.id'))
72
- id = Open.read(fid).chomp
73
- else
74
- id = nil
75
- end
76
-
77
- if File.exists?(fstatus = File.join(dir, 'exit.status'))
78
- exit_status = Open.read(fstatus).to_i
79
- else
80
- exit_status = nil
81
- end
82
-
83
- if File.exists?(fstatus = File.join(dir, 'job.status'))
84
- nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
85
- else
86
- nodes = []
87
- end
88
-
89
- if File.exists?(File.join(dir, 'std.out'))
90
- outt = File.mtime File.join(dir, 'std.out')
91
- errt = File.mtime File.join(dir, 'std.err')
92
- time_diff = Time.now - [outt, errt].max
93
- end
94
-
95
- fdep = File.join(dir, 'dependencies.list')
96
- deps = Open.read(fdep).split("\n") if File.exists?(fdep)
97
-
98
- if done || error || aborted || running || queued || jobid || search
99
- select = false
100
- select = true if done && exit_status == 0
101
- select = true if error && exit_status && exit_status != 0
102
- select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
103
- select = true if queued && deps && (running_jobs & deps).any?
104
- select = true if running && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
105
- select = true if jobid && jobid.split(",").include?(id)
106
- select = true if search && cmd.match(/#{search}/)
107
- next unless select
108
- end
109
-
110
-
111
- puts Log.color :blue, dir
112
- puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
113
- puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
114
- puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
115
- puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
116
- puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
117
- puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
118
- puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
119
- puts Log.color(:magenta, "Nodes: ") << nodes * ", "
120
- puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
121
-
122
- if tail && File.exists?(File.join(dir, 'std.err'))
123
- if exit_status && exit_status != 0
124
- puts Log.color(:magenta, "First error or exception found: ")
125
- puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
126
- elsif exit_status
127
- puts Log.color(:magenta, "Completed jobs: ")
128
- puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
129
- else
130
- puts Log.color(:magenta, "Log tail: ")
131
- puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
132
- end
133
- end
134
-
135
- count += 1
196
+ rescue
197
+ end
198
+ end
199
+
200
+
201
+ if tail && File.exists?(File.join(dir, 'std.err'))
202
+ if exit_status && exit_status != 0
203
+ puts Log.color(:magenta, "First error or exception found: ")
204
+ puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
205
+ elsif exit_status
206
+ puts Log.color(:magenta, "Completed jobs: ")
207
+ puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
208
+ else
209
+ puts Log.color(:magenta, "Log tail: ")
210
+ puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
211
+ end
212
+ end
213
+
214
+ count += 1
136
215
 
137
216
  end
138
217