rbbt-util 5.29.1 → 5.30.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rbbt/tsv.rb CHANGED
@@ -113,8 +113,8 @@ module TSV
113
113
 
114
114
  data.entity_options = entity_options
115
115
 
116
- if Path === source and data.identifiers
117
- data.identifiers = Path.setup(data.identifiers, source.pkgdir, source.resource)
116
+ if Path === source && data.identifiers
117
+ Path.setup(data.identifiers, source.pkgdir, source.resource)
118
118
  end
119
119
 
120
120
  if data.respond_to? :persistence_path
@@ -124,6 +124,10 @@ module TSV
124
124
  data.clear
125
125
  data.annotate h
126
126
  end
127
+
128
+ data.read if data.respond_to? :persistence_path
129
+
130
+ data
127
131
  end
128
132
 
129
133
  def self.parse_header(stream, options = {})
data/lib/rbbt/util/cmd.rb CHANGED
@@ -217,7 +217,7 @@ module CMD
217
217
  end
218
218
  end
219
219
 
220
- def self.cmd_log(*args)
220
+ def self.cmd_pid(*args)
221
221
  all_args = *args
222
222
 
223
223
  all_args << {} unless Hash === all_args.last
@@ -248,4 +248,9 @@ module CMD
248
248
  nil
249
249
  end
250
250
 
251
# Runs a command exactly as CMD.cmd_pid does, forwarding every argument
# untouched, but hides whatever cmd_pid returns.
#
# @param args the same arguments accepted by CMD.cmd_pid
# @return [nil] always
def self.cmd_log(*args)
  _pid = cmd_pid(*args) # result deliberately discarded
  nil
end
255
+
251
256
  end
@@ -242,48 +242,6 @@ module Misc
242
242
 
243
243
  return options
244
244
 
245
- options = {}
246
- string.split(/#/).each do |str|
247
- if str.match(/(.*)=(.*)/)
248
- option, value = $1, $2
249
- else
250
- option, value = str, true
251
- end
252
-
253
- option = option.sub(":",'').to_sym if option.chars.first == ':'
254
- value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
255
-
256
- if value == true
257
- options[option] = option.to_s.chars.first != '!'
258
- else
259
- options[option] = Thread.start do
260
- $SAFE = 0;
261
- case
262
- when value =~ /^(?:true|T)$/i
263
- true
264
- when value =~ /^(?:false|F)$/i
265
- false
266
- when Symbol === value
267
- value
268
- when (String === value and value =~ /^\/(.*)\/$/)
269
- Regexp.new /#{$1}/
270
- else
271
- begin
272
- Kernel.const_get value
273
- rescue
274
- begin
275
- raise if value =~ /[a-z]/ and defined? value
276
- eval(value)
277
- rescue Exception
278
- value
279
- end
280
- end
281
- end
282
- end.value
283
- end
284
- end
285
-
286
- options
287
245
  end
288
246
 
289
247
  end
@@ -0,0 +1,49 @@
1
+ require 'rbbt/util/cmd'
2
# Helpers to drive the `procpath` command-line tool through CMD: record the
# process tree of a pid into a database file and plot the recorded data.
module ProcPath
  CMD.tool :procpath do
    'pip install procpath'
  end

  # Option names forwarded to `procpath record`.
  RECORD_OPTIONS = %w(interval recnum reevalnum).freeze

  # Option names forwarded to `procpath plot`. The window option is spelled
  # "moving-average-window" so that it matches the default set in ProcPath.plot.
  PLOT_OPTIONS = %w(query-name epsilon moving-average-window title logarithmic after before custom-query-file custom-value-expr).freeze

  # Records the process `pid` (and its children, as selected by the query
  # string) into the database file at `path`, and blocks until `pid` ends.
  #
  # pid     - process id to follow; it is waited on with Process.wait
  # path    - database file handed to `procpath record --database-file`
  # options - Hash; only the RECORD_OPTIONS keys are forwarded to procpath.
  #           "interval" defaults to 30.
  def self.record(pid, path, options = {})
    IndiferentHash.setup(options)
    options = Misc.add_defaults options, "interval" => 30

    cmd_options = RECORD_OPTIONS.inject({}){|acc,k| acc[k] = options[k]; acc}

    Log.debug "ProcPath recording #{pid} in #{path} (#{Misc.fingerprint options})"
    procpath_thread = Thread.new do
      procpath_pid = nil
      begin
        procpath_pid = CMD.cmd_pid(:procpath, "record --database-file '#{path}' '$..children[?(@.stat.pid == #{pid})]'", cmd_options.merge(:nofail => true, :add_option_dashes => true))
      rescue Exception
        # Exception (not StandardError) on purpose: the Interrupt raised below
        # to stop the recording is not a StandardError.
        Log.exception $!
        # The pid is only known once cmd_pid has returned; sending a signal
        # to nil would raise a TypeError.
        # NOTE(review): if cmd_pid blocks until procpath exits, the pid is
        # never known here and procpath is not signalled -- confirm.
        Process.kill "INT", procpath_pid if procpath_pid
      end
    end

    # Failures inside the recording thread are handled above; keep them quiet.
    procpath_thread.report_on_exception = false

    Process.wait pid.to_i
    procpath_thread.raise Interrupt
  end

  # Plots the data recorded in `path` into the file `output`.
  #
  # path    - database file handed to `procpath plot --database-file`
  # output  - file handed to `--plot-file`
  # options - Hash; only the PLOT_OPTIONS keys are forwarded to procpath.
  #           Defaults: "query-name" => 'rss', 'epsilon' => 0.5,
  #           "moving-average-window" => 10.
  def self.plot(path, output, options = {})
    IndiferentHash.setup(options)
    options = Misc.add_defaults options, "query-name" => 'rss', 'epsilon' => 0.5, "moving-average-window" => 10

    cmd_options = PLOT_OPTIONS.inject({}){|acc,k| acc[k] = options[k]; acc}
    CMD.cmd_log(:procpath, "plot --database-file '#{path}' --plot-file '#{output}' ", cmd_options.merge(:nofail => true, :add_option_dashes => true))
  end

  # Records `pid` until it finishes and then renders the cpu and rss plots.
  #
  # path - "<database>" or "<database>#<options>". The part after the first
  #        '#' is parsed with Misc.string2hash. The files written are
  #        "<database>.sqlite3", "<database>.cpu.svg" and "<database>.rss.svg".
  def self.monitor(pid, path)
    # Split only on the first '#', so the whole option string reaches
    # Misc.string2hash instead of just its first fragment.
    # NOTE(review): assumes string2hash itself uses '#' to separate
    # options -- confirm.
    database, options_str = path.split("#", 2)
    options = options_str.nil? ? {} : Misc.string2hash(options_str)

    database = File.expand_path database
    Log.low "ProcPath monitor #{pid} in #{database} (#{Misc.fingerprint options})"

    ProcPath.record(pid, database + '.sqlite3', options)
    ProcPath.plot(database + '.sqlite3', database + '.cpu.svg', options.merge("query-name" => 'cpu'))
    ProcPath.plot(database + '.sqlite3', database + '.rss.svg', options.merge("query-name" => 'rss'))
  end
end
49
+
@@ -505,8 +505,8 @@ class Step
505
505
 
506
506
  def running?
507
507
  return false if ! (started? || status == :ending)
508
- pid = info[:pid]
509
- return nil if pid.nil?
508
+ return nil unless Open.exist?(self.pid_file)
509
+ pid = Open.read(self.pid_file).to_i
510
510
 
511
511
  return false if done? or error? or aborted?
512
512
 
@@ -530,8 +530,7 @@ class Step
530
530
  end
531
531
 
532
532
  def nopid?
533
- pid = info[:pid] || Open.exists?(pid_file)
534
- ! pid && ! (status.nil? || status == :aborted || status == :done || status == :error || status == :cleaned)
533
+ ! Open.exists?(pid_file) && ! (status.nil? || status == :aborted || status == :done || status == :error || status == :cleaned)
535
534
  end
536
535
 
537
536
  def aborted?
@@ -373,7 +373,6 @@ class Step
373
373
  Log.exception $!
374
374
  ensure
375
375
  Step.purge_stream_cache
376
- set_info :pid, nil
377
376
  Open.rm pid_file if Open.exist?(pid_file)
378
377
  end
379
378
  end
@@ -388,7 +387,6 @@ class Step
388
387
  _clean_finished
389
388
  rescue
390
389
  stop_dependencies
391
- set_info :pid, nil
392
390
  Open.rm pid_file if Open.exist?(pid_file)
393
391
  end
394
392
  end
@@ -449,7 +447,7 @@ class Step
449
447
  ensure
450
448
  no_load = false unless IO === result
451
449
  Open.rm pid_file if Open.exist?(pid_file) unless no_load
452
- set_info :pid, nil unless no_load
450
+ #set_info :pid, nil unless no_load
453
451
  end
454
452
  end
455
453
 
@@ -559,7 +557,7 @@ class Step
559
557
  RbbtSemaphore.post_semaphore(semaphore) if semaphore
560
558
  Kernel.exit! -1
561
559
  end
562
- set_info :pid, nil
560
+ #set_info :pid, nil
563
561
  ensure
564
562
  RbbtSemaphore.post_semaphore(semaphore) if semaphore
565
563
  end
@@ -57,7 +57,7 @@ module Task
57
57
  puts Log.color(:magenta, "Input select options")
58
58
  puts
59
59
  selects.collect{|p| p}.uniq.each do |input,options|
60
- puts Log.color(:blue, input.to_s + ": ") << Misc.format_paragraph(options.collect{|o| o.to_s} * ", ") << "\n"
60
+ puts Log.color(:blue, input.to_s + ": ") << Misc.format_paragraph(options.collect{|o| Array === o ? o.first.to_s : o.to_s} * ", ") << "\n"
61
61
  puts unless Log.compact
62
62
  end
63
63
  puts
@@ -0,0 +1,165 @@
1
#!/usr/bin/env ruby

# Walks every job directory under ~/rbbt-slurm (one per command.slurm file),
# prints a summary of the jobs matching the requested filters and erases
# their directories, unless --dry_run is given.

require 'rbbt-util'
require 'rbbt/util/simpleopt'

#$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands

options = SOPT.setup <<EOF

Clean error or aborted jobs

$ rbbt mnl [options]

-h--help Print this help
-d--done Done jobs only
-e--error Error jobs only
-a--aborted SLURM aboted jobs
-j--job* Job ids
-s--search* Regular expression
-t--tail* Show the last lines of the STDERR
-SBP--sbatch_parameters show sbatch parameters
-dr--dry_run Do not erase anything
EOF

if options[:help]
  if defined? rbbt_usage
    rbbt_usage
  else
    puts SOPT.doc
  end
  exit 0
end

Log.severity = 4
done, error, aborted, jobid, search, tail, sbatch_parameters, dry_run = options.values_at :done, :error, :aborted, :job, :search, :tail, :sbatch_parameters, :dry_run

# With no status filter at all, target failed and aborted jobs. An explicit
# --done must not silently add them ("Done jobs only").
aborted = error = true if done.nil? && aborted.nil? && error.nil?

workdir = File.expand_path('~/rbbt-slurm')
Path.setup(workdir)

running_jobs = begin
                 squeue_txt = CMD.cmd('squeue').read
                 squeue_txt.split("\n").collect{|l| l.to_i.to_s}
               rescue
                 Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
                 squeue_txt = nil
                 $norunningjobs = true
                 []
               end

# Job id => node list, taken from the first and last column of each squeue
# line. Stays empty (never nil) when squeue could not be queried, so the
# lookups below are always safe.
job_nodes = {}
if squeue_txt
  squeue_txt.split("\n").each do |line|
    parts = line.strip.split(/\s+/)
    next if parts.empty?
    job_nodes[parts.first] = parts.last.split(",")
  end
end

count = 0
workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
  dir = File.dirname(fcmd)

  # Read the batch script once; the three patterns below all scan it.
  command_txt = Open.read(fcmd)

  m = command_txt.match(/#CMD: (.*)/)
  cmd = m ? m[1] : nil

  m = command_txt.match(/# Run command\n(.*?)\n/im)
  exe = m ? m[1] : nil

  m = command_txt.match(/^CONTAINER_DIR=(.*)/)
  container_home = m ? m[1] : nil

  fid = File.join(dir, 'job.id')
  id = File.exist?(fid) ? Open.read(fid).chomp : nil

  fexit = File.join(dir, 'exit.status')
  exit_status = File.exist?(fexit) ? Open.read(fexit).to_i : nil

  fstatus = File.join(dir, 'job.status')
  if File.exist?(fstatus)
    # Nodes are the comma separated last field of the last line; an empty
    # file yields no nodes.
    last_field = Open.read(fstatus).split("\n").last.to_s.split(/\s+/).last
    nodes = last_field.to_s.split(",")
  elsif job_nodes[id]
    nodes = job_nodes[id]
  else
    nodes = []
  end

  fout = File.join(dir, 'std.out')
  ferr = File.join(dir, 'std.err')
  has_stdout = File.exist?(fout)

  # Seconds since the most recent write to std.out / std.err; nil when there
  # is no std.out. std.err is only consulted if it exists.
  time_diff = nil
  if has_stdout
    last_update = [fout, ferr].select{|f| File.exist?(f) }.collect{|f| File.mtime(f) }.max
    time_diff = Time.now - last_update
  end

  fdep = File.join(dir, 'dependencies.list')
  deps = Open.read(fdep).split("\n") if File.exist?(fdep)

  fcadep = File.join(dir, 'canfail_dependencies.list')
  cadeps = Open.read(fcadep).split("\n") if File.exist?(fcadep)

  if done || error || aborted || jobid || search
    select = false
    select = true if done && exit_status == 0
    select = true if error && exit_status && exit_status != 0
    # If squeue could not be queried no job is known to be dead, so none is
    # treated as aborted (and erased); this matches the warning above.
    select = true if aborted && exit_status.nil? && ! $norunningjobs && ! running_jobs.include?(id)
    select = select && jobid.split(",").include?(id) if jobid
    select = select && cmd && cmd.match(/#{search}/) if search
    next unless select
  end

  job_status = if exit_status
                 (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })"
               elsif running_jobs.include?(id) || $norunningjobs
                 Log.color(:green, id)
               else
                 Log.color(:red, id)
               end

  puts Log.color(:yellow, "**ERASING**")
  puts Log.color :blue, dir
  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
  puts Log.color(:magenta, "Done: ") << File.mtime(fexit).to_s if File.exist?(fexit)
  puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
  puts Log.color(:magenta, "CMD: ") << (cmd ? Log.color(:yellow, cmd) : "Missing")
  puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
  puts Log.color(:magenta, "Job ID: ") << job_status
  puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
  puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
  puts Log.color(:magenta, "Nodes: ") << nodes * ", "
  puts Log.color(:magenta, "Output: ") << has_stdout.to_s << ((id.nil? || time_diff.nil?) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")

  if sbatch_parameters
    puts Log.color(:magenta, "SBATCH parameters: ")
    puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => command_txt).read.strip
  end

  if tail && File.exist?(ferr)
    if exit_status && exit_status != 0
      puts Log.color(:magenta, "First error or exception found: ")
      puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{ferr} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
    elsif exit_status
      puts Log.color(:magenta, "Completed jobs: ")
      puts CMD.cmd("grep -i -w 'Completed step' #{ferr} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
    else
      puts Log.color(:magenta, "Log tail: ")
      puts CMD.cmd("tail -n #{tail.to_i} #{ferr}").read
    end
  end

  count += 1

  Open.rm_rf dir unless dry_run
end

puts
puts "Found #{count} jobs"
165
+
@@ -20,15 +20,18 @@ $ rbbt mnl [options]
20
20
  -j--job* Job ids
21
21
  -s--search* Regular expression
22
22
  -t--tail* Show the last lines of the STDERR
23
+ -SBP--sbatch_parameters show sbatch parameters
24
+ -PERF--procpath_performance show Procpath performance summary
25
+ -sacct--sacct_peformance show sacct performance summary
23
26
  EOF
24
27
 
25
28
  if options[:help]
26
- if defined? rbbt_usage
27
- rbbt_usage
28
- else
29
- puts SOPT.doc
30
- end
31
- exit 0
29
+ if defined? rbbt_usage
30
+ rbbt_usage
31
+ else
32
+ puts SOPT.doc
33
+ end
34
+ exit 0
32
35
  end
33
36
 
34
37
  Log.severity = 4
@@ -38,101 +41,177 @@ workdir = File.expand_path('~/rbbt-slurm')
38
41
  Path.setup(workdir)
39
42
 
40
43
  running_jobs = begin
41
- CMD.cmd('squeue').read.split("\n").collect{|l| l.to_i.to_s}
44
+ squeue_txt = CMD.cmd('squeue').read
45
+ squeue_txt.split("\n").collect{|l| l.to_i.to_s}
42
46
  rescue
43
- Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
44
- $norunningjobs = true
45
- []
47
+ Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
48
+ squeue_txt = nil
49
+ $norunningjobs = true
50
+ []
46
51
  end
47
52
 
53
+ if squeue_txt
54
+ job_nodes = {}
55
+ squeue_txt.split("\n").each do |line|
56
+ parts = line.strip.split(/\s+/)
57
+ job_nodes[parts.first] = parts.last.split(",")
58
+ end
59
+ else
60
+ job_nodes = nil
61
+ end
62
+
48
63
  count = 0
49
64
  workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
50
- dir = File.dirname(fcmd)
51
-
52
- if m = Open.read(fcmd).match(/#CMD: (.*)/)
53
- cmd = m[1]
54
- else
55
- cmd = nil
65
+ dir = File.dirname(fcmd)
66
+
67
+ if m = Open.read(fcmd).match(/#CMD: (.*)/)
68
+ cmd = m[1]
69
+ else
70
+ cmd = nil
71
+ end
72
+
73
+ if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
74
+ exe = m[1].sub('step_path=$(','')
75
+ else
76
+ exe = nil
77
+ end
78
+
79
+ if m = Open.read(fcmd).match(/^CONTAINER_DIR=(.*)/)
80
+ container_home = m[1]
81
+ else
82
+ container_home = nil
83
+ end
84
+
85
+
86
+ if File.exists?(fid = File.join(dir, 'job.id'))
87
+ id = Open.read(fid).chomp
88
+ else
89
+ id = nil
90
+ end
91
+
92
+ if File.exists?(fstatus = File.join(dir, 'exit.status'))
93
+ exit_status = Open.read(fstatus).to_i
94
+ else
95
+ exit_status = nil
96
+ end
97
+
98
+ if File.exists?(fstatus = File.join(dir, 'job.status'))
99
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
100
+ elsif job_nodes[id]
101
+ nodes = job_nodes[id]
102
+ else
103
+ nodes = []
104
+ end
105
+
106
+ if File.exists?(File.join(dir, 'std.out'))
107
+ outt = File.mtime File.join(dir, 'std.out')
108
+ errt = File.mtime File.join(dir, 'std.err')
109
+ time_diff = Time.now - [outt, errt].max
110
+ end
111
+
112
+ fdep = File.join(dir, 'dependencies.list')
113
+ deps = Open.read(fdep).split("\n") if File.exists?(fdep)
114
+
115
+ fcadep = File.join(dir, 'canfail_dependencies.list')
116
+ cadeps = Open.read(fcadep).split("\n") if File.exists?(fcadep)
117
+
118
+ if done || error || aborted || running || queued || jobid || search
119
+ select = false
120
+ select = true if done && exit_status == 0
121
+ select = true if error && exit_status && exit_status != 0
122
+ select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
123
+ select = true if queued && deps && (running_jobs & deps).any?
124
+ select = true if running && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
125
+ select = true if jobid && jobid.split(",").include?(id)
126
+ select = true if search && cmd.match(/#{search}/)
127
+ next unless select
128
+ end
129
+
130
+
131
+ puts Log.color :blue, dir
132
+ puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
133
+ puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
134
+ puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
135
+ puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
136
+ puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
137
+ puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
138
+ puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
139
+ puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
140
+ puts Log.color(:magenta, "Nodes: ") << nodes * ", "
141
+ puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
142
+
143
+ if options[:sbatch_parameters]
144
+ puts Log.color(:magenta, "SBATCH parameters: ")
145
+ text = CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
146
+ lines = text.split("\n").collect{|line| header, _sep, value = line.partition(/\s+/); Log.color(:yellow, header + ": ") + value}
147
+ puts Log.color :yellow, lines * "\n"
148
+ end
149
+
150
+ fprocpath = File.join(dir, 'procpath.sqlite3')
151
+ if options[:procpath_performance] && Open.exists?(fprocpath)
152
+ puts Log.color(:magenta, "Procpath summary: ")
153
+ require 'rbbt/tsv/csv'
154
+ meta = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from meta;' "))
155
+ perf = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from record;' "))
156
+
157
+ page_size = meta["page_size"].first.to_f
158
+ clock_ticks = meta["clock_ticks"].first.to_f
159
+
160
+ cpu_average = {}
161
+ rss_average = {}
162
+ perf.through :key, ["ts", 'stat_pid', "stat_utime", "stat_stime", "stat_cutime", "stat_cstime", "stat_rss"] do |k, values|
163
+ time, stat_pid, ucpu, scpu, ccpu, cscpu, rss = values
164
+ time = time.to_f
165
+
166
+ cpu = Misc.sum([ucpu, scpu].collect{|v| v.to_f})
167
+ cpu_average[stat_pid] ||= {}
168
+ cpu_average[stat_pid][time] ||= []
169
+ cpu_average[stat_pid][time] << cpu.to_f
170
+ rss_average[time] ||= []
171
+ rss_average[time] << rss.to_f * page_size
172
+ end
173
+
174
+ ticks = 0
175
+ cpu_average.each do |stat_pid, cpu_average_pid|
176
+ start = cpu_average_pid.keys.sort.first
177
+ eend = cpu_average_pid.keys.sort.last
178
+ ticks += Misc.sum(cpu_average_pid[eend]) - Misc.sum(cpu_average_pid[start])
179
+ end
180
+ start = rss_average.keys.sort.first
181
+ eend = rss_average.keys.sort.last
182
+ time_elapsed = eend - start
183
+ puts Log.color(:yellow, "CPU average: ") + "%.2f" % ( ticks / clock_ticks / time_elapsed * 100).to_s
184
+ puts Log.color(:yellow, "RSS average: ") + "%.2f GB" % Misc.mean(rss_average.collect{|t,l| Misc.sum(l) / (1024 * 1024 * 1024)}).to_s
185
+
186
+ end
187
+
188
+ if options[:sacct_peformance]
189
+ begin
190
+ tsv = TSV.open(CMD.cmd("sacct -j #{id} -o 'jobid,AveRSS,MaxRSS,MaxDiskRead,MaxDiskWrite' -P|grep 'JobID\\|\.batch'"), :header_hash => '', :sep => "|", :type => :list)
191
+ values = tsv[tsv.keys.first]
192
+ if values.compact.any?
193
+ puts Log.color(:magenta, "SACCT performance: ")
194
+ puts values.zip(values.fields).collect{|v,t| Log.color(:yellow, t + ": ") + v.to_s } * "\n"
56
195
  end
57
-
58
- if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
59
- exe = m[1]
60
- else
61
- exe = nil
62
- end
63
-
64
- if m = Open.read(fcmd).match(/^CONTAINER_DIR=(.*)/)
65
- container_home = m[1]
66
- else
67
- container_home = nil
68
- end
69
-
70
-
71
- if File.exists?(fid = File.join(dir, 'job.id'))
72
- id = Open.read(fid).chomp
73
- else
74
- id = nil
75
- end
76
-
77
- if File.exists?(fstatus = File.join(dir, 'exit.status'))
78
- exit_status = Open.read(fstatus).to_i
79
- else
80
- exit_status = nil
81
- end
82
-
83
- if File.exists?(fstatus = File.join(dir, 'job.status'))
84
- nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
85
- else
86
- nodes = []
87
- end
88
-
89
- if File.exists?(File.join(dir, 'std.out'))
90
- outt = File.mtime File.join(dir, 'std.out')
91
- errt = File.mtime File.join(dir, 'std.err')
92
- time_diff = Time.now - [outt, errt].max
93
- end
94
-
95
- fdep = File.join(dir, 'dependencies.list')
96
- deps = Open.read(fdep).split("\n") if File.exists?(fdep)
97
-
98
- if done || error || aborted || running || queued || jobid || search
99
- select = false
100
- select = true if done && exit_status == 0
101
- select = true if error && exit_status && exit_status != 0
102
- select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
103
- select = true if queued && deps && (running_jobs & deps).any?
104
- select = true if running && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
105
- select = true if jobid && jobid.split(",").include?(id)
106
- select = true if search && cmd.match(/#{search}/)
107
- next unless select
108
- end
109
-
110
-
111
- puts Log.color :blue, dir
112
- puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
113
- puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
114
- puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
115
- puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
116
- puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
117
- puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
118
- puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
119
- puts Log.color(:magenta, "Nodes: ") << nodes * ", "
120
- puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
121
-
122
- if tail && File.exists?(File.join(dir, 'std.err'))
123
- if exit_status && exit_status != 0
124
- puts Log.color(:magenta, "First error or exception found: ")
125
- puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
126
- elsif exit_status
127
- puts Log.color(:magenta, "Completed jobs: ")
128
- puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
129
- else
130
- puts Log.color(:magenta, "Log tail: ")
131
- puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
132
- end
133
- end
134
-
135
- count += 1
196
+ rescue
197
+ end
198
+ end
199
+
200
+
201
+ if tail && File.exists?(File.join(dir, 'std.err'))
202
+ if exit_status && exit_status != 0
203
+ puts Log.color(:magenta, "First error or exception found: ")
204
+ puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
205
+ elsif exit_status
206
+ puts Log.color(:magenta, "Completed jobs: ")
207
+ puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
208
+ else
209
+ puts Log.color(:magenta, "Log tail: ")
210
+ puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
211
+ end
212
+ end
213
+
214
+ count += 1
136
215
 
137
216
  end
138
217