scout-gear 10.1.0 → 10.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,211 @@
#!/usr/bin/env ruby

# Erases batch job directories found under ~/scout-batch.
#
# Every directory containing a `command.batch` file is treated as one job.
# When neither --done, --error nor --aborted is given, error and aborted jobs
# are selected. Each selected job is described on STDOUT and its directory is
# removed unless --dry_run is set.

require 'scout'

require 'rbbt/hpc'

$0 = "scout #{$previous_commands.any? ? $previous_commands*" " + " " : "" }#{ File.basename(__FILE__) }" if $previous_commands

options = SOPT.setup <<EOF

Clean error or aborted jobs

$ #{$0} [<options>]

-h--help Print this help
-d--done Done jobs only
-e--error Error jobs only
-a--aborted SLURM aborted jobs
-q--queued Queued jobs only
-j--job* Job ids
-s--search* Regular expression
-t--tail* Show the last lines of the STDERR
-BP--batch_parameters show batch parameters
-dr--dry_run Do not erase anything
-bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
EOF
if options[:help]
  if defined? scout_usage
    scout_usage
  else
    puts SOPT.doc
  end
  exit 0
end

batch_system = options.delete :batch_system
batch_system ||= 'auto'

HPC::BATCH_MODULE = HPC.batch_system batch_system

raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?

# Use the name of the detected module (e.g. 'slurm') so that it can be compared
# with the BATCH_SYSTEM value recorded in each job; the literal 'auto' would
# never match and every job would look like it belongs to a different system.
batch_system = HPC::BATCH_MODULE.to_s.split("::").last.downcase

Log.severity = 4
done, error, aborted, queued, jobid, search, tail, batch_parameters, dry_run = options.values_at :done, :error, :aborted, :queued, :job, :search, :tail, :batch_parameters, :dry_run

# Default selection: error and aborted jobs, unless done jobs were requested
# or one of the two flags was given explicitly.
aborted = error = true if ! done && aborted.nil? && error.nil?

workdir = File.expand_path('~/scout-batch')
Path.setup(workdir)

# Ids of the jobs reported by the batch system; empty when it cannot be queried
running_jobs = begin
                 squeue_txt = HPC::BATCH_MODULE.job_status
                 squeue_txt.split("\n").collect{|l| l.to_i.to_s}
               rescue
                 Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
                 squeue_txt = nil
                 $norunningjobs = true
                 []
               end

# Map from the first column of each status line to the comma-separated last
# column; nil when the batch system could not be queried
if squeue_txt
  job_nodes = {}
  squeue_txt.split("\n").each do |line|
    parts = line.strip.split(/\s+/)
    next if parts.empty?
    job_nodes[parts.first] = parts.last.split(",")
  end
else
  job_nodes = nil
end

count = 0
workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
  dir = File.dirname(fcmd)
  command_txt = Open.read(fcmd)

  # Values recorded in the batch script; nil when the line is missing
  cmd = command_txt[/#CMD: (.*)/, 1]
  exe = command_txt[/# Run command\n(.*?)\n/im, 1]
  container_home = command_txt[/^CONTAINER_DIR=(.*)/, 1]

  # Accept the variable with or without the `export` prefix
  job_batch_system = command_txt[/^(?:export )?BATCH_SYSTEM=(.*)/, 1]
  job_batch_system = job_batch_system.downcase if job_batch_system

  different_system = job_batch_system != batch_system

  fid = File.join(dir, 'job.id')
  id = File.exist?(fid) ? Open.read(fid).chomp : nil

  fexit = File.join(dir, 'exit.status')
  exit_status = File.exist?(fexit) ? Open.read(fexit).to_i : nil

  fstatus = File.join(dir, 'job.status')
  if File.exist?(fstatus)
    begin
      fields = Open.read(fstatus).split("\n").last.split(/\s+/)
      # LSF status lines are read at the sixth column, others at the last one
      nodes = (job_batch_system == "lsf" ? fields[5] : fields.last).split(",")
    rescue
      nodes = []
    end
  elsif job_nodes && job_nodes[id]
    nodes = job_nodes[id]
  else
    nodes = []
  end

  # Seconds since the most recent write to the job logs; std.err may be missing
  time_diff = nil
  if File.exist?(File.join(dir, 'std.out'))
    log_files = %w(std.out std.err).collect{|name| File.join(dir, name) }.select{|f| File.exist?(f) }
    time_diff = Time.now - log_files.collect{|f| File.mtime(f) }.max
  end

  fdep = File.join(dir, 'dependencies.list')
  deps = Open.read(fdep).split("\n") if File.exist?(fdep)

  fcadep = File.join(dir, 'canfail_dependencies.list')
  cadeps = Open.read(fcadep).split("\n") if File.exist?(fcadep)

  if done || error || aborted || queued || jobid
    select = false
    select = true if done && exit_status == 0
    select = true if error && exit_status && exit_status != 0
    select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
    is_running = exit_status.nil? && ( (running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)) || different_system )
    select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
    select = true if jobid && jobid.split(",").include?(id)
    # Jobs without a recorded command can never match a search
    select = select && cmd && cmd.match(/#{search}/) if search
    next unless select
  elsif search
    next unless cmd && cmd.match(/#{search}/)
  end

  puts Log.color(:yellow, "**ERASING**")
  puts Log.color :blue, dir
  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
  puts Log.color(:magenta, "Done: ") << File.mtime(fexit).to_s if File.exist?(fexit)
  puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
  puts Log.color(:magenta, "CMD: ") << (cmd ? Log.color(:yellow, cmd) : "Missing")
  puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
  puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
  puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
  puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
  puts Log.color(:magenta, "Nodes: ") << nodes * ", "
  last_update = (id.nil? || time_diff.nil?) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)"
  puts Log.color(:magenta, "Output: ") << File.exist?(File.join(dir, 'std.out')).to_s << last_update

  if batch_parameters
    puts Log.color(:magenta, "BATCH parameters: ")
    case job_batch_system
    when 'slurm'
      puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => command_txt).read.strip
    when 'lsf'
      puts Log.color :blue, CMD.cmd('grep "^#BSUB" |tail -n +6', :in => command_txt).read.strip
    end
  end

  if tail && File.exist?(File.join(dir, 'std.err'))
    if exit_status && exit_status != 0
      puts Log.color(:magenta, "First error or exception found: ")
      puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
    elsif exit_status
      puts Log.color(:magenta, "Completed jobs: ")
      puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
    else
      puts Log.color(:magenta, "Log tail: ")
      puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
    end
  end

  count += 1

  Open.rm_rf dir unless dry_run
end

puts
puts "Found #{count} jobs"
@@ -0,0 +1,357 @@
#!/usr/bin/env ruby

# Lists the batch jobs found under ~/scout-batch.
#
# Every directory containing a `command.batch` file is treated as one job.
# Jobs can be filtered by state (--done, --error, --aborted, --running,
# --queued), by job id or by a regular expression, and are reported either as
# a one-line summary (--compressed) or as a block of details.

require 'scout'

require 'rbbt/hpc'

$0 = "scout #{$previous_commands.any? ? $previous_commands*" " + " " : "" }#{ File.basename(__FILE__) }" if $previous_commands

options = SOPT.setup <<EOF

List all batch jobs

$ #{$0} [<options>]

-h--help Print this help
-d--done Done jobs only
-e--error Error jobs only
-a--aborted SLURM aborted jobs
-r--running Running jobs only
-q--queued Queued jobs only
-j--job* Job ids
-s--search* Regular expression
-t--tail* Show the last lines of the STDERR
-l--long Show more entries
-c--compressed Show compressed information about entries
-p--progress Report progress of job and the dependencies
-BP--batch_parameters show batch parameters
-BPP--batch_procpath show Procpath performance summary
-sacct--sacct_peformance show sacct performance summary
-bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
EOF
if options[:help]
  if defined? scout_usage
    scout_usage
  else
    puts SOPT.doc
  end
  exit 0
end

batch_system = options.delete :batch_system
batch_system ||= 'auto'

long = options.delete :long

HPC::BATCH_MODULE = HPC.batch_system batch_system

raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?

# Name of the detected module (e.g. 'slurm'), comparable with the BATCH_SYSTEM
# value recorded in each job
batch_system = HPC::BATCH_MODULE.to_s.split("::").last.downcase

done, error, running, queued, aborted, jobid, search, tail = options.values_at :done, :error, :running, :queued, :aborted, :job, :search, :tail

workdir = File.expand_path('~/scout-batch')
Path.setup(workdir)

# Ids of the jobs reported by the batch system; empty when it cannot be queried
running_jobs = begin
                 squeue_txt = HPC::BATCH_MODULE.job_status
                 squeue_txt.split("\n").collect{|l| l.to_i.to_s}
               rescue
                 Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
                 squeue_txt = nil
                 $norunningjobs = true
                 []
               end

# Map from the first column of each status line to the comma-separated last
# column; nil when the batch system could not be queried
if squeue_txt
  job_nodes = {}
  squeue_txt.split("\n").each do |line|
    parts = line.strip.split(/\s+/)
    next if parts.empty?
    job_nodes[parts.first] = parts.last.split(",")
  end
else
  job_nodes = nil
end

count = 0
workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
  dir = File.dirname(fcmd)
  command_txt = Open.read(fcmd)

  # Values recorded in the batch script; nil when the line is missing
  cmd = command_txt[/#CMD: (.*)/, 1]
  manifest = command_txt[/#MANIFEST: (.*)/, 1]
  step_path = command_txt[/#STEP_PATH: (.*)/, 1]
  exe = command_txt[/#EXEC_CMD: (.*)/, 1]
  container_home = command_txt[/^CONTAINER_DIR=(.*)/, 1]

  job_batch_system = command_txt[/^export BATCH_SYSTEM=(.*)/, 1]
  job_batch_system = job_batch_system.downcase if job_batch_system

  different_system = job_batch_system != batch_system

  fid = File.join(dir, 'job.id')
  id = File.exist?(fid) ? Open.read(fid).chomp : nil

  fexit = File.join(dir, 'exit.status')
  exit_status = File.exist?(fexit) ? Open.read(fexit).to_i : nil

  fstatus = File.join(dir, 'job.status')
  if File.exist?(fstatus)
    begin
      fields = Open.read(fstatus).split("\n").last.split(/\s+/)
      # LSF status lines are read at the sixth column, others at the last one
      nodes = (job_batch_system == "lsf" ? fields[5] : fields.last).split(",")
    rescue
      nodes = []
    end
  elsif job_nodes && job_nodes[id]
    nodes = job_nodes[id].reject{|n| n.include? "("}
  else
    nodes = []
  end

  # For finished jobs times are measured up to the creation of exit.status
  now = File.exist?(fexit) ? File.ctime(fexit) : Time.now

  # std.err may be missing even when std.out exists, so only stat what is there
  time_diff = time_elapsed = nil
  if File.exist?(File.join(dir, 'std.out'))
    log_files = %w(std.out std.err).collect{|name| File.join(dir, name) }.select{|f| File.exist?(f) }
    time_diff = now - log_files.collect{|f| File.mtime(f) }.max
    time_elapsed = now - log_files.collect{|f| File.ctime(f) }.min
  end

  fdep = File.join(dir, 'dependencies.list')
  deps = Open.read(fdep).split("\n") if File.exist?(fdep)

  fcadep = File.join(dir, 'canfail_dependencies.list')
  cadeps = Open.read(fcadep).split("\n") if File.exist?(fcadep)

  is_running = exit_status.nil? && ( (running_jobs.include?(id) && (deps.nil? || (running_jobs & deps).empty?)) || different_system )
  if done || error || aborted || running || queued || jobid
    select = false
    select = true if done && exit_status == 0
    select = true if error && exit_status && exit_status != 0
    select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
    select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
    select = true if running && nodes.any? && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
    select = true if jobid && jobid.split(",").include?(id)
    # Jobs without a recorded step path can never match a search
    select = select && step_path && step_path.match(/#{search}/) if search
    next unless select
  elsif search
    next unless cmd && cmd.match(/#{search}/)
  end

  count += 1

  finished_status = exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : nil

  if options[:compressed]
    status = if finished_status
               finished_status
             elsif different_system
               Log.color(:green, id)
             elsif running_jobs.include?(id) || $norunningjobs
               is_running ? Log.color(:cyan, id) : Log.color(:yellow, id)
             else
               Log.color(:red, id)
             end

    prog_rep = []
    if options[:progress]
      step_line = command_txt.split("\n").select{|line| line =~ /^#STEP_PATH:/}.first
      if step_line
        step_path = step_line.split(": ").last.strip
        step = Step.new step_path
        if ! step.done? && step.file(:progress).exists?
          bar = Log::ProgressBar.new
          bar.load(step.file(:progress).yaml)
          rep = bar.report_msg.split("·")[1]
          # Keep only the percentage of the report, when there is one
          prog_rep << rep.sub(/.*?(\d+%)/, Log.color(:blue,'\1')).sub(/\-.*/,'') if rep
        end
      end
    end

    if step_path
      workflow, task, name = step_path.split("/")[-3..-1]
      job_str = [Log.color(:yellow, workflow), Log.color(:magenta, task), name] * "/"
    else
      # No step path recorded: identify the job by its directory
      job_str = Log.color(:blue, dir)
    end
    puts [job_str, status, prog_rep ].flatten * " "
    next
  end

  puts Log.color :blue, dir
  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s if long
  puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err')) && long
  puts Log.color(:magenta, "Manifest: ") << (manifest ? Log.color(:yellow, manifest) : "Missing") if long
  puts Log.color(:magenta, "Step path: ") << (step_path ? Log.color(:yellow, step_path) : "Missing")
  puts Log.color(:magenta, "Done: ") << File.mtime(fexit).to_s if File.exist?(fexit)
  puts Log.color(:magenta, "Exec: ") << (exe || "Missing") if long
  puts Log.color(:magenta, "CMD: ") << (cmd ? Log.color(:yellow, cmd) : "Missing") if long
  puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home && long
  if different_system
    puts Log.color(:magenta, "Job ID (#{Log.color(:red, job_batch_system)}): ") << (finished_status || Log.color(:green, id))
  else
    puts Log.color(:magenta, "Job ID: ") << (finished_status || (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
  end
  puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
  puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
  puts Log.color(:magenta, "Nodes: ") << nodes * ", " if long
  puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
  if long
    last_update = (id.nil? || File.exist?(fexit) || time_diff.nil?) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)"
    puts Log.color(:magenta, "Output: ") << File.exist?(File.join(dir, 'std.out')).to_s << last_update
  end

  if options[:batch_parameters]
    puts Log.color(:magenta, "BATCH parameters: ")
    case job_batch_system
    when 'slurm'
      text = CMD.cmd('grep "^#SBATCH" ', :in => command_txt).read.strip
    when 'lsf'
      text = CMD.cmd('grep "^#BSUB" ', :in => command_txt).read.strip
    else
      text = ""
    end
    lines = text.split("\n").collect{|line| header, _sep, value = line.partition(/\s+/); Log.color(:yellow, header + ": ") + value}
    puts Log.color :yellow, lines * "\n"
  end

  fprocpath = File.join(dir, 'procpath.sqlite3')
  if options[:batch_procpath] && Open.exists?(fprocpath)
    puts Log.color(:magenta, "Procpath summary: ")
    require 'rbbt/tsv/csv'
    meta = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from meta;' "))
    perf = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from record;' "))

    page_size = meta["page_size"].first.to_f
    clock_ticks = meta["clock_ticks"].first.to_f

    # CPU values are collected per pid and timestamp, RSS values per timestamp
    cpu_average = {}
    rss_average = {}
    perf.through :key, ["ts", 'stat_pid', "stat_utime", "stat_stime", "stat_cutime", "stat_cstime", "stat_rss"] do |k, values|
      time, stat_pid, ucpu, scpu, _ccpu, _cscpu, rss = values
      time = time.to_f

      cpu = Misc.sum([ucpu, scpu].collect{|v| v.to_f})
      cpu_average[stat_pid] ||= {}
      cpu_average[stat_pid][time] ||= []
      cpu_average[stat_pid][time] << cpu.to_f
      rss_average[time] ||= []
      rss_average[time] << rss.to_f * page_size
    end

    if rss_average.empty?
      Log.warn "No procpath records found in #{fprocpath}"
    else
      # CPU ticks used by each pid between its first and last sample
      ticks = 0
      cpu_average.each do |stat_pid, cpu_average_pid|
        pid_times = cpu_average_pid.keys.sort
        ticks += Misc.sum(cpu_average_pid[pid_times.last]) - Misc.sum(cpu_average_pid[pid_times.first])
      end

      sample_times = rss_average.keys.sort
      start = sample_times.first
      eend = sample_times.last

      # Replace zeros with 1 before dividing
      sampled_time = eend - start
      ticks = 1 if ticks == 0
      sampled_time = 1 if sampled_time == 0
      puts Log.color(:yellow, "CPU average: ") + "%.2f" % ( ticks / clock_ticks / sampled_time * 100).to_s
      puts Log.color(:yellow, "RSS average: ") + "%.2f GB" % Misc.mean(rss_average.collect{|t,l| Misc.sum(l) / (1024 * 1024 * 1024)}).to_s
      puts Log.color(:yellow, "Time: ") + Misc.format_seconds((eend - start))
    end
  end

  if options[:sacct_peformance]
    begin
      raise "sacct not supported for LSF" unless batch_system == 'slurm'
      # The dot is double-escaped so grep receives `\.batch` and matches it literally
      tsv = TSV.open(CMD.cmd("sacct -j #{id} -o 'jobid,AveRSS,MaxRSS,MaxDiskRead,MaxDiskWrite' -P|grep 'JobID\\|\\.batch'"), :header_hash => '', :sep => "|", :type => :list)
      values = tsv[tsv.keys.first]
      if values.compact.any?
        puts Log.color(:magenta, "SACCT performance: ")
        puts values.zip(values.fields).collect{|v,t| Log.color(:yellow, t + ": ") + v.to_s } * "\n"
      end
    rescue
      Log.warn $!.message
    end
  end

  if tail && File.exist?(File.join(dir, 'std.err'))
    if exit_status && exit_status != 0
      puts Log.color(:magenta, "First error or exception found: ")
      puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
    elsif exit_status
      puts Log.color(:magenta, "Completed jobs: ")
      puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
    else
      puts Log.color(:magenta, "Log tail: ")
      puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | grep -v -e '^[[:space:]]*$' | grep -v \"\\(STDOUT\\|STDERR\\):[[:space:]]*$\" | tail -n #{tail.to_i} ").read
    end
  end

  if options[:progress]
    step_line = command_txt.split("\n").select{|line| line =~ /^#STEP_PATH:/}.first
    if step_line
      step_path = step_line.split(": ").last.strip
      step = Step.new step_path
      has_bar = false
      (step.rec_dependencies + [step]).reverse.each do |j|
        next if j.done?
        if j.file(:progress).exists?
          bar = Log::ProgressBar.new
          bar.load(j.file(:progress).yaml)
          puts Log.color(:magenta, "Progress: ") + bar.report_msg + " " + Log.color(:yellow, j.task_signature)
          has_bar = true
        end
      end
      # Without any progress bar, report the status of the step itself
      step_status = step.status
      step_status = Log.color :red, step_status if step_status.to_s == 'cleaned'
      step_status = Log.color :green, step_status if step_status.to_s == 'done'
      puts Log.color(:magenta, "Progress: ") + Log.color(:yellow, step.task_signature) + " #{step_status}" unless has_bar
    end
  end

end

puts
puts Log.color :clear, "Found #{count} jobs"
@@ -0,0 +1,45 @@
#!/usr/bin/env ruby

# Shows or changes the log level stored in Scout.etc.log_severity.
#
# Without arguments the stored level is printed. With an argument, given
# either as a name or as the index of a name in the list below, the level is
# written to that file.

require 'scout'

$0 = "scout #{$previous_commands.any? ? $previous_commands*" " + " " : "" }#{ File.basename(__FILE__) }" if $previous_commands

options = SOPT.setup <<EOF

Change log level

$ #{$0} <level>

DEBUG
LOW
MEDIUM
HIGH
INFO
WARN
ERROR
NONE

-h--help Print this help
EOF
if options[:help]
  if defined? scout_usage
    scout_usage
  else
    puts SOPT.doc
  end
  exit 0
end

# Level names in index order, as listed in the help text
levels = %w(DEBUG LOW MEDIUM HIGH INFO WARN ERROR NONE)

if ARGV.empty?
  if Scout.etc.log_severity.exists?
    puts Scout.etc.log_severity.read
  else
    puts Scout.etc.log_severity.find + ' does not exist'
  end
else
  requested = ARGV[0]
  level = requested =~ /^\d+$/ ? levels[requested.to_i] : requested.upcase

  # An out-of-range index gives nil and a misspelled name is not in the list;
  # fail instead of writing an unusable value
  raise ParameterException, "Unknown log level: #{requested}" unless levels.include?(level)

  Open.write(Scout.etc.log_severity, level)
end
data/scout_commands/rbbt CHANGED
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+
2
3
  case
3
4
  when File.exist?(file = File.join(__dir__, '../modules/rbbt-util/bin/rbbt'))
4
5
  $LOAD_PATH.unshift(lib_dir = File.join(file, '../..', 'lib'))
@@ -19,5 +20,5 @@ when File.exist?(file = File.join(ENV["HOME"], 'git/rbbt-util/bin/rbbt'))
19
20
  Path.default_pkgdir = Rbbt
20
21
  load file
21
22
  else
22
- raise "Cant find rbbt"
23
+ raise "Can't find rbbt"
23
24
  end