scout-gear 10.1.0 → 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,211 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scout'
4
+
5
+ require 'rbbt/hpc'
6
+
7
+ $0 = "scout #{$previous_commands.any? ? $previous_commands*" " + " " : "" }#{ File.basename(__FILE__) }" if $previous_commands
8
+
9
+ options = SOPT.setup <<EOF
10
+
11
+ Description of the tool
12
+
13
+ $ #{$0} [<options>] <filename> [<other|->]*
14
+
15
+ -h--help Print this help
16
+ -d--done Done jobs only
17
+ -e--error Error jobs only
18
+ -a--aborted SLURM aboted jobs
19
+ -q--queued Queued jobs only
20
+ -j--job* Job ids
21
+ -s--search* Regular expression
22
+ -t--tail* Show the last lines of the STDERR
23
+ -BP--batch_parameters show batch parameters
24
+ -dr--dry_run Do not erase anything
25
+ EOF
26
+ if options[:help]
27
+ if defined? scout_usage
28
+ scout_usage
29
+ else
30
+ puts SOPT.doc
31
+ end
32
+ exit 0
33
+ end
34
+
35
+ batch_system = options.delete :batch_system
36
+ batch_system ||= 'auto'
37
+
38
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
39
+
40
+ raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
41
+
42
+ Log.severity = 4
43
+ done, error, aborted, queued, jobid, search, tail, batch_parameters, dry_run = options.values_at :done, :error, :aborted, :queued, :job, :search, :tail, :batch_parameters, :dry_run
44
+
45
+ workdir = File.expand_path('~/scout-batch')
46
+ Path.setup(workdir)
47
+
48
+ running_jobs = begin
49
+ squeue_txt = HPC::BATCH_MODULE.job_status
50
+ squeue_txt.split("\n").collect{|l| l.to_i.to_s}
51
+ rescue
52
+ Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
53
+ squeue_txt = nil
54
+ $norunningjobs = true
55
+ []
56
+ end
57
+
58
+ if squeue_txt
59
+ job_nodes = {}
60
+ squeue_txt.split("\n").each do |line|
61
+ parts = line.strip.split(/\s+/)
62
+ job_nodes[parts.first] = parts.last.split(",")
63
+ end
64
+ else
65
+ job_nodes = nil
66
+ end
67
+
68
+ count = 0
69
+ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
70
+ dir = File.dirname(fcmd)
71
+ command_txt = Open.read(fcmd)
72
+
73
+ if m = command_txt.match(/#CMD: (.*)/)
74
+ cmd = m[1]
75
+ else
76
+ cmd = nil
77
+ end
78
+
79
+ if m = command_txt.match(/# Run command\n(.*?)\n/im)
80
+ exe = m[1]
81
+ else
82
+ exe = nil
83
+ end
84
+
85
+ if m = command_txt.match(/^CONTAINER_DIR=(.*)/)
86
+ container_home = m[1]
87
+ else
88
+ container_home = nil
89
+ end
90
+
91
+ if m = command_txt.match(/^BATCH_SYSTEM=(.*)/)
92
+ job_batch_system = m[1].downcase
93
+ else
94
+ job_batch_system = nil
95
+ end
96
+
97
+ different_system = job_batch_system != batch_system
98
+
99
+ if File.exist?(fid = File.join(dir, 'job.id'))
100
+ id = Open.read(fid).chomp
101
+ else
102
+ id = nil
103
+ end
104
+
105
+ if File.exist?(fstatus = File.join(dir, 'exit.status'))
106
+ exit_status = Open.read(fstatus).to_i
107
+ else
108
+ exit_status = nil
109
+ end
110
+
111
+ if File.exist?(fstatus = File.join(dir, 'job.status'))
112
+ fstatus_txt = Open.read(fstatus)
113
+ begin
114
+ if job_batch_system == "lsf"
115
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/)[5].split(",")
116
+ else
117
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
118
+ end
119
+ rescue
120
+ nodes = []
121
+ end
122
+ elsif job_nodes[id]
123
+ nodes = job_nodes[id]
124
+ else
125
+ nodes = []
126
+ end
127
+
128
+ if File.exist?(File.join(dir, 'std.out'))
129
+ outt = File.mtime File.join(dir, 'std.out')
130
+ errt = File.mtime File.join(dir, 'std.err')
131
+ time_diff = Time.now - [outt, errt].max
132
+ end
133
+
134
+ fdep = File.join(dir, 'dependencies.list')
135
+ deps = Open.read(fdep).split("\n") if File.exist?(fdep)
136
+
137
+ fcadep = File.join(dir, 'canfail_dependencies.list')
138
+ cadeps = Open.read(fcadep).split("\n") if File.exist?(fcadep)
139
+
140
+ aborted = error = true if ! done && aborted.nil? && error.nil?
141
+ #if done || error || aborted || running || queued || jobid || search
142
+ # select = false
143
+ # select = true if done && exit_status && exit_status.to_i == 0
144
+ # select = true if error && exit_status && exit_status.to_i != 0
145
+ # select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
146
+ # select = select && jobid.split(",").include?(id) if jobid
147
+ # select = select && cmd.match(/#{search}/) if search
148
+ # next unless select
149
+ #end
150
+
151
+ if done || error || aborted || queued || jobid
152
+ select = false
153
+ select = true if done && exit_status == 0
154
+ select = true if error && exit_status && exit_status != 0
155
+ select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
156
+ is_running = exit_status.nil? && ( (running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)) || different_system )
157
+ select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
158
+ select = true if jobid && jobid.split(",").include?(id)
159
+ select = select && cmd.match(/#{search}/) if search
160
+ next unless select
161
+ elsif search
162
+ select = false
163
+ select = true if search && cmd.match(/#{search}/)
164
+ next unless select
165
+ end
166
+
167
+
168
+ puts Log.color(:yellow, "**ERASING**")
169
+ puts Log.color :blue, dir
170
+ puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
171
+ puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
172
+ puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
173
+ puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
174
+ puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
175
+ puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
176
+ puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
177
+ puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
178
+ puts Log.color(:magenta, "Nodes: ") << nodes * ", "
179
+ puts Log.color(:magenta, "Output: ") << File.exist?(File.join(dir, 'std.out')).to_s << (id.nil? || time_diff.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)") # time_diff is only set when std.out exists
180
+
181
+ if options[:batch_parameters]
182
+ puts Log.color(:magenta, "BATCH parameters: ")
183
+ case job_batch_system
184
+ when 'slurm'
185
+ puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
186
+ when 'lsf'
187
+ puts Log.color :blue, CMD.cmd('grep "^#BSUB" |tail -n +6', :in => Open.read(fcmd)).read.strip
188
+ end
189
+ end
190
+
191
+ if tail && File.exist?(File.join(dir, 'std.err'))
192
+ if exit_status && exit_status != 0
193
+ puts Log.color(:magenta, "First error or exception found: ")
194
+ puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
195
+ elsif exit_status
196
+ puts Log.color(:magenta, "Completed jobs: ")
197
+ puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
198
+ else
199
+ puts Log.color(:magenta, "Log tail: ")
200
+ puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
201
+ end
202
+ end
203
+
204
+ count += 1
205
+
206
+ Open.rm_rf dir unless dry_run
207
+ end
208
+
209
+ puts
210
+ puts "Found #{count} jobs"
211
+
@@ -0,0 +1,357 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scout'
4
+
5
+ require 'rbbt/hpc'
6
+
7
+ $0 = "scout #{$previous_commands.any? ? $previous_commands*" " + " " : "" }#{ File.basename(__FILE__) }" if $previous_commands
8
+
9
+ options = SOPT.setup <<EOF
10
+
11
+ List all batch jobs
12
+
13
+ $ #{$0} [<options>] <filename> [<other|->]*
14
+
15
+ $ rbbt slurm list [options]
16
+
17
+ -h--help Print this help
18
+ -d--done Done jobs only
19
+ -e--error Error jobs only
20
+ -a--aborted SLURM aboted jobs
21
+ -r--running Running jobs only
22
+ -q--queued Queued jobs only
23
+ -j--job* Job ids
24
+ -s--search* Regular expression
25
+ -t--tail* Show the last lines of the STDERR
26
+ -l--long Show more entries
27
+ -c--compressed Show compressed information about entries
28
+ -p--progress Report progress of job and the dependencies
29
+ -BP--batch_parameters show batch parameters
30
+ -BPP--batch_procpath show Procpath performance summary
31
+ -sacct--sacct_peformance show sacct performance summary
32
+ -bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
33
+ EOF
34
+ if options[:help]
35
+ if defined? scout_usage
36
+ scout_usage
37
+ else
38
+ puts SOPT.doc
39
+ end
40
+ exit 0
41
+ end
42
+
43
+ batch_system = options.delete :batch_system
44
+ batch_system ||= 'auto'
45
+
46
+ long = options.delete :long
47
+
48
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
49
+
50
+ raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
51
+
52
+ batch_system = HPC::BATCH_MODULE.to_s.split("::").last.downcase
53
+
54
+ done, error, running, queued, aborted, jobid, search, tail, progress = options.values_at :done, :error, :running, :queued, :aborted, :job, :search, :tail, :progress
55
+
56
+ workdir = File.expand_path('~/scout-batch')
57
+ Path.setup(workdir)
58
+
59
+ running_jobs = begin
60
+ squeue_txt = HPC::BATCH_MODULE.job_status
61
+ squeue_txt.split("\n").collect{|l| l.to_i.to_s}
62
+ rescue
63
+ Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
64
+ squeue_txt = nil
65
+ $norunningjobs = true
66
+ []
67
+ end
68
+
69
+ if squeue_txt
70
+ job_nodes = {}
71
+ squeue_txt.split("\n").each do |line|
72
+ parts = line.strip.split(/\s+/)
73
+ job_nodes[parts.first] = parts.last.split(",")
74
+ end
75
+ else
76
+ job_nodes = nil
77
+ end
78
+
79
+ count = 0
80
+ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
81
+ dir = File.dirname(fcmd)
82
+ command_txt = Open.read(fcmd)
83
+
84
+ if m = command_txt.match(/#CMD: (.*)/)
85
+ cmd = m[1]
86
+ else
87
+ cmd = nil
88
+ end
89
+
90
+ if m = command_txt.match(/^export BATCH_SYSTEM=(.*)/)
91
+ job_batch_system = m[1].downcase
92
+ else
93
+ job_batch_system = nil
94
+ end
95
+
96
+ different_system = job_batch_system != batch_system
97
+
98
+ if m = command_txt.match(/#MANIFEST: (.*)/)
99
+ manifest = m[1]
100
+ else
101
+ manifest = nil
102
+ end
103
+
104
+ if m = command_txt.match(/#STEP_PATH: (.*)/)
105
+ step_path = m[1]
106
+ else
107
+ step_path = nil
108
+ end
109
+
110
+ if m = command_txt.match(/#EXEC_CMD: (.*)/)
111
+ exe = m[1]
112
+ else
113
+ exe = nil
114
+ end
115
+
116
+ if m = command_txt.match(/^CONTAINER_DIR=(.*)/)
117
+ container_home = m[1]
118
+ else
119
+ container_home = nil
120
+ end
121
+
122
+ if File.exist?(fid = File.join(dir, 'job.id'))
123
+ id = Open.read(fid).chomp
124
+ else
125
+ id = nil
126
+ end
127
+
128
+ if File.exist?(fstatus = File.join(dir, 'exit.status'))
129
+ exit_status = Open.read(fstatus).to_i
130
+ else
131
+ exit_status = nil
132
+ end
133
+
134
+ if File.exist?(fstatus = File.join(dir, 'job.status'))
135
+ fstatus_txt = Open.read(fstatus)
136
+ begin
137
+ if job_batch_system == "lsf"
138
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/)[5].split(",")
139
+ else
140
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
141
+ end
142
+ rescue
143
+ nodes = []
144
+ end
145
+ elsif job_nodes && job_nodes[id]
146
+ nodes = job_nodes[id].reject{|n| n.include? "("}
147
+ else
148
+ nodes = []
149
+ end
150
+
151
+ if File.exist?(File.join(dir, 'exit.status'))
152
+ now = File.ctime(File.join(dir, 'exit.status'))
153
+ else
154
+ now = Time.now
155
+ end
156
+
157
+ if File.exist?(File.join(dir, 'std.out'))
158
+ cerrt = File.ctime File.join(dir, 'std.err')
159
+ coutt = File.ctime File.join(dir, 'std.out')
160
+ outt = File.mtime File.join(dir, 'std.out')
161
+ errt = File.mtime File.join(dir, 'std.err')
162
+ time_diff = now - [outt, errt].max
163
+ time_elapsed = now - [cerrt, coutt].min
164
+ end
165
+
166
+ fdep = File.join(dir, 'dependencies.list')
167
+ deps = Open.read(fdep).split("\n") if File.exist?(fdep)
168
+
169
+ fcadep = File.join(dir, 'canfail_dependencies.list')
170
+ cadeps = Open.read(fcadep).split("\n") if File.exist?(fcadep)
171
+
172
+ is_running = exit_status.nil? && ( (running_jobs.include?(id) && (deps.nil? || (running_jobs & deps).empty?)) || different_system )
173
+ if done || error || aborted || running || queued || jobid
174
+ select = false
175
+ select = true if done && exit_status == 0
176
+ select = true if error && exit_status && exit_status != 0
177
+ select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
178
+ select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
179
+ select = true if running && nodes.any? && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
180
+ select = true if jobid && jobid.split(",").include?(id)
181
+ select = select && step_path.match(/#{search}/) if search
182
+ next unless select
183
+ elsif search
184
+ select = false
185
+ select = true if search && cmd.match(/#{search}/)
186
+ next unless select
187
+ end
188
+
189
+
190
+ count += 1
191
+
192
+ if options[:compressed]
193
+ status = exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) : Log.color(:green, id)
194
+ if different_system
195
+ status = exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id)
196
+ else
197
+ #status = exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) )
198
+ status = exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" :
199
+ (running_jobs.include?(id) || $norunningjobs ?
200
+ (is_running ? Log.color(:cyan, id) : Log.color(:yellow, id) ) :
201
+ Log.color(:red, id) )
202
+ end
203
+ prog_rep = []
204
+ if options[:progress]
205
+ step_line = Open.read(fcmd).split("\n").select{|line| line =~ /^#STEP_PATH:/}.first
206
+ if step_line
207
+ step_path = step_line.split(": ").last.strip
208
+ step = Step.new step_path
209
+ has_bar = false
210
+ [step].reverse.each do |j|
211
+ next if j.done?
212
+ if j.file(:progress).exists?
213
+ bar = Log::ProgressBar.new
214
+ bar.load(j.file(:progress).yaml)
215
+ rep = bar.report_msg.split("·")[1]
216
+ rep = rep.sub(/.*?(\d+%)/, Log.color(:blue,'\1')).sub(/\-.*/,'')
217
+ prog_rep << [rep]
218
+ end
219
+ end
220
+ end
221
+ end
222
+ workflow, task, name = step_path.split("/")[-3..-1]
223
+ job_str = [Log.color(:yellow, workflow), Log.color(:magenta, task), name] * "/"
224
+ puts [job_str, status, prog_rep ].flatten * " "
225
+ next
226
+ end
227
+
228
+ puts Log.color :blue, dir
229
+ puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s if long
230
+ puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err')) && long
231
+ puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest) if long
232
+ puts Log.color(:magenta, "Step path: ") << Log.color(:yellow, step_path)
233
+ puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
234
+ puts Log.color(:magenta, "Exec: ") << (exe || "Missing") if long
235
+ puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing") if long
236
+ puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home && long
237
+ if different_system
238
+ puts Log.color(:magenta, "Job ID (#{Log.color(:red, job_batch_system)}): ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id) )
239
+ else
240
+ puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
241
+ end
242
+ puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
243
+ puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
244
+ puts Log.color(:magenta, "Nodes: ") << nodes * ", " if long
245
+ puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
246
+ puts Log.color(:magenta, "Output: ") << File.exist?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exist?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)") if long
247
+
248
+ if options[:batch_parameters]
249
+ puts Log.color(:magenta, "BATCH parameters: ")
250
+ case job_batch_system
251
+ when 'slurm'
252
+ text = CMD.cmd('grep "^#SBATCH" ', :in => Open.read(fcmd)).read.strip
253
+ when 'lsf'
254
+ text = CMD.cmd('grep "^#BSUB" ', :in => Open.read(fcmd)).read.strip
255
+ else
256
+ text = ""
257
+ end
258
+ lines = text.split("\n").collect{|line| header, _sep, value = line.partition(/\s+/); Log.color(:yellow, header + ": ") + value}
259
+ puts Log.color :yellow, lines * "\n"
260
+ end
261
+
262
+ fprocpath = File.join(dir, 'procpath.sqlite3')
263
+ if options[:batch_procpath] && Open.exists?(fprocpath)
264
+ puts Log.color(:magenta, "Procpath summary: ")
265
+ require 'rbbt/tsv/csv'
266
+ meta = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from meta;' "))
267
+ perf = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from record;' "))
268
+
269
+ page_size = meta["page_size"].first.to_f
270
+ clock_ticks = meta["clock_ticks"].first.to_f
271
+
272
+ cpu_average = {}
273
+ rss_average = {}
274
+ perf.through :key, ["ts", 'stat_pid', "stat_utime", "stat_stime", "stat_cutime", "stat_cstime", "stat_rss"] do |k, values|
275
+ time, stat_pid, ucpu, scpu, ccpu, cscpu, rss = values
276
+ time = time.to_f
277
+
278
+ cpu = Misc.sum([ucpu, scpu].collect{|v| v.to_f})
279
+ cpu_average[stat_pid] ||= {}
280
+ cpu_average[stat_pid][time] ||= []
281
+ cpu_average[stat_pid][time] << cpu.to_f
282
+ rss_average[time] ||= []
283
+ rss_average[time] << rss.to_f * page_size
284
+ end
285
+
286
+ ticks = 0
287
+ cpu_average.each do |stat_pid, cpu_average_pid|
288
+ start = cpu_average_pid.keys.sort.first
289
+ eend = cpu_average_pid.keys.sort.last
290
+ ticks += Misc.sum(cpu_average_pid[eend]) - Misc.sum(cpu_average_pid[start])
291
+ end
292
+ start = rss_average.keys.sort.first
293
+ eend = rss_average.keys.sort.last
294
+ time_elapsed = eend - start
295
+ ticks = 1 if ticks == 0
296
+ time_elapsed = 1 if time_elapsed == 0
297
+ puts Log.color(:yellow, "CPU average: ") + "%.2f" % ( ticks / clock_ticks / time_elapsed * 100).to_s
298
+ puts Log.color(:yellow, "RSS average: ") + "%.2f GB" % Misc.mean(rss_average.collect{|t,l| Misc.sum(l) / (1024 * 1024 * 1024)}).to_s
299
+ puts Log.color(:yellow, "Time: ") + Misc.format_seconds((eend - start))
300
+
301
+ end
302
+
303
+ if options[:sacct_peformance]
304
+ begin
305
+ raise "sacct not supported for LSF" unless batch_system == 'slurm'
306
+ tsv = TSV.open(CMD.cmd("sacct -j #{id} -o 'jobid,AveRSS,MaxRSS,MaxDiskRead,MaxDiskWrite' -P|grep 'JobID\\|\.batch'"), :header_hash => '', :sep => "|", :type => :list)
307
+ values = tsv[tsv.keys.first]
308
+ if values.compact.any?
309
+ puts Log.color(:magenta, "SACCT performance: ")
310
+ puts values.zip(values.fields).collect{|v,t| Log.color(:yellow, t + ": ") + v.to_s } * "\n"
311
+ end
312
+ rescue
313
+ Log.warn $!.message
314
+ end
315
+ end
316
+
317
+
318
+ if tail && File.exist?(File.join(dir, 'std.err'))
319
+ if exit_status && exit_status != 0
320
+ puts Log.color(:magenta, "First error or exception found: ")
321
+ puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
322
+ elsif exit_status
323
+ puts Log.color(:magenta, "Completed jobs: ")
324
+ puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
325
+ else
326
+ puts Log.color(:magenta, "Log tail: ")
327
+ puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | grep -v -e '^[[:space:]]*$' | grep -v \"\\(STDOUT\\|STDERR\\):[[:space:]]*$\" | tail -n #{tail.to_i} ").read
328
+ end
329
+ end
330
+
331
+ if options[:progress]
332
+ step_line = Open.read(fcmd).split("\n").select{|line| line =~ /^#STEP_PATH:/}.first
333
+ if step_line
334
+ step_path = step_line.split(": ").last.strip
335
+ step = Step.new step_path
336
+ has_bar = false
337
+ (step.rec_dependencies + [step]).reverse.each do |j|
338
+ next if j.done?
339
+ if j.file(:progress).exists?
340
+ bar = Log::ProgressBar.new
341
+ bar.load(j.file(:progress).yaml)
342
+ puts Log.color(:magenta, "Progress: ") + bar.report_msg + " " + Log.color(:yellow, j.task_signature)
343
+ has_bar = true
344
+ end
345
+ end
346
+ step_status = step.status
347
+ step_status = Log.color :red, step_status if step_status.to_s == 'cleaned'
348
+ step_status = Log.color :green, step_status if step_status.to_s == 'done'
349
+ puts Log.color(:magenta, "Progress: ") + Log.color(:yellow, step.task_signature) + " #{step_status}" unless has_bar
350
+ end
351
+ end
352
+
353
+ end
354
+
355
+ puts
356
+ puts Log.color :clear, "Found #{count} jobs"
357
+
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'scout'
4
+
5
+ $0 = "scout #{$previous_commands.any? ? $previous_commands*" " + " " : "" }#{ File.basename(__FILE__) }" if $previous_commands
6
+
7
+ options = SOPT.setup <<EOF
8
+
9
+ Change log level
10
+
11
+ $ #{$0} <level>
12
+
13
+ DEBUG
14
+ LOW
15
+ MEDIUM
16
+ HIGH
17
+ INFO
18
+ WARN
19
+ ERROR
20
+ NONE
21
+
22
+ -h--help Print this help
23
+ EOF
24
+ if options[:help]
25
+ if defined? scout_usage
26
+ scout_usage
27
+ else
28
+ puts SOPT.doc
29
+ end
30
+ exit 0
31
+ end
32
+
33
+ if ARGV.empty?
34
+ if Scout.etc.log_severity.exists?
35
+ puts Scout.etc.log_severity.read
36
+ else
37
+ puts Scout.etc.log_severity.find + ' does not exist'
38
+ end
39
+ else
40
+ level = ARGV[0]
41
+ level = %w(DEBUG LOW MEDIUM HIGH INFO WARN ERROR NONE)[level.to_i] if level =~ /^\d+$/
42
+ Open.write(Scout.etc.log_severity, level.upcase)
43
+ end
44
+
45
+
data/scout_commands/rbbt CHANGED
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+
2
3
  case
3
4
  when File.exist?(file = File.join(__dir__, '../modules/rbbt-util/bin/rbbt'))
4
5
  $LOAD_PATH.unshift(lib_dir = File.join(file, '../..', 'lib'))
@@ -19,5 +20,5 @@ when File.exist?(file = File.join(ENV["HOME"], 'git/rbbt-util/bin/rbbt'))
19
20
  Path.default_pkgdir = Rbbt
20
21
  load file
21
22
  else
22
- raise "Cant find rbbt"
23
+ raise "Can't find rbbt"
23
24
  end