rbbt-util 5.29.0 → 5.30.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc/orchestrate.rb +96 -8
- data/lib/rbbt/hpc/slurm.rb +57 -24
- data/lib/rbbt/persist.rb +4 -0
- data/lib/rbbt/persist/tsv/adapter.rb +44 -13
- data/lib/rbbt/tsv.rb +6 -2
- data/lib/rbbt/util/cmd.rb +6 -1
- data/lib/rbbt/util/misc/inspect.rb +13 -3
- data/lib/rbbt/util/misc/options.rb +0 -42
- data/lib/rbbt/util/procpath.rb +49 -0
- data/lib/rbbt/workflow/accessor.rb +6 -1
- data/lib/rbbt/workflow/step/accessor.rb +20 -13
- data/lib/rbbt/workflow/step/dependencies.rb +1 -2
- data/lib/rbbt/workflow/step/run.rb +2 -5
- data/lib/rbbt/workflow/usage.rb +1 -1
- data/lib/rbbt/workflow/util/provenance.rb +5 -2
- data/share/rbbt_commands/slurm/clean +165 -0
- data/share/rbbt_commands/slurm/list +174 -95
- data/share/rbbt_commands/slurm/orchestrate +3 -2
- data/share/rbbt_commands/slurm/task +1 -0
- data/share/rbbt_commands/tsv/slice +3 -3
- data/share/rbbt_commands/workflow/info +1 -1
- data/share/rbbt_commands/workflow/task +27 -7
- data/share/rbbt_commands/workflow/write_info +52 -0
- data/test/rbbt/test_workflow.rb +7 -7
- data/test/rbbt/util/test_procpath.rb +23 -0
- metadata +7 -2
data/lib/rbbt/tsv.rb
CHANGED
@@ -113,8 +113,8 @@ module TSV
|
|
113
113
|
|
114
114
|
data.entity_options = entity_options
|
115
115
|
|
116
|
-
if Path === source
|
117
|
-
|
116
|
+
if Path === source && data.identifiers
|
117
|
+
Path.setup(data.identifiers, source.pkgdir, source.resource)
|
118
118
|
end
|
119
119
|
|
120
120
|
if data.respond_to? :persistence_path
|
@@ -124,6 +124,10 @@ module TSV
|
|
124
124
|
data.clear
|
125
125
|
data.annotate h
|
126
126
|
end
|
127
|
+
|
128
|
+
data.read
|
129
|
+
|
130
|
+
data
|
127
131
|
end
|
128
132
|
|
129
133
|
def self.parse_header(stream, options = {})
|
data/lib/rbbt/util/cmd.rb
CHANGED
@@ -217,7 +217,7 @@ module CMD
|
|
217
217
|
end
|
218
218
|
end
|
219
219
|
|
220
|
-
def self.
|
220
|
+
def self.cmd_pid(*args)
|
221
221
|
all_args = *args
|
222
222
|
|
223
223
|
all_args << {} unless Hash === all_args.last
|
@@ -248,4 +248,9 @@ module CMD
|
|
248
248
|
nil
|
249
249
|
end
|
250
250
|
|
251
|
+
# Runs a command through +cmd_pid+, forwarding every argument untouched,
# and discards whatever +cmd_pid+ returns.
#
# Always returns nil.
def self.cmd_log(*arguments)
  cmd_pid(*arguments)
  nil
end
|
255
|
+
|
251
256
|
end
|
@@ -287,7 +287,9 @@ module Misc
|
|
287
287
|
when Symbol
|
288
288
|
obj.to_s
|
289
289
|
when (defined?(Path) and Path)
|
290
|
-
if Step
|
290
|
+
if defined?(Step) && Open.exists?(Step.info_file(obj))
|
291
|
+
obj2str(Workflow.load_step(obj))
|
292
|
+
elsif defined?(Step) && Step === obj.resource
|
291
293
|
"Step file: " + obj
|
292
294
|
else
|
293
295
|
if obj.exists?
|
@@ -322,7 +324,11 @@ module Misc
|
|
322
324
|
remove_long_items(obj)
|
323
325
|
when File
|
324
326
|
if obj.respond_to? :filename and obj.filename
|
325
|
-
|
327
|
+
if defined?(Step) && Open.exists?(Step.info_file(obj.filename))
|
328
|
+
obj2str(Workflow.load_step(obj.filename))
|
329
|
+
else
|
330
|
+
"<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
|
331
|
+
end
|
326
332
|
else
|
327
333
|
"<IO:" << obj.path << "--" << mtime_str(obj.path) << ">"
|
328
334
|
end
|
@@ -330,7 +336,11 @@ module Misc
|
|
330
336
|
"<IO:" << obj.short_path << ">"
|
331
337
|
when IO
|
332
338
|
if obj.respond_to? :filename and obj.filename
|
333
|
-
|
339
|
+
if defined?(Step) && Open.exists?(Step.info_file(obj.filename))
|
340
|
+
obj2str(Workflow.load_step(obj.filename))
|
341
|
+
else
|
342
|
+
"<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
|
343
|
+
end
|
334
344
|
else
|
335
345
|
|
336
346
|
if obj.respond_to? :obj2str
|
@@ -242,48 +242,6 @@ module Misc
|
|
242
242
|
|
243
243
|
return options
|
244
244
|
|
245
|
-
options = {}
|
246
|
-
string.split(/#/).each do |str|
|
247
|
-
if str.match(/(.*)=(.*)/)
|
248
|
-
option, value = $1, $2
|
249
|
-
else
|
250
|
-
option, value = str, true
|
251
|
-
end
|
252
|
-
|
253
|
-
option = option.sub(":",'').to_sym if option.chars.first == ':'
|
254
|
-
value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
255
|
-
|
256
|
-
if value == true
|
257
|
-
options[option] = option.to_s.chars.first != '!'
|
258
|
-
else
|
259
|
-
options[option] = Thread.start do
|
260
|
-
$SAFE = 0;
|
261
|
-
case
|
262
|
-
when value =~ /^(?:true|T)$/i
|
263
|
-
true
|
264
|
-
when value =~ /^(?:false|F)$/i
|
265
|
-
false
|
266
|
-
when Symbol === value
|
267
|
-
value
|
268
|
-
when (String === value and value =~ /^\/(.*)\/$/)
|
269
|
-
Regexp.new /#{$1}/
|
270
|
-
else
|
271
|
-
begin
|
272
|
-
Kernel.const_get value
|
273
|
-
rescue
|
274
|
-
begin
|
275
|
-
raise if value =~ /[a-z]/ and defined? value
|
276
|
-
eval(value)
|
277
|
-
rescue Exception
|
278
|
-
value
|
279
|
-
end
|
280
|
-
end
|
281
|
-
end
|
282
|
-
end.value
|
283
|
-
end
|
284
|
-
end
|
285
|
-
|
286
|
-
options
|
287
245
|
end
|
288
246
|
|
289
247
|
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'rbbt/util/cmd'

# Helpers that drive the `procpath` command-line tool: record the resource
# usage of a process into a database file and render plots from it.
module ProcPath
  CMD.tool :procpath do
    'pip install procpath'
  end

  # Option names forwarded to `procpath record`.
  RECORD_OPTIONS = %w(interval recnum reevalnum).freeze

  # Option names forwarded to `procpath plot`. The smoothing option is
  # `moving-average-window`, the same key the defaults in `plot` are set under.
  PLOT_OPTIONS = %w(query-name epsilon moving-average-window title logarithmic after before custom-query-file custom-value-expr).freeze

  # Builds the command option hash containing exactly the keys in +names+.
  # Keys missing from +options+ are kept with a nil value, as before.
  def self.pick_options(options, names)
    names.each_with_object({}) { |name, acc| acc[name] = options[name] }
  end
  private_class_method :pick_options

  # Records process +pid+ into the database file +path+ until +pid+ exits.
  #
  # pid     - pid of the process to record; it is waited on with Process.wait
  #           (NOTE(review): this presumably requires it to be a child of the
  #           current process -- confirm against callers).
  # path    - database file handed to `procpath record --database-file`.
  # options - Hash; only "interval" (default 30), "recnum" and "reevalnum"
  #           are forwarded to the command.
  #
  # Returns nil.
  def self.record(pid, path, options = {})
    IndiferentHash.setup(options)
    options = Misc.add_defaults options, "interval" => 30

    cmd_options = pick_options(options, RECORD_OPTIONS)

    Log.debug "ProcPath recording #{pid} in #{path} (#{Misc.fingerprint options})"
    procpath_thread = Thread.new do
      procpath_pid = nil
      begin
        procpath_pid = CMD.cmd_pid(:procpath, "record --database-file '#{path}' '$..children[?(@.stat.pid == #{pid})]'", cmd_options.merge(:nofail => true, :add_option_dashes => true))
      rescue Exception
        # Exception (not StandardError) on purpose: the Interrupt raised below
        # to stop the recording must be caught here.
        Log.exception $!
        # The pid is only known once cmd_pid has returned one; never signal nil.
        Process.kill "INT", procpath_pid if procpath_pid
      end
    end

    procpath_thread.report_on_exception = false

    begin
      Process.wait pid.to_i
    ensure
      # Stop the recorder even when waiting fails, so it is not left running.
      procpath_thread.raise Interrupt if procpath_thread.alive?
    end

    nil
  end

  # Renders a plot from the database file +path+ into +output+.
  #
  # options - Hash of `procpath plot` options; defaults are "query-name" =>
  #           'rss', 'epsilon' => 0.5 and "moving-average-window" => 10.
  #           Only the names in PLOT_OPTIONS are forwarded.
  #
  # Returns whatever CMD.cmd_log returns.
  def self.plot(path, output, options = {})
    IndiferentHash.setup(options)
    options = Misc.add_defaults options, "query-name" => 'rss', 'epsilon' => 0.5, "moving-average-window" => 10

    cmd_options = pick_options(options, PLOT_OPTIONS)
    CMD.cmd_log(:procpath, "plot --database-file '#{path}' --plot-file '#{output}' ", cmd_options.merge(:nofail => true, :add_option_dashes => true))
  end

  # Records +pid+ and then plots CPU and RSS usage.
  #
  # path - "<database>" or "<database>#<options>"; the part after the first
  #        "#" is parsed with Misc.string2hash. Output files are
  #        <database>.sqlite3, <database>.cpu.svg and <database>.rss.svg.
  def self.monitor(pid, path)
    database, options_str = path.split("#")
    options = options_str.nil? ? {} : Misc.string2hash(options_str)

    database = File.expand_path database
    Log.low "ProcPath monitor #{pid} in #{database} (#{Misc.fingerprint options})"

    ProcPath.record(pid, database + '.sqlite3', options)
    ProcPath.plot(database + '.sqlite3', database + '.cpu.svg', options.merge("query-name" => 'cpu'))
    ProcPath.plot(database + '.sqlite3', database + '.rss.svg', options.merge("query-name" => 'rss'))
  end
end
|
49
|
+
|
@@ -16,6 +16,10 @@ end
|
|
16
16
|
|
17
17
|
module Workflow
|
18
18
|
|
19
|
+
# Tells whether +path+ looks like a job path: the path component four
# positions from the end (splitting on "/") must be exactly "jobs".
#
# Returns true or false.
def self.job_path?(path)
  components = path.split("/")
  fourth_from_end = components[-4]
  fourth_from_end == "jobs"
end
|
22
|
+
|
19
23
|
def log(status, message = nil, &block)
|
20
24
|
Step.log(status, message, nil, &block)
|
21
25
|
end
|
@@ -301,8 +305,9 @@ module Workflow
|
|
301
305
|
|
302
306
|
def setup_override_dependency(dep, workflow, task_name)
|
303
307
|
dep = Step === dep ? dep : Workflow.load_step(dep)
|
308
|
+
dep.workflow = workflow
|
304
309
|
dep.info[:name] = dep.name
|
305
|
-
dep.original_task_name ||= dep.task_name
|
310
|
+
dep.original_task_name ||= dep.task_name if dep.workflow
|
306
311
|
begin
|
307
312
|
workflow = Kernel.const_get workflow if String === workflow
|
308
313
|
dep.task = workflow.tasks[task_name] if dep.task.nil? && workflow.tasks.include?(task_name)
|
@@ -94,17 +94,21 @@ class Step
|
|
94
94
|
Log.debug "Saving job input #{name} (#{type}) into #{path}"
|
95
95
|
case
|
96
96
|
when Step === value
|
97
|
-
Open.
|
97
|
+
Open.ln_s(value.path, path)
|
98
98
|
when type.to_s == "file"
|
99
99
|
if String === value && File.exists?(value)
|
100
|
-
Open.
|
100
|
+
Open.ln_s(value, path)
|
101
101
|
else
|
102
102
|
Open.write(path + '.yaml', value.to_yaml)
|
103
103
|
end
|
104
104
|
when Array === value
|
105
|
-
Open.write(path, value * "\n")
|
105
|
+
Open.write(path, value.collect{|v| Step === v ? v.path : v.to_s} * "\n")
|
106
106
|
when IO === value
|
107
|
-
|
107
|
+
if value.filename && String === value.filename && File.exists?(value.filename)
|
108
|
+
Open.ln_s(value.filename, path)
|
109
|
+
else
|
110
|
+
Open.write(path, value)
|
111
|
+
end
|
108
112
|
else
|
109
113
|
Open.write(path, value.to_s)
|
110
114
|
end
|
@@ -117,13 +121,17 @@ class Step
|
|
117
121
|
task_name = Symbol === job.overriden ? job.overriden : job.task_name
|
118
122
|
workflow = job.workflow
|
119
123
|
workflow = Kernel.const_get workflow if String === workflow
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
+
if workflow
|
125
|
+
task_info = workflow.task_info(task_name)
|
126
|
+
input_types = task_info[:input_types]
|
127
|
+
task_inputs = task_info[:inputs]
|
128
|
+
input_defaults = task_info[:input_defaults]
|
129
|
+
else
|
130
|
+
task_info = input_types = task_inputs = input_defaults = {}
|
131
|
+
end
|
124
132
|
|
125
133
|
inputs = {}
|
126
|
-
real_inputs = job.real_inputs ||
|
134
|
+
real_inputs = job.real_inputs || job.info[:real_inputs]
|
127
135
|
job.recursive_inputs.zip(job.recursive_inputs.fields).each do |value,name|
|
128
136
|
next unless task_inputs.include? name.to_sym
|
129
137
|
next unless real_inputs.include? name.to_sym
|
@@ -497,8 +505,8 @@ class Step
|
|
497
505
|
|
498
506
|
def running?
|
499
507
|
return false if ! (started? || status == :ending)
|
500
|
-
|
501
|
-
|
508
|
+
return nil unless Open.exist?(self.pid_file)
|
509
|
+
pid = Open.read(self.pid_file).to_i
|
502
510
|
|
503
511
|
return false if done? or error? or aborted?
|
504
512
|
|
@@ -522,8 +530,7 @@ class Step
|
|
522
530
|
end
|
523
531
|
|
524
532
|
def nopid?
|
525
|
-
|
526
|
-
! pid && ! (status.nil? || status == :aborted || status == :done || status == :error || status == :cleaned)
|
533
|
+
! Open.exists?(pid_file) && ! (status.nil? || status == :aborted || status == :done || status == :error || status == :cleaned)
|
527
534
|
end
|
528
535
|
|
529
536
|
def aborted?
|
@@ -103,7 +103,7 @@ class Step
|
|
103
103
|
end
|
104
104
|
|
105
105
|
job.dup_inputs unless status == 'done' or job.started?
|
106
|
-
job.init_info(status == 'noinfo') unless status == 'waiting' || status == 'done' || job.started?
|
106
|
+
job.init_info(status == 'noinfo') unless status == 'waiting' || status == 'done' || job.started? || ! Workflow.job_path?(job.path)
|
107
107
|
|
108
108
|
canfail = ComputeDependency === job && job.canfail?
|
109
109
|
end
|
@@ -130,7 +130,6 @@ class Step
|
|
130
130
|
(inputs.flatten.select{|i| Step === i} + inputs.flatten.select{|dep| Path === dep && Step === dep.resource}.collect{|dep| dep.resource})
|
131
131
|
end
|
132
132
|
|
133
|
-
|
134
133
|
def execute_dependency(dependency, log = true)
|
135
134
|
task_name = self.task_name
|
136
135
|
canfail_paths = self.canfail_paths
|
@@ -122,7 +122,6 @@ class Step
|
|
122
122
|
reject{|dependency| (defined?(WorkflowRemoteClient) && WorkflowRemoteClient::RemoteStep === dependency) || Open.remote?(dependency.path) }.
|
123
123
|
reject{|dependency| dependency.error? }.
|
124
124
|
#select{|dependency| Open.exists?(dependency.path) || ((Open.exists?(dependency.info_file) && (dependency.status == :cleaned) || dependency.status == :waiting)) }.
|
125
|
-
#select{|dependency| Open.exists?(dependency.path) || ((Open.exists?(dependency.info_file) && (dependency.status == :cleaned) || dependency.status == :waiting)) }.
|
126
125
|
select{|dependency| dependency.updatable? }.
|
127
126
|
collect{|dependency| Workflow.relocate_dependency(self, dependency)}
|
128
127
|
end
|
@@ -374,7 +373,6 @@ class Step
|
|
374
373
|
Log.exception $!
|
375
374
|
ensure
|
376
375
|
Step.purge_stream_cache
|
377
|
-
set_info :pid, nil
|
378
376
|
Open.rm pid_file if Open.exist?(pid_file)
|
379
377
|
end
|
380
378
|
end
|
@@ -389,7 +387,6 @@ class Step
|
|
389
387
|
_clean_finished
|
390
388
|
rescue
|
391
389
|
stop_dependencies
|
392
|
-
set_info :pid, nil
|
393
390
|
Open.rm pid_file if Open.exist?(pid_file)
|
394
391
|
end
|
395
392
|
end
|
@@ -450,7 +447,7 @@ class Step
|
|
450
447
|
ensure
|
451
448
|
no_load = false unless IO === result
|
452
449
|
Open.rm pid_file if Open.exist?(pid_file) unless no_load
|
453
|
-
set_info :pid, nil unless no_load
|
450
|
+
#set_info :pid, nil unless no_load
|
454
451
|
end
|
455
452
|
end
|
456
453
|
|
@@ -560,7 +557,7 @@ class Step
|
|
560
557
|
RbbtSemaphore.post_semaphore(semaphore) if semaphore
|
561
558
|
Kernel.exit! -1
|
562
559
|
end
|
563
|
-
set_info :pid, nil
|
560
|
+
#set_info :pid, nil
|
564
561
|
ensure
|
565
562
|
RbbtSemaphore.post_semaphore(semaphore) if semaphore
|
566
563
|
end
|
data/lib/rbbt/workflow/usage.rb
CHANGED
@@ -57,7 +57,7 @@ module Task
|
|
57
57
|
puts Log.color(:magenta, "Input select options")
|
58
58
|
puts
|
59
59
|
selects.collect{|p| p}.uniq.each do |input,options|
|
60
|
-
puts Log.color(:blue, input.to_s + ": ") << Misc.format_paragraph(options.collect{|o| o.to_s} * ", ") << "\n"
|
60
|
+
puts Log.color(:blue, input.to_s + ": ") << Misc.format_paragraph(options.collect{|o| Array === o ? o.first.to_s : o.to_s} * ", ") << "\n"
|
61
61
|
puts unless Log.compact
|
62
62
|
end
|
63
63
|
puts
|
@@ -22,11 +22,14 @@ class Step
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def self.prov_report_msg(status, name, path, info = nil)
|
25
|
-
parts = path.sub(/\{.*/,'').
|
25
|
+
parts = path.sub(/\{.*/,'').split "/"
|
26
26
|
|
27
|
+
parts.pop
|
28
|
+
|
27
29
|
task = Log.color(:yellow, parts.pop)
|
28
30
|
workflow = Log.color(:magenta, parts.pop)
|
29
|
-
if status.to_s == 'noinfo'
|
31
|
+
#if status.to_s == 'noinfo' && parts.last != 'jobs'
|
32
|
+
if ! Workflow.job_path?(path)
|
30
33
|
task, status, workflow = Log.color(:yellow, info[:task_name]), Log.color(:green, "file"), Log.color(:magenta, "-")
|
31
34
|
end
|
32
35
|
|
@@ -0,0 +1,165 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Erases SLURM job directories under ~/rbbt-slurm. By default it targets jobs
# that finished with an error or that look aborted (no exit status and not
# listed by squeue); see the options below to change the selection.

require 'shellwords'
require 'rbbt-util'
require 'rbbt/util/simpleopt'

#$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands

options = SOPT.setup <<EOF

Clean error or aborted jobs

$ rbbt slurm clean [options]

-h--help Print this help
-d--done Done jobs only
-e--error Error jobs only
-a--aborted SLURM aborted jobs
-j--job* Job ids
-s--search* Regular expression
-t--tail* Show the last lines of the STDERR
-SBP--sbatch_parameters show sbatch parameters
-dr--dry_run Do not erase anything
EOF

if options[:help]
  if defined? rbbt_usage
    rbbt_usage
  else
    puts SOPT.doc
  end
  exit 0
end

Log.severity = 4
done, error, aborted, jobid, search, tail, sbatch_parameters, dry_run = options.values_at :done, :error, :aborted, :job, :search, :tail, :sbatch_parameters, :dry_run

# With neither --error nor --aborted given, both kinds are selected.
aborted = error = true if aborted.nil? && error.nil?

workdir = File.expand_path('~/rbbt-slurm')
Path.setup(workdir)

running_jobs = begin
                 squeue_txt = CMD.cmd('squeue').read
                 squeue_txt.split("\n").collect{|l| l.to_i.to_s}
               rescue
                 Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
                 squeue_txt = nil
                 $norunningjobs = true
                 []
               end

# Job id => node list, taken from the first and last column of each squeue
# line. Left empty (never nil) when squeue could not be run, so lookups are safe.
job_nodes = {}
if squeue_txt
  squeue_txt.split("\n").each do |line|
    parts = line.strip.split(/\s+/)
    next if parts.empty?
    job_nodes[parts.first] = parts.last.split(",")
  end
end

count = 0
workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
  dir = File.dirname(fcmd)

  # Read the submission script once; all fields below are extracted from it.
  script = Open.read(fcmd)
  cmd = script[/#CMD: (.*)/, 1]
  exe = script[/# Run command\n(.*?)\n/im, 1]
  container_home = script[/^CONTAINER_DIR=(.*)/, 1]

  fid = File.join(dir, 'job.id')
  id = File.exist?(fid) ? Open.read(fid).chomp : nil

  fexit = File.join(dir, 'exit.status')
  exit_status = File.exist?(fexit) ? Open.read(fexit).to_i : nil

  fstatus = File.join(dir, 'job.status')
  nodes = if File.exist?(fstatus)
            # Last column of the last line; an empty file yields no nodes.
            Open.read(fstatus).split("\n").last.to_s.split(/\s+/).last.to_s.split(",")
          else
            job_nodes[id] || []
          end

  stdout_file = File.join(dir, 'std.out')
  stderr_file = File.join(dir, 'std.err')

  # Time since the most recent write to either log; nil when neither exists.
  log_mtimes = [stdout_file, stderr_file].select{|f| File.exist?(f) }.collect{|f| File.mtime(f) }
  time_diff = log_mtimes.empty? ? nil : Time.now - log_mtimes.max

  fdep = File.join(dir, 'dependencies.list')
  deps = Open.read(fdep).split("\n") if File.exist?(fdep)

  fcadep = File.join(dir, 'canfail_dependencies.list')
  cadeps = Open.read(fcadep).split("\n") if File.exist?(fcadep)

  # When squeue is unavailable every job is considered alive, matching the
  # warning above, so a running job is never erased as "aborted".
  alive = $norunningjobs || running_jobs.include?(id)

  select = false
  select = true if done && exit_status && exit_status == 0
  select = true if error && exit_status && exit_status != 0
  select = true if aborted && exit_status.nil? && ! alive
  select &&= jobid.split(",").include?(id) if jobid
  select &&= (cmd && cmd.match(/#{search}/)) if search
  next unless select

  puts Log.color(:yellow, "**ERASING**")
  puts Log.color :blue, dir
  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
  puts Log.color(:magenta, "Done: ") << File.mtime(fexit).to_s if File.exist?(fexit)
  puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
  puts Log.color(:magenta, "CMD: ") << (cmd ? Log.color(:yellow, cmd) : "Missing")
  puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
  puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (alive ? Log.color(:green, id) : Log.color(:red, id) ))
  puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
  puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
  puts Log.color(:magenta, "Nodes: ") << nodes * ", "
  puts Log.color(:magenta, "Output: ") << File.exist?(stdout_file).to_s << ((id.nil? || time_diff.nil?) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")

  if sbatch_parameters
    puts Log.color(:magenta, "SBATCH parameters: ")
    puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => script).read.strip
  end

  if tail && File.exist?(stderr_file)
    # Escape the path: job directories may contain shell-special characters.
    stderr_arg = Shellwords.escape(stderr_file)
    if exit_status && exit_status != 0
      puts Log.color(:magenta, "First error or exception found: ")
      puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{stderr_arg} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
    elsif exit_status
      puts Log.color(:magenta, "Completed jobs: ")
      puts CMD.cmd("grep -i -w 'Completed step' #{stderr_arg} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
    else
      puts Log.color(:magenta, "Log tail: ")
      puts CMD.cmd("tail -n #{tail.to_i} #{stderr_arg}").read
    end
  end

  count += 1

  Open.rm_rf dir unless dry_run
end

puts
puts "Found #{count} jobs"
|
165
|
+
|