rbbt-util 5.32.7 → 5.32.13
- checksums.yaml +4 -4
- data/bin/rbbt +1 -0
- data/lib/rbbt/hpc/batch.rb +23 -7
- data/lib/rbbt/hpc/slurm.rb +29 -10
- data/lib/rbbt/persist/tsv/adapter.rb +1 -5
- data/lib/rbbt/resource.rb +22 -9
- data/lib/rbbt/tsv/csv.rb +2 -2
- data/lib/rbbt/tsv/manipulate.rb +2 -0
- data/lib/rbbt/util/R.rb +2 -2
- data/lib/rbbt/util/cmd.rb +39 -18
- data/lib/rbbt/util/log/progress/report.rb +20 -17
- data/lib/rbbt/util/python.rb +24 -3
- data/lib/rbbt/util/simpleDSL.rb +4 -4
- data/lib/rbbt/workflow.rb +20 -2
- data/lib/rbbt/workflow/step.rb +37 -6
- data/lib/rbbt/workflow/step/accessor.rb +2 -2
- data/lib/rbbt/workflow/util/data.rb +31 -0
- data/lib/rbbt/workflow/util/trace.rb +2 -1
- data/python/rbbt.py +3 -0
- data/share/install/software/lib/install_helpers +1 -1
- data/share/rbbt_commands/hpc/list +11 -7
- data/share/rbbt_commands/hpc/orchestrate +6 -1
- data/share/rbbt_commands/hpc/task +6 -1
- data/share/rbbt_commands/lsf/clean +212 -0
- data/share/rbbt_commands/lsf/list +315 -0
- data/share/rbbt_commands/lsf/orchestrate +61 -0
- data/share/rbbt_commands/lsf/tail +55 -0
- data/share/rbbt_commands/lsf/task +60 -0
- data/share/rbbt_commands/slurm/clean +212 -0
- data/share/rbbt_commands/slurm/list +315 -0
- data/share/rbbt_commands/slurm/orchestrate +61 -0
- data/share/rbbt_commands/slurm/tail +55 -0
- data/share/rbbt_commands/slurm/task +60 -0
- data/share/rbbt_commands/workflow/forget_deps +5 -4
- data/test/rbbt/util/test_python.rb +3 -2
- data/test/rbbt/util/test_simpleDSL.rb +3 -3
- data/test/rbbt/workflow/util/test_data.rb +35 -0
- metadata +97 -84
data/lib/rbbt/util/simpleDSL.rb
CHANGED
@@ -38,13 +38,13 @@ module SimpleDSL
     hook_method(method)
 
     # Execute
+    @config ||= {}
     if actions.is_a? Proc
       begin
-        require '
-
-        require 'ruby2ruby'
-        @config[@@method_name] = actions.to_ruby.collect[1..-2].join
+        require 'method_source'
+        @config[@@method_name] = actions.source.split("\n")[1..-2] * "\n"
       rescue Exception
+        Log.exception $!
         @config[@@method_name] = NoRuby2Ruby.new "The gem ruby2ruby is not installed. It will not work on ruby 1.9."
       end
 
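The change above replaces the ruby2ruby-based reconstruction of the DSL block with the method_source gem. A standalone sketch of what method_source provides (illustrative only, not code from the gem; helper_one and helper_two are made-up calls):

require 'method_source'

# A DSL block whose source we want to capture without executing it.
actions = proc do
  helper_one
  helper_two :with => 'options'
end

# Proc#source (from method_source) returns the literal source, including the
# `proc do` and closing `end` lines; dropping the first and last lines leaves
# just the body, which is what SimpleDSL now stores in @config.
body = actions.source.split("\n")[1..-2] * "\n"
puts body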
data/lib/rbbt/workflow.rb
CHANGED
@@ -347,6 +347,11 @@ module Workflow
 
     inputs = IndiferentHash.setup(inputs)
 
+    not_overriden = inputs.delete :not_overriden
+    if not_overriden
+      inputs[:not_overriden] = :not_overriden_dep
+    end
+
     Workflow.resolve_locals(inputs)
 
     task_info = task_info(taskname)
@@ -400,7 +405,11 @@ module Workflow
     jobname = DEFAULT_NAME if jobname.nil? or jobname.empty?
 
     dependencies = real_dependencies(task, jobname, defaults.merge(inputs), task_dependencies[taskname] || [])
-
+
+    overriden_deps = dependencies.select{|d| d.overriden }
+    true_overriden_deps = overriden_deps.select{|d| TrueClass === d.overriden }
+
+    overriden = has_overriden_inputs || overriden_deps.any?
 
     if real_inputs.empty? && Workflow::TAG != :inputs && ! overriden
       step_path = step_path taskname, jobname, [], [], task.extension
@@ -413,7 +422,16 @@ module Workflow
     job = get_job_step step_path, task, input_values, dependencies
     job.workflow = self
     job.clean_name = jobname
-
+
+    case not_overriden
+    when TrueClass
+      job.overriden = has_overriden_inputs || true_overriden_deps.any?
+    when :not_overriden_dep
+      job.overriden = true if has_overriden_inputs || true_overriden_deps.any?
+    else
+      job.overriden = true if has_overriden_inputs || overriden_deps.any?
+    end
+
     job.real_inputs = real_inputs.keys
     job
   end
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -34,13 +34,40 @@ class Step
     end
   end
 
+
+  def overriden?
+    return true if @overriden
+    return true if dependencies.select{|dep| dep.overriden? }.any?
+    info[:archived_info].each do |f,i|
+      return true if i[:overriden] || i["overriden"]
+    end if info[:archived_info]
+    return false
+  end
+
   def overriden
-    if @overriden.nil?
-      return false if dependencies.nil?
-      dependencies.select{|dep| dep.overriden? }.any?
-    else
-      @overriden
+    @overriden
+    #if @overriden.nil?
+    #  return false if dependencies.nil?
+    #  dependencies.select{|dep| dep.overriden? }.any?
+    #else
+    #  @overriden
+    #end
+  end
+
+  def overriden_deps
+    ord = []
+    deps = dependencies.dup
+    while dep = deps.shift
+      case dep.overriden
+      when FalseClass
+        next
+      when Symbol
+        ord << dep
+      else
+        deps += dep.dependencies
+      end
     end
+    ord
   end
 
   def initialize(path, task = nil, inputs = nil, dependencies = nil, bindings = nil, clean_name = nil)
@@ -134,7 +161,11 @@ class Step
 
     archived_info = {}
     dependencies.each do |dep|
-      archived_info[dep.path] = dep.info
+      if Symbol === dep.overriden && ! Open.exists?(dep.info_file)
+        archived_info[dep.path] = dep.overriden
+      else
+        archived_info[dep.path] = dep.info
+      end
       archived_info.merge!(dep.archived_info)
     end if dependencies
 
@@ -248,7 +248,7 @@ class Step
   def init_info(force = false)
     return nil if @exec || info_file.nil? || (Open.exists?(info_file) && ! force)
     Open.lock(info_file, :lock => info_lock) do
-      i = {:status => :waiting, :pid => Process.pid, :path => path, :real_inputs => real_inputs}
+      i = {:status => :waiting, :pid => Process.pid, :path => path, :real_inputs => real_inputs, :overriden => overriden}
       i[:dependencies] = dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]} if dependencies
       Misc.sensiblewrite(info_file, Step.serialize_info(i), :force => true, :lock => false)
      @info_cache = IndiferentHash.setup(i)
@@ -551,7 +551,7 @@ class Step
 
   def aborted?
     status = self.status
-    status == :aborted || ((status != :dependencies && status != :cleaned && status != :noinfo && status != :setup && status != :noinfo) && nopid?)
+    status == :aborted || ((status != :ending && status != :dependencies && status != :cleaned && status != :noinfo && status != :setup && status != :noinfo) && nopid?)
   end
 
   # {{{ INFO
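The new Step#overriden_deps walks the dependency graph with a work queue: dependencies whose overriden flag is a Symbol are collected, FalseClass entries are skipped, and anything else is expanded into its own dependencies. A self-contained sketch of that traversal, using a hypothetical Node struct in place of Step:

# Hypothetical stand-in for Step: `overriden` may be false, a Symbol, or nil.
Node = Struct.new(:name, :overriden, :dependencies)

def overriden_deps(node)
  ord = []
  deps = node.dependencies.dup
  while dep = deps.shift
    case dep.overriden
    when FalseClass then next         # explicitly not overridden: skip
    when Symbol     then ord << dep   # overridden dependency: collect it
    else deps += dep.dependencies     # undecided: keep descending
    end
  end
  ord
end

leaf       = Node.new(:leaf, false, [])
overridden = Node.new(:overridden, :Baseline_task, [])
middle     = Node.new(:middle, nil, [leaf, overridden])
p overriden_deps(Node.new(:root, nil, [middle])).map(&:name)  #=> [:overridden]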
data/lib/rbbt/workflow/util/data.rb
ADDED
@@ -0,0 +1,31 @@
+require 'rbbt/workflow'
+require 'rbbt/workflow/examples'
+
+module Workflow
+  module Data
+    def data(directory)
+      @@data_directory = directory
+    end
+
+    def get_datadir(clean_name)
+      data_dir = File.join(@@data_directory, clean_name)
+      raise "Data dir not found #{data_dir}" unless File.directory?(data_dir)
+      if Path === @@data_directory
+        @@data_directory.annotate data_dir
+      else
+        Path.setup(data_dir)
+      end
+    end
+
+    def data_task(name, workflow, oname, *rest, &block)
+      dep_task(name, workflow, oname, *rest) do |jobname, options|
+        data_dir = self.get_datadir(jobname)
+        task_info = workflow.task_info(oname)
+        dir_options = Workflow.load_inputs(data_dir.options, task_info[:inputs], task_info[:input_types])
+        data_options = block.call data_dir, dir_options, task_info
+        {:inputs => data_options.merge(options)}
+      end
+    end
+  end
+
+end
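The new Workflow::Data module above maps job names to directories of input data: `data` registers the root directory, `get_datadir` resolves the sub-directory for a job name, and `data_task` wraps `dep_task` so a block can turn the directory contents into task inputs. A hypothetical usage sketch (the Greeter workflow, its task, and the directory layout are invented for illustration; only the Workflow::Data calls mirror the code above):

require 'rbbt/workflow'
require 'rbbt/workflow/util/data'

module Greeter
  extend Workflow
  extend Workflow::Data

  # Invented layout: each sub-directory of ./data holds one job's files,
  # including an `options` file that Workflow.load_inputs can parse.
  data Path.setup(File.expand_path('data'))

  input :name, :string
  task :salute => :text do |name|
    "Hello #{name}"
  end

  # The block receives the job's data directory, the inputs parsed from its
  # options file, and the task_info hash; whatever it returns is merged into
  # the inputs of the wrapped :salute job.
  data_task :salute_sample, Greeter, :salute do |data_dir, dir_options, task_info|
    dir_options
  end
end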
data/lib/rbbt/workflow/util/trace.rb
CHANGED
@@ -188,6 +188,7 @@ rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, poi
   seed_jobs.each do |step|
     jobs += step.rec_dependencies + [step]
     step.info[:archived_info].each do |path,ainfo|
+      next unless Hash === ainfo
       archived_step = Step.new path
 
       archived_step.define_singleton_method :info do
@@ -205,7 +206,7 @@ rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, poi
 
   end
 
-  jobs = jobs.uniq.sort_by{|job| t = job.info[:started] || Open.mtime(job.path) || Time.now; Time === t ? t : Time.parse(t) }
+  jobs = jobs.uniq.sort_by{|job| [job, job.info]; t = job.info[:started] || Open.mtime(job.path) || Time.now; Time === t ? t : Time.parse(t) }
 
   data = trace_job_times(jobs, options[:fix_gap])
 
data/python/rbbt.py
ADDED
data/share/install/software/lib/install_helpers
CHANGED
@@ -499,7 +499,7 @@ install_jar(){
   local url="$2"
 
   [ -d "$OPT_DIR/$name/" ] || mkdir -p "$OPT_DIR/$name/"
-  wget "$url" -O "$OPT_DIR
+  wget "$url" -O "$OPT_DIR/$name/$name.jar" || wget "$url" -O "$OPT_DIR/$name/$name.jar" --no-check-certificate || (rm "$OPT_DIR/$name/$name.jar"; exit -1)
   link "$OPT_DIR/$name/$name.jar" "$OPT_JAR_DIR/$name.jar"
 }
 
data/share/rbbt_commands/hpc/list
CHANGED
@@ -173,7 +173,7 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
     select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
     select = true if running && nodes.any? && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
     select = true if jobid && jobid.split(",").include?(id)
-    select = select &&
+    select = select && step_path.match(/#{search}/) if search
     next unless select
   elsif search
     select = false
@@ -206,9 +206,9 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
     puts Log.color(:magenta, "BATCH parameters: ")
     case job_batch_system
     when 'slurm'
-      text = CMD.cmd('grep "^#SBATCH"
+      text = CMD.cmd('grep "^#SBATCH" ', :in => Open.read(fcmd)).read.strip
     when 'lsf'
-      text = CMD.cmd('grep "^#BSUB"
+      text = CMD.cmd('grep "^#BSUB" ', :in => Open.read(fcmd)).read.strip
     else
       text = ""
     end
@@ -292,13 +292,17 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
     step_path = step_line.split(": ").last.strip
     step = Step.new step_path
     step.load_dependencies_from_info
+    has_bar = false
     (step.rec_dependencies + [step]).reverse.each do |j|
       next if j.done?
-
-
-
-
+      if j.file(:progress).exists?
+        bar = Log::ProgressBar.new
+        bar.load(j.file(:progress).yaml)
+        puts Log.color(:magenta, "Progress: ") + bar.report_msg + " " + Log.color(:yellow, j.task_signature)
+        has_bar = true
+      end
     end
+    puts Log.color(:magenta, "Progress: ") + Log.color(:yellow, step.task_signature) + " #{step.status}" unless has_bar
   end
 end
 
data/share/rbbt_commands/hpc/orchestrate
CHANGED
@@ -19,14 +19,19 @@ $slurm_options = SOPT.get <<EOF
 -e--exclusive Make exclusive use of the node
 -hm--highmem Make use of highmem cores
 -wc--wipe_container* Wipe the jobs from the contain directory
+-pd--purge_deps Purge job dependencies
 -CS--contain_and_sync Contain and sync to default locations
 -ci--copy_image When using a container directory, copy image there
 -t--tail Tail the logs
 -BPP--batch_procpath* Save Procpath performance for batch job; specify only options
 -q--queue* Queue
 -t--task_cpus* Tasks
--W--workflows* Additional workflows
 -tm--time* Time
+-m--mem* SLURM minimum memory
+-mcpu--mem_per_cpu* SLURM minimum memory per CPU
+-lin--licenses* SLURM licenses
+-cons--constraint* SLURM constraint
+-W--workflows* Additional workflows
 -OR--orchestration_rules* Orchestration rules
 -rmb--remove_batch_basedir Remove the SLURM working directory (command, STDIN, exit status, ...)
 EOF
data/share/rbbt_commands/hpc/task
CHANGED
@@ -18,14 +18,19 @@ $slurm_options = SOPT.get <<EOF
 -e--exclusive Make exclusive use of the node
 -hm--highmem Make use of highmem cores
 -wc--wipe_container* Wipe the jobs from the contain directory
+-pd--purge_deps Purge job dependencies
 -CS--contain_and_sync Contain and sync to default locations
 -ci--copy_image When using a container directory, copy image there
 -t--tail Tail the logs
 -BPP--batch_procpath* Save Procpath performance for batch job; specify only options
 -q--queue* Queue
 -t--task_cpus* Tasks
--W--workflows* Additional workflows
 -tm--time* Time
+-m--mem* SLURM minimum memory
+-mcpu--mem_per_cpu* SLURM minimum memory per CPU
+-lin--licenses* SLURM licenses
+-cons--constraint* SLURM constraint
+-W--workflows* Additional workflows
 -rmb--remove_batch_dir Remove the batch working directory (command, STDIN, exit status, ...)
 -bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
 EOF
@@ -0,0 +1,212 @@
+#!/usr/bin/env ruby
+
+require 'rbbt-util'
+require 'rbbt/util/simpleopt'
+require 'rbbt/hpc'
+
+#$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
+
+options = SOPT.setup <<EOF
+
+Clean error or aborted jobs
+
+$ rbbt slurm clean [options]
+
+-h--help Print this help
+-d--done Done jobs only
+-e--error Error jobs only
+-a--aborted SLURM aboted jobs
+-q--queued Queued jobs only
+-j--job* Job ids
+-s--search* Regular expression
+-t--tail* Show the last lines of the STDERR
+-BP--batch_parameters show batch parameters
+-dr--dry_run Do not erase anything
+EOF
+
+if options[:help]
+  if defined? rbbt_usage
+    rbbt_usage
+  else
+    puts SOPT.doc
+  end
+  exit 0
+end
+
+batch_system = options.delete :batch_system
+batch_system ||= 'auto'
+
+HPC::BATCH_MODULE = HPC.batch_system batch_system
+
+raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
+
+Log.severity = 4
+done, error, aborted, queued, jobid, search, tail, batch_parameters, dry_run = options.values_at :done, :error, :aborted, :queued, :job, :search, :tail, :batch_parameters, :dry_run
+
+workdir = File.expand_path('~/rbbt-batch')
+Path.setup(workdir)
+
+running_jobs = begin
+                 squeue_txt = HPC::BATCH_MODULE.job_status
+                 squeue_txt.split("\n").collect{|l| l.to_i.to_s}
+               rescue
+                 Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
+                 squeue_txt = nil
+                 $norunningjobs = true
+                 []
+               end
+
+if squeue_txt
+  job_nodes = {}
+  squeue_txt.split("\n").each do |line|
+    parts = line.strip.split(/\s+/)
+    job_nodes[parts.first] = parts.last.split(",")
+  end
+else
+  job_nodes = nil
+end
+
+count = 0
+workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
+  dir = File.dirname(fcmd)
+  command_txt = Open.read(fcmd)
+
+  if m = command_txt.match(/#CMD: (.*)/)
+    cmd = m[1]
+  else
+    cmd = nil
+  end
+
+  if m = command_txt.match(/# Run command\n(.*?)\n/im)
+    exe = m[1]
+  else
+    exe = nil
+  end
+
+  if m = command_txt.match(/^CONTAINER_DIR=(.*)/)
+    container_home = m[1]
+  else
+    container_home = nil
+  end
+
+  if m = command_txt.match(/^BATCH_SYSTEM=(.*)/)
+    job_batch_system = m[1].downcase
+  else
+    job_batch_system = nil
+  end
+
+  different_system = job_batch_system != batch_system
+
+  if File.exists?(fid = File.join(dir, 'job.id'))
+    id = Open.read(fid).chomp
+  else
+    id = nil
+  end
+
+  if File.exists?(fstatus = File.join(dir, 'exit.status'))
+    exit_status = Open.read(fstatus).to_i
+  else
+    exit_status = nil
+  end
+
+  if File.exists?(fstatus = File.join(dir, 'job.status'))
+    fstatus_txt = Open.read(fstatus)
+    begin
+      if job_batch_system == "lsf"
+        nodes = Open.read(fstatus).split("\n").last.split(/\s+/)[5].split(",")
+      else
+        nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
+      end
+    rescue
+      nodes = []
+    end
+  elsif job_nodes[id]
+    nodes = job_nodes[id]
+  else
+    nodes = []
+  end
+
+  if File.exists?(File.join(dir, 'std.out'))
+    outt = File.mtime File.join(dir, 'std.out')
+    errt = File.mtime File.join(dir, 'std.err')
+    time_diff = Time.now - [outt, errt].max
+  end
+
+  fdep = File.join(dir, 'dependencies.list')
+  deps = Open.read(fdep).split("\n") if File.exists?(fdep)
+
+  fcadep = File.join(dir, 'canfail_dependencies.list')
+  cadeps = Open.read(fcadep).split("\n") if File.exists?(fcadep)
+
+  aborted = error = true if aborted.nil? && error.nil?
+  #if done || error || aborted || running || queued || jobid || search
+  #  select = false
+  #  select = true if done && exit_status && exit_status.to_i == 0
+  #  select = true if error && exit_status && exit_status.to_i != 0
+  #  select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
+  #  select = select && jobid.split(",").include?(id) if jobid
+  #  select = select && cmd.match(/#{search}/) if search
+  #  next unless select
+  #end
+
+  if done || error || aborted || queued || jobid
+    select = false
+    select = true if done && exit_status == 0
+    select = true if error && exit_status && exit_status != 0
+    select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
+    is_running = exit_status.nil? && ( (running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)) || different_system )
+    select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
+    select = true if jobid && jobid.split(",").include?(id)
+    select = select && cmd.match(/#{search}/) if search
+    next unless select
+  elsif search
+    select = false
+    select = true if search && cmd.match(/#{search}/)
+    next unless select
+  end
+
+
+  puts Log.color(:yellow, "**ERASING**")
+  puts Log.color :blue, dir
+  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
+  puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
+  puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
+  puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
+  puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
+  puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
+  puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
+  puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
+  puts Log.color(:magenta, "Nodes: ") << nodes * ", "
+  puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
+
+  if options[:batch_parameters]
+    puts Log.color(:magenta, "BATCH parameters: ")
+    case job_batch_system
+    when 'slurm'
+      puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
+    when 'lsf'
+      puts Log.color :blue, CMD.cmd('grep "^#BSUB" |tail -n +6', :in => Open.read(fcmd)).read.strip
+    end
+  end
+
+  if tail && File.exists?(File.join(dir, 'std.err'))
+    if exit_status && exit_status != 0
+      puts Log.color(:magenta, "First error or exception found: ")
+      puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
+    elsif exit_status
+      puts Log.color(:magenta, "Completed jobs: ")
+      puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
+    else
+      puts Log.color(:magenta, "Log tail: ")
+      puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
+    end
+  end
+
+  count += 1
+
+  Open.rm_rf dir unless dry_run
+end
+
+puts
+puts "Found #{count} jobs"
+