rbbt-util 5.29.0 → 5.30.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc/orchestrate.rb +96 -8
- data/lib/rbbt/hpc/slurm.rb +57 -24
- data/lib/rbbt/persist.rb +4 -0
- data/lib/rbbt/persist/tsv/adapter.rb +44 -13
- data/lib/rbbt/tsv.rb +6 -2
- data/lib/rbbt/util/cmd.rb +6 -1
- data/lib/rbbt/util/misc/inspect.rb +13 -3
- data/lib/rbbt/util/misc/options.rb +0 -42
- data/lib/rbbt/util/procpath.rb +49 -0
- data/lib/rbbt/workflow/accessor.rb +6 -1
- data/lib/rbbt/workflow/step/accessor.rb +20 -13
- data/lib/rbbt/workflow/step/dependencies.rb +1 -2
- data/lib/rbbt/workflow/step/run.rb +2 -5
- data/lib/rbbt/workflow/usage.rb +1 -1
- data/lib/rbbt/workflow/util/provenance.rb +5 -2
- data/share/rbbt_commands/slurm/clean +165 -0
- data/share/rbbt_commands/slurm/list +174 -95
- data/share/rbbt_commands/slurm/orchestrate +3 -2
- data/share/rbbt_commands/slurm/task +1 -0
- data/share/rbbt_commands/tsv/slice +3 -3
- data/share/rbbt_commands/workflow/info +1 -1
- data/share/rbbt_commands/workflow/task +27 -7
- data/share/rbbt_commands/workflow/write_info +52 -0
- data/test/rbbt/test_workflow.rb +7 -7
- data/test/rbbt/util/test_procpath.rb +23 -0
- metadata +7 -2
@@ -20,15 +20,18 @@ $ rbbt mnl [options]
|
|
20
20
|
-j--job* Job ids
|
21
21
|
-s--search* Regular expression
|
22
22
|
-t--tail* Show the last lines of the STDERR
|
23
|
+
-SBP--sbatch_parameters show sbatch parameters
|
24
|
+
-PERF--procpath_performance show Procpath performance summary
|
25
|
+
-sacct--sacct_peformance show sacct performance summary
|
23
26
|
EOF
|
24
27
|
|
25
28
|
if options[:help]
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
29
|
+
if defined? rbbt_usage
|
30
|
+
rbbt_usage
|
31
|
+
else
|
32
|
+
puts SOPT.doc
|
33
|
+
end
|
34
|
+
exit 0
|
32
35
|
end
|
33
36
|
|
34
37
|
Log.severity = 4
|
@@ -38,101 +41,177 @@ workdir = File.expand_path('~/rbbt-slurm')
|
|
38
41
|
Path.setup(workdir)
|
39
42
|
|
40
43
|
running_jobs = begin
|
41
|
-
|
44
|
+
squeue_txt = CMD.cmd('squeue').read
|
45
|
+
squeue_txt.split("\n").collect{|l| l.to_i.to_s}
|
42
46
|
rescue
|
43
|
-
|
44
|
-
|
45
|
-
|
47
|
+
Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
|
48
|
+
squeue_txt = nil
|
49
|
+
$norunningjobs = true
|
50
|
+
[]
|
46
51
|
end
|
47
52
|
|
53
|
+
if squeue_txt
|
54
|
+
job_nodes = {}
|
55
|
+
squeue_txt.split("\n").each do |line|
|
56
|
+
parts = line.strip.split(/\s+/)
|
57
|
+
job_nodes[parts.first] = parts.last.split(",")
|
58
|
+
end
|
59
|
+
else
|
60
|
+
job_nodes = nil
|
61
|
+
end
|
62
|
+
|
48
63
|
count = 0
|
49
64
|
workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
65
|
+
dir = File.dirname(fcmd)
|
66
|
+
|
67
|
+
if m = Open.read(fcmd).match(/#CMD: (.*)/)
|
68
|
+
cmd = m[1]
|
69
|
+
else
|
70
|
+
cmd = nil
|
71
|
+
end
|
72
|
+
|
73
|
+
if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
|
74
|
+
exe = m[1].sub('step_path=$(','')
|
75
|
+
else
|
76
|
+
exe = nil
|
77
|
+
end
|
78
|
+
|
79
|
+
if m = Open.read(fcmd).match(/^CONTAINER_DIR=(.*)/)
|
80
|
+
container_home = m[1]
|
81
|
+
else
|
82
|
+
container_home = nil
|
83
|
+
end
|
84
|
+
|
85
|
+
|
86
|
+
if File.exists?(fid = File.join(dir, 'job.id'))
|
87
|
+
id = Open.read(fid).chomp
|
88
|
+
else
|
89
|
+
id = nil
|
90
|
+
end
|
91
|
+
|
92
|
+
if File.exists?(fstatus = File.join(dir, 'exit.status'))
|
93
|
+
exit_status = Open.read(fstatus).to_i
|
94
|
+
else
|
95
|
+
exit_status = nil
|
96
|
+
end
|
97
|
+
|
98
|
+
if File.exists?(fstatus = File.join(dir, 'job.status'))
|
99
|
+
nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
|
100
|
+
elsif job_nodes[id]
|
101
|
+
nodes = job_nodes[id]
|
102
|
+
else
|
103
|
+
nodes = []
|
104
|
+
end
|
105
|
+
|
106
|
+
if File.exists?(File.join(dir, 'std.out'))
|
107
|
+
outt = File.mtime File.join(dir, 'std.out')
|
108
|
+
errt = File.mtime File.join(dir, 'std.err')
|
109
|
+
time_diff = Time.now - [outt, errt].max
|
110
|
+
end
|
111
|
+
|
112
|
+
fdep = File.join(dir, 'dependencies.list')
|
113
|
+
deps = Open.read(fdep).split("\n") if File.exists?(fdep)
|
114
|
+
|
115
|
+
fcadep = File.join(dir, 'canfail_dependencies.list')
|
116
|
+
cadeps = Open.read(fcadep).split("\n") if File.exists?(fcadep)
|
117
|
+
|
118
|
+
if done || error || aborted || running || queued || jobid || search
|
119
|
+
select = false
|
120
|
+
select = true if done && exit_status == 0
|
121
|
+
select = true if error && exit_status && exit_status != 0
|
122
|
+
select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
|
123
|
+
select = true if queued && deps && (running_jobs & deps).any?
|
124
|
+
select = true if running && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
|
125
|
+
select = true if jobid && jobid.split(",").include?(id)
|
126
|
+
select = true if search && cmd.match(/#{search}/)
|
127
|
+
next unless select
|
128
|
+
end
|
129
|
+
|
130
|
+
|
131
|
+
puts Log.color :blue, dir
|
132
|
+
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
|
133
|
+
puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
|
134
|
+
puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
|
135
|
+
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
|
136
|
+
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
|
137
|
+
puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
|
138
|
+
puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
|
139
|
+
puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
|
140
|
+
puts Log.color(:magenta, "Nodes: ") << nodes * ", "
|
141
|
+
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
|
142
|
+
|
143
|
+
if options[:sbatch_parameters]
|
144
|
+
puts Log.color(:magenta, "SBATCH parameters: ")
|
145
|
+
text = CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
|
146
|
+
lines = text.split("\n").collect{|line| header, _sep, value = line.partition(/\s+/); Log.color(:yellow, header + ": ") + value}
|
147
|
+
puts Log.color :yellow, lines * "\n"
|
148
|
+
end
|
149
|
+
|
150
|
+
fprocpath = File.join(dir, 'procpath.sqlite3')
|
151
|
+
if options[:procpath_performance] && Open.exists?(fprocpath)
|
152
|
+
puts Log.color(:magenta, "Procpath summary: ")
|
153
|
+
require 'rbbt/tsv/csv'
|
154
|
+
meta = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from meta;' "))
|
155
|
+
perf = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from record;' "))
|
156
|
+
|
157
|
+
page_size = meta["page_size"].first.to_f
|
158
|
+
clock_ticks = meta["clock_ticks"].first.to_f
|
159
|
+
|
160
|
+
cpu_average = {}
|
161
|
+
rss_average = {}
|
162
|
+
perf.through :key, ["ts", 'stat_pid', "stat_utime", "stat_stime", "stat_cutime", "stat_cstime", "stat_rss"] do |k, values|
|
163
|
+
time, stat_pid, ucpu, scpu, ccpu, cscpu, rss = values
|
164
|
+
time = time.to_f
|
165
|
+
|
166
|
+
cpu = Misc.sum([ucpu, scpu].collect{|v| v.to_f})
|
167
|
+
cpu_average[stat_pid] ||= {}
|
168
|
+
cpu_average[stat_pid][time] ||= []
|
169
|
+
cpu_average[stat_pid][time] << cpu.to_f
|
170
|
+
rss_average[time] ||= []
|
171
|
+
rss_average[time] << rss.to_f * page_size
|
172
|
+
end
|
173
|
+
|
174
|
+
ticks = 0
|
175
|
+
cpu_average.each do |stat_pid, cpu_average_pid|
|
176
|
+
start = cpu_average_pid.keys.sort.first
|
177
|
+
eend = cpu_average_pid.keys.sort.last
|
178
|
+
ticks += Misc.sum(cpu_average_pid[eend]) - Misc.sum(cpu_average_pid[start])
|
179
|
+
end
|
180
|
+
start = rss_average.keys.sort.first
|
181
|
+
eend = rss_average.keys.sort.last
|
182
|
+
time_elapsed = eend - start
|
183
|
+
puts Log.color(:yellow, "CPU average: ") + "%.2f" % ( ticks / clock_ticks / time_elapsed * 100).to_s
|
184
|
+
puts Log.color(:yellow, "RSS average: ") + "%.2f GB" % Misc.mean(rss_average.collect{|t,l| Misc.sum(l) / (1024 * 1024 * 1024)}).to_s
|
185
|
+
|
186
|
+
end
|
187
|
+
|
188
|
+
if options[:sacct_peformance]
|
189
|
+
begin
|
190
|
+
tsv = TSV.open(CMD.cmd("sacct -j #{id} -o 'jobid,AveRSS,MaxRSS,MaxDiskRead,MaxDiskWrite' -P|grep 'JobID\\|\.batch'"), :header_hash => '', :sep => "|", :type => :list)
|
191
|
+
values = tsv[tsv.keys.first]
|
192
|
+
if values.compact.any?
|
193
|
+
puts Log.color(:magenta, "SACCT performance: ")
|
194
|
+
puts values.zip(values.fields).collect{|v,t| Log.color(:yellow, t + ": ") + v.to_s } * "\n"
|
56
195
|
end
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
if File.exists?(fstatus = File.join(dir, 'exit.status'))
|
78
|
-
exit_status = Open.read(fstatus).to_i
|
79
|
-
else
|
80
|
-
exit_status = nil
|
81
|
-
end
|
82
|
-
|
83
|
-
if File.exists?(fstatus = File.join(dir, 'job.status'))
|
84
|
-
nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
|
85
|
-
else
|
86
|
-
nodes = []
|
87
|
-
end
|
88
|
-
|
89
|
-
if File.exists?(File.join(dir, 'std.out'))
|
90
|
-
outt = File.mtime File.join(dir, 'std.out')
|
91
|
-
errt = File.mtime File.join(dir, 'std.err')
|
92
|
-
time_diff = Time.now - [outt, errt].max
|
93
|
-
end
|
94
|
-
|
95
|
-
fdep = File.join(dir, 'dependencies.list')
|
96
|
-
deps = Open.read(fdep).split("\n") if File.exists?(fdep)
|
97
|
-
|
98
|
-
if done || error || aborted || running || queued || jobid || search
|
99
|
-
select = false
|
100
|
-
select = true if done && exit_status == 0
|
101
|
-
select = true if error && exit_status && exit_status != 0
|
102
|
-
select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
|
103
|
-
select = true if queued && deps && (running_jobs & deps).any?
|
104
|
-
select = true if running && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
|
105
|
-
select = true if jobid && jobid.split(",").include?(id)
|
106
|
-
select = true if search && cmd.match(/#{search}/)
|
107
|
-
next unless select
|
108
|
-
end
|
109
|
-
|
110
|
-
|
111
|
-
puts Log.color :blue, dir
|
112
|
-
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
|
113
|
-
puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
|
114
|
-
puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
|
115
|
-
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
|
116
|
-
puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
|
117
|
-
puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
|
118
|
-
puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
|
119
|
-
puts Log.color(:magenta, "Nodes: ") << nodes * ", "
|
120
|
-
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
|
121
|
-
|
122
|
-
if tail && File.exists?(File.join(dir, 'std.err'))
|
123
|
-
if exit_status && exit_status != 0
|
124
|
-
puts Log.color(:magenta, "First error or exception found: ")
|
125
|
-
puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
|
126
|
-
elsif exit_status
|
127
|
-
puts Log.color(:magenta, "Completed jobs: ")
|
128
|
-
puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
|
129
|
-
else
|
130
|
-
puts Log.color(:magenta, "Log tail: ")
|
131
|
-
puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
count += 1
|
196
|
+
rescue
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
|
201
|
+
if tail && File.exists?(File.join(dir, 'std.err'))
|
202
|
+
if exit_status && exit_status != 0
|
203
|
+
puts Log.color(:magenta, "First error or exception found: ")
|
204
|
+
puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
|
205
|
+
elsif exit_status
|
206
|
+
puts Log.color(:magenta, "Completed jobs: ")
|
207
|
+
puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
|
208
|
+
else
|
209
|
+
puts Log.color(:magenta, "Log tail: ")
|
210
|
+
puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
214
|
+
count += 1
|
136
215
|
|
137
216
|
end
|
138
217
|
|
@@ -21,11 +21,12 @@ $slurm_options = SOPT.get <<EOF
|
|
21
21
|
-CS--contain_and_sync Contain and sync to default locations
|
22
22
|
-ci--copy_image When using a container directory, copy image there
|
23
23
|
-t--tail Tail the logs
|
24
|
+
-SPERF--SLURM_procpath* Save Procpath performance for SLURM job; specify only options
|
24
25
|
-q--queue* Queue
|
25
26
|
-t--task_cpus* Tasks
|
26
27
|
-W--workflows* Additional workflows
|
27
28
|
-tm--time* Time
|
28
|
-
-
|
29
|
+
-OR--orchestration_rules* Orchestration rules
|
29
30
|
-rmb--remove_slurm_basedir Remove the SLURM working directory (command, STDIN, exit status, ...)
|
30
31
|
EOF
|
31
32
|
|
@@ -43,5 +44,5 @@ class Step
|
|
43
44
|
end
|
44
45
|
end
|
45
46
|
|
46
|
-
ARGV.concat ["-W", $slurm_options[:workflows]] if $slurm_options[:workflows]
|
47
|
+
ARGV.concat ["-W", $slurm_options[:workflows], '--detach'] if $slurm_options[:workflows]
|
47
48
|
load Rbbt.share.rbbt_commands.workflow.task.find
|
@@ -20,6 +20,7 @@ $slurm_options = SOPT.get <<EOF
|
|
20
20
|
-CS--contain_and_sync Contain and sync to default locations
|
21
21
|
-ci--copy_image When using a container directory, copy image there
|
22
22
|
-t--tail Tail the logs
|
23
|
+
-SPERF--SLURM_procpath* Save Procpath performance for SLURM job; specify only options
|
23
24
|
-q--queue* Queue
|
24
25
|
-t--task_cpus* Tasks
|
25
26
|
-W--workflows* Additional workflows
|
@@ -35,7 +35,7 @@ file = case file
|
|
35
35
|
fields = options[:fields]
|
36
36
|
raise ParameterException, "Please specify the fields to slice" if fields.nil?
|
37
37
|
|
38
|
-
options[:header_hash]
|
38
|
+
options[:header_hash] ||= options["header_hash"]
|
39
39
|
|
40
40
|
case
|
41
41
|
when options[:tokyocabinet]
|
@@ -45,8 +45,8 @@ when options[:tokyocabinet_bd]
|
|
45
45
|
tsv = Persist.open_tokyocabinet(file, false, nil, TokyoCabinet::BDB)
|
46
46
|
puts tsv.summary
|
47
47
|
else
|
48
|
-
stream = TSV.traverse file, options.merge(:into => :stream, :type => :list, :
|
49
|
-
|
48
|
+
stream = TSV.traverse file, options.merge(:into => :stream, :type => :list, :fields => fields.split(","), :unnamed => true) do |k,fields,names|
|
49
|
+
[k,fields].flatten * "\t"
|
50
50
|
end
|
51
51
|
puts stream.read
|
52
52
|
exit 0
|
@@ -86,7 +86,7 @@ messages = info[:messages]
|
|
86
86
|
backtrace = info[:backtrace]
|
87
87
|
pid = info[:pid]
|
88
88
|
exception = info[:exception]
|
89
|
-
rest = info.keys - [:inputs, :dependencies, :status, :time_elapsed, :messages, :backtrace, :exception, :
|
89
|
+
rest = info.keys - [:inputs, :dependencies, :status, :time_elapsed, :messages, :backtrace, :exception, :archived_info]
|
90
90
|
|
91
91
|
|
92
92
|
puts Log.color(:magenta, "File") << ": " << step.path
|
@@ -20,7 +20,7 @@ def usage(workflow = nil, task = nil, exception=nil, abridge = false)
|
|
20
20
|
puts
|
21
21
|
if workflow.nil?
|
22
22
|
puts "No workflow specified. Use `rbbt workflow list` to list available workflows."
|
23
|
-
exit -1
|
23
|
+
exit! -1
|
24
24
|
end
|
25
25
|
|
26
26
|
if task.nil?
|
@@ -203,10 +203,11 @@ The `recursive_clean` cleans all the job dependency steps recursively.
|
|
203
203
|
-prec--prepare_cpus* Number of dependencies prepared in parallel
|
204
204
|
-rwt--remote_workflow_tasks* Load a yaml file describing remote workflow tasks
|
205
205
|
-od--override_deps* Override deps using 'Workflow#task=<path>' array_separated
|
206
|
+
-PERF--procpath_performance* Measure performance using procpath
|
206
207
|
EOF
|
207
208
|
|
208
209
|
workflow = ARGV.shift
|
209
|
-
usage and exit -1 if workflow.nil?
|
210
|
+
usage and exit! -1 if workflow.nil?
|
210
211
|
|
211
212
|
task = ARGV.shift
|
212
213
|
|
@@ -232,7 +233,8 @@ else
|
|
232
233
|
remote_workflows = {}
|
233
234
|
end
|
234
235
|
|
235
|
-
Workflow.workdir = Path.setup(File.expand_path(options.delete(:workdir_all))) if options[:workdir_all]
|
236
|
+
#Workflow.workdir = Path.setup(File.expand_path(options.delete(:workdir_all))) if options[:workdir_all]
|
237
|
+
Workflow.workdir.search_paths.merge!({:workdir => File.expand_path(options.delete(:workdir_all)), :default => :workdir }) if options[:workdir_all]
|
236
238
|
|
237
239
|
workflow = Workflow.require_workflow workflow
|
238
240
|
|
@@ -406,6 +408,23 @@ begin
|
|
406
408
|
exit 0
|
407
409
|
end
|
408
410
|
|
411
|
+
if options[:procpath_performance]
|
412
|
+
require 'rbbt/util/procpath'
|
413
|
+
current_pid = job.info[:pid]
|
414
|
+
job.fork
|
415
|
+
job.soft_grace
|
416
|
+
sleep 2 if job.info[:pid] == current_pid
|
417
|
+
if job.info[:pid] != current_pid
|
418
|
+
pid = job.info[:pid]
|
419
|
+
begin
|
420
|
+
ProcPath.monitor(pid, options[:procpath_performance])
|
421
|
+
rescue Errno::ECHILD
|
422
|
+
Log.warn "Procpath didn't find process #{pid} to monitor. Maybe it finished already"
|
423
|
+
rescue
|
424
|
+
Log.warn "Procpath failed: #{$!.message}"
|
425
|
+
end
|
426
|
+
end
|
427
|
+
end
|
409
428
|
|
410
429
|
if do_fork
|
411
430
|
ENV["RBBT_NO_PROGRESS"] = "true"
|
@@ -422,7 +441,6 @@ begin
|
|
422
441
|
res = job
|
423
442
|
end
|
424
443
|
|
425
|
-
|
426
444
|
if options.delete(:printpath)
|
427
445
|
job.join
|
428
446
|
raise job.messages.last if (job.error? || job.aborted?) && job.messages
|
@@ -486,7 +504,7 @@ rescue ParameterException
|
|
486
504
|
puts
|
487
505
|
report_options saved_job_options
|
488
506
|
puts
|
489
|
-
exit -1
|
507
|
+
exit! -1
|
490
508
|
end
|
491
509
|
|
492
510
|
if options.delete(:list_job_files)
|
@@ -538,7 +556,7 @@ when Step
|
|
538
556
|
io.abort if io.respond_to? :abort
|
539
557
|
io.join if io.respond_to? :join
|
540
558
|
ensure
|
541
|
-
exit -1
|
559
|
+
exit! -1
|
542
560
|
end
|
543
561
|
rescue Exception
|
544
562
|
Log.exception $!
|
@@ -547,9 +565,11 @@ when Step
|
|
547
565
|
io.abort if io.respond_to? :abort
|
548
566
|
io.join if io.respond_to? :join
|
549
567
|
ensure
|
550
|
-
exit -1
|
568
|
+
exit! -1
|
551
569
|
end
|
552
570
|
end
|
571
|
+
elsif detach
|
572
|
+
exit! 0
|
553
573
|
else
|
554
574
|
res.join
|
555
575
|
out.puts Open.read(res.path) if Open.exist?(res.path) || Open.remote?(res.path) || Open.ssh?(res.path)
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rbbt/workflow'
|
4
|
+
|
5
|
+
require 'rbbt-util'
|
6
|
+
require 'rbbt-util'
|
7
|
+
require 'rbbt/util/simpleopt'
|
8
|
+
|
9
|
+
$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
|
10
|
+
|
11
|
+
options = SOPT.setup <<EOF
|
12
|
+
Examine the info of a job result
|
13
|
+
|
14
|
+
$ rbbt workflow info <job-result> <key> <value>
|
15
|
+
|
16
|
+
-h--help Help
|
17
|
+
-f--force Write info even if key is already present
|
18
|
+
-r--recursive Write info for all dependencies as well
|
19
|
+
-p--check_pid Check that recursive jobs where created by the same process
|
20
|
+
EOF
|
21
|
+
|
22
|
+
SOPT.usage if options[:help]
|
23
|
+
|
24
|
+
file, key, value = ARGV
|
25
|
+
|
26
|
+
force, recursive, check_pid = options.values_at :force, :recursive, :check_pid
|
27
|
+
|
28
|
+
def get_step(file)
|
29
|
+
file = file.sub(/\.(info|files)/,'')
|
30
|
+
step = Workflow.load_step file
|
31
|
+
step
|
32
|
+
end
|
33
|
+
|
34
|
+
raise ParameterException if key.nil? || value.nil?
|
35
|
+
|
36
|
+
if %w(DELETE nil).include? value
|
37
|
+
value = nil
|
38
|
+
force = true
|
39
|
+
end
|
40
|
+
|
41
|
+
step = get_step file
|
42
|
+
|
43
|
+
step.set_info key, value if force || ! step.info.include?(key)
|
44
|
+
|
45
|
+
pid = step.info[:pid]
|
46
|
+
host = step.info[:pid_hostname]
|
47
|
+
|
48
|
+
step.rec_dependencies.each do |dep|
|
49
|
+
dep.set_info key, value if (force || ! dep.info.include?(key)) && (!check_pid || dep.info[:pid].to_s == pid and dep.info[:pid_hostname] == host)
|
50
|
+
rescue
|
51
|
+
Log.warn "Could no set info #{key} for #{dep.path}: #{$!.message}"
|
52
|
+
end if recursive
|