rbbt-util 5.29.4 → 5.30.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc/orchestrate.rb +1 -1
- data/lib/rbbt/hpc/slurm.rb +21 -10
- data/lib/rbbt/persist/tsv/adapter.rb +44 -13
- data/lib/rbbt/tsv.rb +3 -2
- data/lib/rbbt/util/cmd.rb +6 -1
- data/lib/rbbt/util/misc/options.rb +0 -42
- data/lib/rbbt/util/procpath.rb +49 -0
- data/lib/rbbt/workflow.rb +1 -1
- data/lib/rbbt/workflow/step/accessor.rb +3 -4
- data/lib/rbbt/workflow/step/run.rb +2 -4
- data/lib/rbbt/workflow/usage.rb +1 -1
- data/lib/rbbt/workflow/util/archive.rb +1 -1
- data/lib/rbbt/workflow/util/provenance.rb +2 -1
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/slurm/list +82 -8
- data/share/rbbt_commands/slurm/orchestrate +1 -0
- data/share/rbbt_commands/slurm/task +1 -0
- data/share/rbbt_commands/tsv/slice +3 -3
- data/share/rbbt_commands/workflow/info +1 -1
- data/share/rbbt_commands/workflow/task +18 -1
- data/share/rbbt_commands/workflow/write_info +52 -0
- data/test/rbbt/util/test_procpath.rb +23 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d45adac7949e3fea0d710418d93837cf0aa715bcedb2212c6e68f8dd749382ae
|
4
|
+
data.tar.gz: 9a9857c6b1565b9ed55f18f50fb1b242a1477dd6868111e10252ecc0c286ca44
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9dac4b1211fd40894f00b1a84f4f10abad83f2169579ec327a8da86f05b4274cc11e2192902638eb78b9de8f39351993f17a3b08dd5390a0f2c04ef385a0e1a2
|
7
|
+
data.tar.gz: 01b9d69b003088e78f2665b57e9ed8acc8104143bc826aabe49059e77fd0e20279d9393245f7bb20efcab807180273f31d2285a62113947cf151571a96f2d949
|
data/lib/rbbt/hpc/orchestrate.rb
CHANGED
@@ -83,7 +83,7 @@ module HPC
|
|
83
83
|
|
84
84
|
deps = seen[dep.path] ||= self.orchestrate_job(dep, options, skip_dep, seen)
|
85
85
|
if job.canfail_paths.include? dep.path
|
86
|
-
[deps].flatten.collect{|id| ['canfail', id] * ":"}
|
86
|
+
[deps].flatten.compact.collect{|id| ['canfail', id] * ":"}
|
87
87
|
else
|
88
88
|
deps
|
89
89
|
end
|
data/lib/rbbt/hpc/slurm.rb
CHANGED
@@ -33,7 +33,8 @@ module HPC
|
|
33
33
|
group = File.basename(File.dirname(ENV['HOME']))
|
34
34
|
|
35
35
|
if contain_and_sync
|
36
|
-
|
36
|
+
random_file = TmpFile.random_name
|
37
|
+
contain = "/scratch/tmp/rbbt-#{user}/#{random_file}" if contain.nil?
|
37
38
|
sync = "~/.rbbt/var/jobs" if sync.nil?
|
38
39
|
wipe_container = "post" if wipe_container.nil?
|
39
40
|
end
|
@@ -243,7 +244,7 @@ EOF
|
|
243
244
|
end
|
244
245
|
|
245
246
|
if contain
|
246
|
-
rbbt_cmd << " " << %(--workdir_all='#{contain.gsub("'", '\'')}
|
247
|
+
rbbt_cmd << " " << %(--workdir_all='#{contain.gsub("'", '\'')}/workdir')
|
247
248
|
end
|
248
249
|
end
|
249
250
|
|
@@ -251,6 +252,10 @@ EOF
|
|
251
252
|
cmd =<<-EOF
|
252
253
|
#{exec_cmd} \\
|
253
254
|
#{rbbt_cmd}
|
255
|
+
EOF
|
256
|
+
annotate_cmd =<<-EOF
|
257
|
+
#{exec_cmd} \\
|
258
|
+
workflow write_info --recursive --force=false --check_pid "$step_path" slurm_job $SLURM_JOB_ID
|
254
259
|
EOF
|
255
260
|
|
256
261
|
header +=<<-EOF
|
@@ -260,11 +265,14 @@ EOF
|
|
260
265
|
run +=<<-EOF
|
261
266
|
|
262
267
|
# Run command
|
263
|
-
#{cmd}
|
268
|
+
step_path=$(#{cmd})
|
264
269
|
|
265
270
|
# Save exit status
|
266
271
|
exit_status=$?
|
267
272
|
|
273
|
+
# Annotate info with SLURM job_info
|
274
|
+
#{annotate_cmd}
|
275
|
+
|
268
276
|
EOF
|
269
277
|
|
270
278
|
# CODA
|
@@ -286,7 +294,7 @@ EOF
|
|
286
294
|
sync = sync.strip
|
287
295
|
source = File.join(File.expand_path(contain), source)
|
288
296
|
else
|
289
|
-
source = File.join(File.expand_path(contain), '
|
297
|
+
source = File.join(File.expand_path(contain), 'workdir/var/jobs')
|
290
298
|
end
|
291
299
|
|
292
300
|
target = File.expand_path(sync)
|
@@ -516,7 +524,11 @@ EOF
|
|
516
524
|
dry_run = options.delete :dry_run
|
517
525
|
tail = options.delete :tail
|
518
526
|
dependencies = options.delete :slurm_dependencies
|
527
|
+
procpath = options.delete :SLURM_procpath
|
528
|
+
|
519
529
|
options[:jobname] = job.clean_name
|
530
|
+
log_level = options.delete :log
|
531
|
+
log_level ||= Log.severity
|
520
532
|
|
521
533
|
workflow = job.workflow
|
522
534
|
|
@@ -541,15 +553,14 @@ EOF
|
|
541
553
|
inputs_dir = File.join(tmp_directory, 'inputs_dir')
|
542
554
|
saved = Step.save_job_inputs(job, inputs_dir)
|
543
555
|
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
else
|
548
|
-
cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '--log', (options[:log] || Log.severity).to_s]
|
549
|
-
end
|
556
|
+
cmd = ['workflow', 'task', workflow.to_s, task.to_s, '--printpath', '--log', log_level.to_s]
|
557
|
+
|
558
|
+
cmd << "--procpath_performance='#{tmp_directory}/procpath##{procpath.gsub(',', '#')}'" if procpath
|
550
559
|
|
551
560
|
cmd << "--override_deps='#{override_deps.gsub("'", '\'')}'" if override_deps and not override_deps.empty?
|
552
561
|
|
562
|
+
cmd << "--load_inputs='#{inputs_dir}'" if saved && saved.any?
|
563
|
+
|
553
564
|
template = self.template(cmd, options)
|
554
565
|
jobid = self.issue_template(template, options.merge(:slurm_basedir => slurm_basedir, :dry_run => dry_run, :slurm_dependencies => dependencies))
|
555
566
|
|
@@ -104,9 +104,6 @@ module Persist
|
|
104
104
|
write(true) if closed? || ! write?
|
105
105
|
res = begin
|
106
106
|
yield
|
107
|
-
rescue Exception
|
108
|
-
Log.exception $!
|
109
|
-
raise $!
|
110
107
|
ensure
|
111
108
|
close
|
112
109
|
end
|
@@ -115,7 +112,6 @@ module Persist
|
|
115
112
|
end
|
116
113
|
|
117
114
|
def read_and_close
|
118
|
-
#return yield if @locked
|
119
115
|
if read? || write?
|
120
116
|
begin
|
121
117
|
return yield
|
@@ -134,6 +130,41 @@ module Persist
|
|
134
130
|
end
|
135
131
|
end
|
136
132
|
|
133
|
+
def read_lock
|
134
|
+
read if closed?
|
135
|
+
if read?
|
136
|
+
return yield
|
137
|
+
end
|
138
|
+
|
139
|
+
lock do
|
140
|
+
close
|
141
|
+
read true
|
142
|
+
begin
|
143
|
+
yield
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
def write_lock
|
149
|
+
write if closed?
|
150
|
+
if write?
|
151
|
+
begin
|
152
|
+
return yield
|
153
|
+
ensure
|
154
|
+
close
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
lock do
|
159
|
+
close
|
160
|
+
write true
|
161
|
+
begin
|
162
|
+
yield
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
|
137
168
|
def merge!(hash)
|
138
169
|
hash.each do |key,values|
|
139
170
|
self[key] = values
|
@@ -141,38 +172,38 @@ module Persist
|
|
141
172
|
end
|
142
173
|
|
143
174
|
def range(*args)
|
144
|
-
self.
|
175
|
+
self.read_lock do
|
145
176
|
super(*args)
|
146
177
|
end
|
147
178
|
end
|
148
179
|
|
149
180
|
def include?(*args)
|
150
|
-
self.
|
181
|
+
self.read_lock do
|
151
182
|
super(*args) #- TSV::ENTRY_KEYS.to_a
|
152
183
|
end
|
153
184
|
end
|
154
185
|
|
155
186
|
def [](*args)
|
156
|
-
self.
|
187
|
+
self.read_lock do
|
157
188
|
super(*args) #- TSV::ENTRY_KEYS.to_a
|
158
189
|
end
|
159
190
|
end
|
160
191
|
|
161
192
|
def []=(*args)
|
162
|
-
self.
|
193
|
+
self.write_lock do
|
163
194
|
super(*args) #- TSV::ENTRY_KEYS.to_a
|
164
195
|
end
|
165
196
|
end
|
166
197
|
|
167
198
|
def keys(*args)
|
168
|
-
self.
|
199
|
+
self.read_lock do
|
169
200
|
super(*args)
|
170
201
|
end
|
171
202
|
end
|
172
203
|
|
173
204
|
|
174
205
|
def prefix(key)
|
175
|
-
self.
|
206
|
+
self.read_lock do
|
176
207
|
range(key, 1, key + MAX_CHAR, 1)
|
177
208
|
end
|
178
209
|
end
|
@@ -184,13 +215,13 @@ module Persist
|
|
184
215
|
|
185
216
|
|
186
217
|
def size(*args)
|
187
|
-
self.
|
218
|
+
self.read_lock do
|
188
219
|
super(*args)
|
189
220
|
end
|
190
221
|
end
|
191
222
|
|
192
223
|
def each(*args, &block)
|
193
|
-
self.
|
224
|
+
self.read_lock do
|
194
225
|
super(*args, &block)
|
195
226
|
end
|
196
227
|
end
|
@@ -208,7 +239,7 @@ module Persist
|
|
208
239
|
end
|
209
240
|
|
210
241
|
def values_at(*keys)
|
211
|
-
self.
|
242
|
+
self.read_lock do
|
212
243
|
keys.collect do |k|
|
213
244
|
self[k]
|
214
245
|
end
|
data/lib/rbbt/tsv.rb
CHANGED
@@ -113,11 +113,12 @@ module TSV
|
|
113
113
|
|
114
114
|
data.entity_options = entity_options
|
115
115
|
|
116
|
-
if Path === source
|
117
|
-
|
116
|
+
if Path === source && data.identifiers
|
117
|
+
Path.setup(data.identifiers, source.pkgdir, source.resource)
|
118
118
|
end
|
119
119
|
|
120
120
|
if data.respond_to? :persistence_path
|
121
|
+
data.read
|
121
122
|
data
|
122
123
|
else
|
123
124
|
h = data.dup
|
data/lib/rbbt/util/cmd.rb
CHANGED
@@ -217,7 +217,7 @@ module CMD
|
|
217
217
|
end
|
218
218
|
end
|
219
219
|
|
220
|
-
def self.
|
220
|
+
def self.cmd_pid(*args)
|
221
221
|
all_args = *args
|
222
222
|
|
223
223
|
all_args << {} unless Hash === all_args.last
|
@@ -248,4 +248,9 @@ module CMD
|
|
248
248
|
nil
|
249
249
|
end
|
250
250
|
|
251
|
+
def self.cmd_log(*args)
|
252
|
+
cmd_pid(*args)
|
253
|
+
nil
|
254
|
+
end
|
255
|
+
|
251
256
|
end
|
@@ -242,48 +242,6 @@ module Misc
|
|
242
242
|
|
243
243
|
return options
|
244
244
|
|
245
|
-
options = {}
|
246
|
-
string.split(/#/).each do |str|
|
247
|
-
if str.match(/(.*)=(.*)/)
|
248
|
-
option, value = $1, $2
|
249
|
-
else
|
250
|
-
option, value = str, true
|
251
|
-
end
|
252
|
-
|
253
|
-
option = option.sub(":",'').to_sym if option.chars.first == ':'
|
254
|
-
value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
255
|
-
|
256
|
-
if value == true
|
257
|
-
options[option] = option.to_s.chars.first != '!'
|
258
|
-
else
|
259
|
-
options[option] = Thread.start do
|
260
|
-
$SAFE = 0;
|
261
|
-
case
|
262
|
-
when value =~ /^(?:true|T)$/i
|
263
|
-
true
|
264
|
-
when value =~ /^(?:false|F)$/i
|
265
|
-
false
|
266
|
-
when Symbol === value
|
267
|
-
value
|
268
|
-
when (String === value and value =~ /^\/(.*)\/$/)
|
269
|
-
Regexp.new /#{$1}/
|
270
|
-
else
|
271
|
-
begin
|
272
|
-
Kernel.const_get value
|
273
|
-
rescue
|
274
|
-
begin
|
275
|
-
raise if value =~ /[a-z]/ and defined? value
|
276
|
-
eval(value)
|
277
|
-
rescue Exception
|
278
|
-
value
|
279
|
-
end
|
280
|
-
end
|
281
|
-
end
|
282
|
-
end.value
|
283
|
-
end
|
284
|
-
end
|
285
|
-
|
286
|
-
options
|
287
245
|
end
|
288
246
|
|
289
247
|
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'rbbt/util/cmd'
|
2
|
+
module ProcPath
|
3
|
+
CMD.tool :procpath do
|
4
|
+
'pip install procpath'
|
5
|
+
end
|
6
|
+
|
7
|
+
def self.record(pid, path, options = {})
|
8
|
+
IndiferentHash.setup(options)
|
9
|
+
options = Misc.add_defaults options, "interval" => 30
|
10
|
+
|
11
|
+
cmd_options = %w(interval recnum reevalnum).inject({}){|acc,k| acc[k] = options[k]; acc}
|
12
|
+
|
13
|
+
Log.debug "ProcPath recording #{pid} in #{path} (#{Misc.fingerprint options})"
|
14
|
+
procpath_thread = Thread.new do
|
15
|
+
begin
|
16
|
+
procpath_pid = CMD.cmd_pid(:procpath, "record --database-file '#{path}' '$..children[?(@.stat.pid == #{pid})]'", cmd_options.merge(:nofail => true, :add_option_dashes => true))
|
17
|
+
rescue Exception
|
18
|
+
Log.exceptions $!
|
19
|
+
Process.kill "INT", procpath_pid
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
procpath_thread.report_on_exception = false
|
24
|
+
|
25
|
+
Process.wait pid.to_i
|
26
|
+
procpath_thread.raise Interrupt
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.plot(path, output, options = {})
|
30
|
+
IndiferentHash.setup(options)
|
31
|
+
options = Misc.add_defaults options, "query-name" => 'rss', 'epsilon' => 0.5, "moving-average-window" => 10
|
32
|
+
|
33
|
+
cmd_options = %w(query-name epsilon monitor-average-window title logarithmic after before custom-query-file custom-value-expr).inject({}){|acc,k| acc[k] = options[k]; acc}
|
34
|
+
CMD.cmd_log(:procpath, "plot --database-file '#{path}' --plot-file '#{output}' ", cmd_options.merge(:nofail => true, :add_option_dashes => true))
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.monitor(pid, path)
|
38
|
+
database, options_str = path.split("#")
|
39
|
+
options = options_str.nil? ? {} : Misc.string2hash(options_str)
|
40
|
+
|
41
|
+
database = File.expand_path database
|
42
|
+
Log.low "ProcPath monitor #{pid} in #{database} (#{Misc.fingerprint options})"
|
43
|
+
|
44
|
+
ProcPath.record(pid, database + '.sqlite3', options)
|
45
|
+
ProcPath.plot(database + '.sqlite3', database + '.cpu.svg', options.merge("query-name" => 'cpu'))
|
46
|
+
ProcPath.plot(database + '.sqlite3', database + '.rss.svg', options.merge("query-name" => 'rss'))
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
data/lib/rbbt/workflow.rb
CHANGED
@@ -190,7 +190,7 @@ module Workflow
|
|
190
190
|
return Misc.string2const Misc.camel_case(wf_name)
|
191
191
|
end
|
192
192
|
|
193
|
-
Log.
|
193
|
+
Log.high{"Loading workflow #{wf_name}"}
|
194
194
|
require_local_workflow(wf_name) or
|
195
195
|
(Workflow.autoinstall and `rbbt workflow install #{Misc.snake_case(wf_name)} || rbbt workflow install #{wf_name}` and require_local_workflow(wf_name)) or raise("Workflow not found or could not be loaded: #{ wf_name }")
|
196
196
|
begin
|
@@ -505,8 +505,8 @@ class Step
|
|
505
505
|
|
506
506
|
def running?
|
507
507
|
return false if ! (started? || status == :ending)
|
508
|
-
|
509
|
-
|
508
|
+
return nil unless Open.exist?(self.pid_file)
|
509
|
+
pid = Open.read(self.pid_file).to_i
|
510
510
|
|
511
511
|
return false if done? or error? or aborted?
|
512
512
|
|
@@ -530,8 +530,7 @@ class Step
|
|
530
530
|
end
|
531
531
|
|
532
532
|
def nopid?
|
533
|
-
|
534
|
-
! pid && ! (status.nil? || status == :aborted || status == :done || status == :error || status == :cleaned)
|
533
|
+
! Open.exists?(pid_file) && ! (status.nil? || status == :aborted || status == :done || status == :error || status == :cleaned)
|
535
534
|
end
|
536
535
|
|
537
536
|
def aborted?
|
@@ -373,7 +373,6 @@ class Step
|
|
373
373
|
Log.exception $!
|
374
374
|
ensure
|
375
375
|
Step.purge_stream_cache
|
376
|
-
set_info :pid, nil
|
377
376
|
Open.rm pid_file if Open.exist?(pid_file)
|
378
377
|
end
|
379
378
|
end
|
@@ -388,7 +387,6 @@ class Step
|
|
388
387
|
_clean_finished
|
389
388
|
rescue
|
390
389
|
stop_dependencies
|
391
|
-
set_info :pid, nil
|
392
390
|
Open.rm pid_file if Open.exist?(pid_file)
|
393
391
|
end
|
394
392
|
end
|
@@ -449,7 +447,7 @@ class Step
|
|
449
447
|
ensure
|
450
448
|
no_load = false unless IO === result
|
451
449
|
Open.rm pid_file if Open.exist?(pid_file) unless no_load
|
452
|
-
set_info :pid, nil unless no_load
|
450
|
+
#set_info :pid, nil unless no_load
|
453
451
|
end
|
454
452
|
end
|
455
453
|
|
@@ -559,7 +557,7 @@ class Step
|
|
559
557
|
RbbtSemaphore.post_semaphore(semaphore) if semaphore
|
560
558
|
Kernel.exit! -1
|
561
559
|
end
|
562
|
-
set_info :pid, nil
|
560
|
+
#set_info :pid, nil
|
563
561
|
ensure
|
564
562
|
RbbtSemaphore.post_semaphore(semaphore) if semaphore
|
565
563
|
end
|
data/lib/rbbt/workflow/usage.rb
CHANGED
@@ -57,7 +57,7 @@ module Task
|
|
57
57
|
puts Log.color(:magenta, "Input select options")
|
58
58
|
puts
|
59
59
|
selects.collect{|p| p}.uniq.each do |input,options|
|
60
|
-
puts Log.color(:blue, input.to_s + ": ") << Misc.format_paragraph(options.collect{|o| o.to_s} * ", ") << "\n"
|
60
|
+
puts Log.color(:blue, input.to_s + ": ") << Misc.format_paragraph(options.collect{|o| Array === o ? o.first.to_s : o.to_s} * ", ") << "\n"
|
61
61
|
puts unless Log.compact
|
62
62
|
end
|
63
63
|
puts
|
@@ -78,6 +78,7 @@ class Step
|
|
78
78
|
name = info[:name] || File.basename(path)
|
79
79
|
status = :unsync if status == :done and not Open.exist?(path)
|
80
80
|
status = :notfound if status == :noinfo and not Open.exist?(path)
|
81
|
+
|
81
82
|
str = " " * offset
|
82
83
|
str << prov_report_msg(status, name, path, info)
|
83
84
|
step.dependencies.reverse.each do |dep|
|
@@ -90,7 +91,7 @@ class Step
|
|
90
91
|
if expand_repeats
|
91
92
|
str << Log.color(:green, Log.uncolor(prov_report(dep, offset+1, task)))
|
92
93
|
else
|
93
|
-
str << Log.color(:green, " " * (offset + 1) + Log.uncolor(prov_report_msg(status, name, path, info)))
|
94
|
+
str << Log.color(:green, " " * (offset + 1) + Log.uncolor(prov_report_msg(dep.status, dep.info[:name], dep.path, dep.info)))
|
94
95
|
end
|
95
96
|
end
|
96
97
|
end if step.dependencies
|
data/share/rbbt_commands/migrate
CHANGED
@@ -21,6 +21,8 @@ $ rbbt mnl [options]
|
|
21
21
|
-s--search* Regular expression
|
22
22
|
-t--tail* Show the last lines of the STDERR
|
23
23
|
-SBP--sbatch_parameters show sbatch parameters
|
24
|
+
-PERF--procpath_performance show Procpath performance summary
|
25
|
+
-sacct--sacct_peformance show sacct performance summary
|
24
26
|
EOF
|
25
27
|
|
26
28
|
if options[:help]
|
@@ -69,7 +71,7 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
69
71
|
end
|
70
72
|
|
71
73
|
if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
|
72
|
-
exe = m[1]
|
74
|
+
exe = m[1].sub('step_path=$(','')
|
73
75
|
else
|
74
76
|
exe = nil
|
75
77
|
end
|
@@ -96,15 +98,24 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
96
98
|
if File.exists?(fstatus = File.join(dir, 'job.status'))
|
97
99
|
nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
|
98
100
|
elsif job_nodes[id]
|
99
|
-
nodes = job_nodes[id]
|
101
|
+
nodes = job_nodes[id].reject{|n| n.include? "("}
|
100
102
|
else
|
101
103
|
nodes = []
|
102
104
|
end
|
103
105
|
|
106
|
+
if File.exists?(File.join(dir, 'exit.status'))
|
107
|
+
now = File.ctime(File.join(dir, 'exit.status'))
|
108
|
+
else
|
109
|
+
now = Time.now
|
110
|
+
end
|
111
|
+
|
104
112
|
if File.exists?(File.join(dir, 'std.out'))
|
113
|
+
cerrt = File.ctime File.join(dir, 'std.err')
|
114
|
+
coutt = File.ctime File.join(dir, 'std.out')
|
105
115
|
outt = File.mtime File.join(dir, 'std.out')
|
106
116
|
errt = File.mtime File.join(dir, 'std.err')
|
107
|
-
time_diff =
|
117
|
+
time_diff = now - [outt, errt].max
|
118
|
+
time_elapsed = now - [cerrt, coutt].min
|
108
119
|
end
|
109
120
|
|
110
121
|
fdep = File.join(dir, 'dependencies.list')
|
@@ -113,14 +124,19 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
113
124
|
fcadep = File.join(dir, 'canfail_dependencies.list')
|
114
125
|
cadeps = Open.read(fcadep).split("\n") if File.exists?(fcadep)
|
115
126
|
|
116
|
-
if done || error || aborted || running || queued || jobid
|
127
|
+
if done || error || aborted || running || queued || jobid
|
117
128
|
select = false
|
118
129
|
select = true if done && exit_status == 0
|
119
130
|
select = true if error && exit_status && exit_status != 0
|
120
131
|
select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
|
121
|
-
|
122
|
-
select = true if
|
132
|
+
is_running = exit_status.nil? && running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)
|
133
|
+
select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
|
134
|
+
select = true if running && nodes.any? && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
|
123
135
|
select = true if jobid && jobid.split(",").include?(id)
|
136
|
+
select &= search && cmd.match(/#{search}/) if search
|
137
|
+
next unless select
|
138
|
+
elsif search
|
139
|
+
select = false
|
124
140
|
select = true if search && cmd.match(/#{search}/)
|
125
141
|
next unless select
|
126
142
|
end
|
@@ -128,6 +144,7 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
128
144
|
|
129
145
|
puts Log.color :blue, dir
|
130
146
|
puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
|
147
|
+
puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err'))
|
131
148
|
puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
|
132
149
|
puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
|
133
150
|
puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
|
@@ -136,13 +153,70 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
136
153
|
puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
|
137
154
|
puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
|
138
155
|
puts Log.color(:magenta, "Nodes: ") << nodes * ", "
|
139
|
-
puts Log.color(:magenta, "
|
156
|
+
puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
|
157
|
+
puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
|
140
158
|
|
141
159
|
if options[:sbatch_parameters]
|
142
160
|
puts Log.color(:magenta, "SBATCH parameters: ")
|
143
|
-
|
161
|
+
text = CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
|
162
|
+
lines = text.split("\n").collect{|line| header, _sep, value = line.partition(/\s+/); Log.color(:yellow, header + ": ") + value}
|
163
|
+
puts Log.color :yellow, lines * "\n"
|
144
164
|
end
|
145
165
|
|
166
|
+
fprocpath = File.join(dir, 'procpath.sqlite3')
|
167
|
+
if options[:procpath_performance] && Open.exists?(fprocpath)
|
168
|
+
puts Log.color(:magenta, "Procpath summary: ")
|
169
|
+
require 'rbbt/tsv/csv'
|
170
|
+
meta = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from meta;' "))
|
171
|
+
perf = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from record;' "))
|
172
|
+
|
173
|
+
page_size = meta["page_size"].first.to_f
|
174
|
+
clock_ticks = meta["clock_ticks"].first.to_f
|
175
|
+
|
176
|
+
cpu_average = {}
|
177
|
+
rss_average = {}
|
178
|
+
perf.through :key, ["ts", 'stat_pid', "stat_utime", "stat_stime", "stat_cutime", "stat_cstime", "stat_rss"] do |k, values|
|
179
|
+
time, stat_pid, ucpu, scpu, ccpu, cscpu, rss = values
|
180
|
+
time = time.to_f
|
181
|
+
|
182
|
+
cpu = Misc.sum([ucpu, scpu].collect{|v| v.to_f})
|
183
|
+
cpu_average[stat_pid] ||= {}
|
184
|
+
cpu_average[stat_pid][time] ||= []
|
185
|
+
cpu_average[stat_pid][time] << cpu.to_f
|
186
|
+
rss_average[time] ||= []
|
187
|
+
rss_average[time] << rss.to_f * page_size
|
188
|
+
end
|
189
|
+
|
190
|
+
ticks = 0
|
191
|
+
cpu_average.each do |stat_pid, cpu_average_pid|
|
192
|
+
start = cpu_average_pid.keys.sort.first
|
193
|
+
eend = cpu_average_pid.keys.sort.last
|
194
|
+
ticks += Misc.sum(cpu_average_pid[eend]) - Misc.sum(cpu_average_pid[start])
|
195
|
+
end
|
196
|
+
start = rss_average.keys.sort.first
|
197
|
+
eend = rss_average.keys.sort.last
|
198
|
+
time_elapsed = eend - start
|
199
|
+
ticks = 1 if ticks == 0
|
200
|
+
time_elapsed = 1 if time_elapsed == 0
|
201
|
+
puts Log.color(:yellow, "CPU average: ") + "%.2f" % ( ticks / clock_ticks / time_elapsed * 100).to_s
|
202
|
+
puts Log.color(:yellow, "RSS average: ") + "%.2f GB" % Misc.mean(rss_average.collect{|t,l| Misc.sum(l) / (1024 * 1024 * 1024)}).to_s
|
203
|
+
puts Log.color(:yellow, "Time: ") + Misc.format_seconds((eend - start))
|
204
|
+
|
205
|
+
end
|
206
|
+
|
207
|
+
if options[:sacct_peformance]
|
208
|
+
begin
|
209
|
+
tsv = TSV.open(CMD.cmd("sacct -j #{id} -o 'jobid,AveRSS,MaxRSS,MaxDiskRead,MaxDiskWrite' -P|grep 'JobID\\|\.batch'"), :header_hash => '', :sep => "|", :type => :list)
|
210
|
+
values = tsv[tsv.keys.first]
|
211
|
+
if values.compact.any?
|
212
|
+
puts Log.color(:magenta, "SACCT performance: ")
|
213
|
+
puts values.zip(values.fields).collect{|v,t| Log.color(:yellow, t + ": ") + v.to_s } * "\n"
|
214
|
+
end
|
215
|
+
rescue
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
|
146
220
|
if tail && File.exists?(File.join(dir, 'std.err'))
|
147
221
|
if exit_status && exit_status != 0
|
148
222
|
puts Log.color(:magenta, "First error or exception found: ")
|
@@ -21,6 +21,7 @@ $slurm_options = SOPT.get <<EOF
|
|
21
21
|
-CS--contain_and_sync Contain and sync to default locations
|
22
22
|
-ci--copy_image When using a container directory, copy image there
|
23
23
|
-t--tail Tail the logs
|
24
|
+
-SPERF--SLURM_procpath* Save Procpath performance for SLURM job; specify only options
|
24
25
|
-q--queue* Queue
|
25
26
|
-t--task_cpus* Tasks
|
26
27
|
-W--workflows* Additional workflows
|
@@ -20,6 +20,7 @@ $slurm_options = SOPT.get <<EOF
|
|
20
20
|
-CS--contain_and_sync Contain and sync to default locations
|
21
21
|
-ci--copy_image When using a container directory, copy image there
|
22
22
|
-t--tail Tail the logs
|
23
|
+
-SPERF--SLURM_procpath* Save Procpath performance for SLURM job; specify only options
|
23
24
|
-q--queue* Queue
|
24
25
|
-t--task_cpus* Tasks
|
25
26
|
-W--workflows* Additional workflows
|
@@ -35,7 +35,7 @@ file = case file
|
|
35
35
|
fields = options[:fields]
|
36
36
|
raise ParameterException, "Please specify the fields to slice" if fields.nil?
|
37
37
|
|
38
|
-
options[:header_hash]
|
38
|
+
options[:header_hash] ||= options["header_hash"]
|
39
39
|
|
40
40
|
case
|
41
41
|
when options[:tokyocabinet]
|
@@ -45,8 +45,8 @@ when options[:tokyocabinet_bd]
|
|
45
45
|
tsv = Persist.open_tokyocabinet(file, false, nil, TokyoCabinet::BDB)
|
46
46
|
puts tsv.summary
|
47
47
|
else
|
48
|
-
stream = TSV.traverse file, options.merge(:into => :stream, :type => :list, :
|
49
|
-
|
48
|
+
stream = TSV.traverse file, options.merge(:into => :stream, :type => :list, :fields => fields.split(","), :unnamed => true) do |k,fields,names|
|
49
|
+
[k,fields].flatten * "\t"
|
50
50
|
end
|
51
51
|
puts stream.read
|
52
52
|
exit 0
|
@@ -86,7 +86,7 @@ messages = info[:messages]
|
|
86
86
|
backtrace = info[:backtrace]
|
87
87
|
pid = info[:pid]
|
88
88
|
exception = info[:exception]
|
89
|
-
rest = info.keys - [:inputs, :dependencies, :status, :time_elapsed, :messages, :backtrace, :exception, :
|
89
|
+
rest = info.keys - [:inputs, :dependencies, :status, :time_elapsed, :messages, :backtrace, :exception, :archived_info]
|
90
90
|
|
91
91
|
|
92
92
|
puts Log.color(:magenta, "File") << ": " << step.path
|
@@ -203,6 +203,7 @@ The `recursive_clean` cleans all the job dependency steps recursively.
|
|
203
203
|
-prec--prepare_cpus* Number of dependencies prepared in parallel
|
204
204
|
-rwt--remote_workflow_tasks* Load a yaml file describing remote workflow tasks
|
205
205
|
-od--override_deps* Override deps using 'Workflow#task=<path>' array_separated
|
206
|
+
-PERF--procpath_performance* Measure performance using procpath
|
206
207
|
EOF
|
207
208
|
|
208
209
|
workflow = ARGV.shift
|
@@ -407,6 +408,23 @@ begin
|
|
407
408
|
exit 0
|
408
409
|
end
|
409
410
|
|
411
|
+
if options[:procpath_performance]
|
412
|
+
require 'rbbt/util/procpath'
|
413
|
+
current_pid = job.info[:pid]
|
414
|
+
job.fork
|
415
|
+
job.soft_grace
|
416
|
+
sleep 2 if job.info[:pid] == current_pid
|
417
|
+
if job.info[:pid] != current_pid
|
418
|
+
pid = job.info[:pid]
|
419
|
+
begin
|
420
|
+
ProcPath.monitor(pid, options[:procpath_performance])
|
421
|
+
rescue Errno::ECHILD
|
422
|
+
Log.warn "Procpath didn't find process #{pid} to monitor. Maybe it finished already"
|
423
|
+
rescue
|
424
|
+
Log.warn "Procpath failed: #{$!.message}"
|
425
|
+
end
|
426
|
+
end
|
427
|
+
end
|
410
428
|
|
411
429
|
if do_fork
|
412
430
|
ENV["RBBT_NO_PROGRESS"] = "true"
|
@@ -423,7 +441,6 @@ begin
|
|
423
441
|
res = job
|
424
442
|
end
|
425
443
|
|
426
|
-
|
427
444
|
if options.delete(:printpath)
|
428
445
|
job.join
|
429
446
|
raise job.messages.last if (job.error? || job.aborted?) && job.messages
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rbbt/workflow'
|
4
|
+
|
5
|
+
require 'rbbt-util'
|
6
|
+
require 'rbbt-util'
|
7
|
+
require 'rbbt/util/simpleopt'
|
8
|
+
|
9
|
+
$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
|
10
|
+
|
11
|
+
options = SOPT.setup <<EOF
|
12
|
+
Examine the info of a job result
|
13
|
+
|
14
|
+
$ rbbt workflow info <job-result> <key> <value>
|
15
|
+
|
16
|
+
-h--help Help
|
17
|
+
-f--force Write info even if key is already present
|
18
|
+
-r--recursive Write info for all dependencies as well
|
19
|
+
-p--check_pid Check that recursive jobs where created by the same process
|
20
|
+
EOF
|
21
|
+
|
22
|
+
SOPT.usage if options[:help]
|
23
|
+
|
24
|
+
file, key, value = ARGV
|
25
|
+
|
26
|
+
force, recursive, check_pid = options.values_at :force, :recursive, :check_pid
|
27
|
+
|
28
|
+
def get_step(file)
|
29
|
+
file = file.sub(/\.(info|files)/,'')
|
30
|
+
step = Workflow.load_step file
|
31
|
+
step
|
32
|
+
end
|
33
|
+
|
34
|
+
raise ParameterException if key.nil? || value.nil?
|
35
|
+
|
36
|
+
if %w(DELETE nil).include? value
|
37
|
+
value = nil
|
38
|
+
force = true
|
39
|
+
end
|
40
|
+
|
41
|
+
step = get_step file
|
42
|
+
|
43
|
+
step.set_info key, value if force || ! step.info.include?(key)
|
44
|
+
|
45
|
+
pid = step.info[:pid]
|
46
|
+
host = step.info[:pid_hostname]
|
47
|
+
|
48
|
+
step.rec_dependencies.each do |dep|
|
49
|
+
dep.set_info key, value if (force || ! dep.info.include?(key)) && (!check_pid || dep.info[:pid].to_s == pid and dep.info[:pid_hostname] == host)
|
50
|
+
rescue
|
51
|
+
Log.warn "Could no set info #{key} for #{dep.path}: #{$!.message}"
|
52
|
+
end if recursive
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/util/procpath'
|
3
|
+
|
4
|
+
class TestProcPath < Test::Unit::TestCase
|
5
|
+
def test_record_and_plot
|
6
|
+
Log.with_severity 0 do
|
7
|
+
pid = Process.fork do
|
8
|
+
a = ""
|
9
|
+
(0..1000).each do
|
10
|
+
a << (0..rand(10000).to_i).to_a.collect{|i| "TEST #{i}" } * " "
|
11
|
+
sleep 0.1
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
TmpFile.with_file(nil, false) do |db|
|
16
|
+
|
17
|
+
ProcPath.record(pid, db, :interval => '1', "recnum" => 100)
|
18
|
+
ProcPath.plot(db, db + '.svg', "moving-average-window" => 1 )
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.30.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -300,6 +300,7 @@ files:
|
|
300
300
|
- lib/rbbt/util/misc/system.rb
|
301
301
|
- lib/rbbt/util/named_array.rb
|
302
302
|
- lib/rbbt/util/open.rb
|
303
|
+
- lib/rbbt/util/procpath.rb
|
303
304
|
- lib/rbbt/util/python.rb
|
304
305
|
- lib/rbbt/util/semaphore.rb
|
305
306
|
- lib/rbbt/util/simpleDSL.rb
|
@@ -438,6 +439,7 @@ files:
|
|
438
439
|
- share/rbbt_commands/workflow/server
|
439
440
|
- share/rbbt_commands/workflow/task
|
440
441
|
- share/rbbt_commands/workflow/trace
|
442
|
+
- share/rbbt_commands/workflow/write_info
|
441
443
|
- share/unicorn.rb
|
442
444
|
- share/workflow_config.ru
|
443
445
|
- test/rbbt/annotations/test_util.rb
|
@@ -516,6 +518,7 @@ files:
|
|
516
518
|
- test/rbbt/util/test_log.rb
|
517
519
|
- test/rbbt/util/test_misc.rb
|
518
520
|
- test/rbbt/util/test_open.rb
|
521
|
+
- test/rbbt/util/test_procpath.rb
|
519
522
|
- test/rbbt/util/test_python.rb
|
520
523
|
- test/rbbt/util/test_semaphore.rb
|
521
524
|
- test/rbbt/util/test_simpleDSL.rb
|
@@ -563,6 +566,7 @@ test_files:
|
|
563
566
|
- test/rbbt/workflow/test_task.rb
|
564
567
|
- test/rbbt/resource/test_path.rb
|
565
568
|
- test/rbbt/util/test_colorize.rb
|
569
|
+
- test/rbbt/util/test_procpath.rb
|
566
570
|
- test/rbbt/util/misc/test_omics.rb
|
567
571
|
- test/rbbt/util/misc/test_pipes.rb
|
568
572
|
- test/rbbt/util/misc/test_format.rb
|