rbbt-util 5.29.4 → 5.30.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ec6302ccfe3f38a074f7f0d10511090c8f4db4186228ad93adb2888e0edbf5e
4
- data.tar.gz: fcaa50b654461f128b9539fc47ed00b008d3f713e89b8bbe963c2b898c3c168b
3
+ metadata.gz: d45adac7949e3fea0d710418d93837cf0aa715bcedb2212c6e68f8dd749382ae
4
+ data.tar.gz: 9a9857c6b1565b9ed55f18f50fb1b242a1477dd6868111e10252ecc0c286ca44
5
5
  SHA512:
6
- metadata.gz: 2a2537aef150df77142a593742399d28bb96334decc0e33b69e9cbac2853085487100aa90cd711b342bac7eda536d15c080be22891d5b9f38bfa4601282ae5de
7
- data.tar.gz: d79cc4afa294d63cebd79f73b759bc46c3c4e0285db7be7406a594aa49e5b572b0c4f91b98c73b5f64c31c908d58d8dee43853ec18b1ead9e814b4370de0d60d
6
+ metadata.gz: 9dac4b1211fd40894f00b1a84f4f10abad83f2169579ec327a8da86f05b4274cc11e2192902638eb78b9de8f39351993f17a3b08dd5390a0f2c04ef385a0e1a2
7
+ data.tar.gz: 01b9d69b003088e78f2665b57e9ed8acc8104143bc826aabe49059e77fd0e20279d9393245f7bb20efcab807180273f31d2285a62113947cf151571a96f2d949
@@ -83,7 +83,7 @@ module HPC
83
83
 
84
84
  deps = seen[dep.path] ||= self.orchestrate_job(dep, options, skip_dep, seen)
85
85
  if job.canfail_paths.include? dep.path
86
- [deps].flatten.collect{|id| ['canfail', id] * ":"}
86
+ [deps].flatten.compact.collect{|id| ['canfail', id] * ":"}
87
87
  else
88
88
  deps
89
89
  end
@@ -33,7 +33,8 @@ module HPC
33
33
  group = File.basename(File.dirname(ENV['HOME']))
34
34
 
35
35
  if contain_and_sync
36
- contain = "/scratch/tmp/rbbt-#{user}" if contain.nil?
36
+ random_file = TmpFile.random_name
37
+ contain = "/scratch/tmp/rbbt-#{user}/#{random_file}" if contain.nil?
37
38
  sync = "~/.rbbt/var/jobs" if sync.nil?
38
39
  wipe_container = "post" if wipe_container.nil?
39
40
  end
@@ -243,7 +244,7 @@ EOF
243
244
  end
244
245
 
245
246
  if contain
246
- rbbt_cmd << " " << %(--workdir_all='#{contain.gsub("'", '\'')}/.rbbt/var/jobs')
247
+ rbbt_cmd << " " << %(--workdir_all='#{contain.gsub("'", '\'')}/workdir')
247
248
  end
248
249
  end
249
250
 
@@ -251,6 +252,10 @@ EOF
251
252
  cmd =<<-EOF
252
253
  #{exec_cmd} \\
253
254
  #{rbbt_cmd}
255
+ EOF
256
+ annotate_cmd =<<-EOF
257
+ #{exec_cmd} \\
258
+ workflow write_info --recursive --force=false --check_pid "$step_path" slurm_job $SLURM_JOB_ID
254
259
  EOF
255
260
 
256
261
  header +=<<-EOF
@@ -260,11 +265,14 @@ EOF
260
265
  run +=<<-EOF
261
266
 
262
267
  # Run command
263
- #{cmd}
268
+ step_path=$(#{cmd})
264
269
 
265
270
  # Save exit status
266
271
  exit_status=$?
267
272
 
273
+ # Annotate info with SLURM job_info
274
+ #{annotate_cmd}
275
+
268
276
  EOF
269
277
 
270
278
  # CODA
@@ -286,7 +294,7 @@ EOF
286
294
  sync = sync.strip
287
295
  source = File.join(File.expand_path(contain), source)
288
296
  else
289
- source = File.join(File.expand_path(contain), '.rbbt/var/jobs')
297
+ source = File.join(File.expand_path(contain), 'workdir/var/jobs')
290
298
  end
291
299
 
292
300
  target = File.expand_path(sync)
@@ -516,7 +524,11 @@ EOF
516
524
  dry_run = options.delete :dry_run
517
525
  tail = options.delete :tail
518
526
  dependencies = options.delete :slurm_dependencies
527
+ procpath = options.delete :SLURM_procpath
528
+
519
529
  options[:jobname] = job.clean_name
530
+ log_level = options.delete :log
531
+ log_level ||= Log.severity
520
532
 
521
533
  workflow = job.workflow
522
534
 
@@ -541,15 +553,14 @@ EOF
541
553
  inputs_dir = File.join(tmp_directory, 'inputs_dir')
542
554
  saved = Step.save_job_inputs(job, inputs_dir)
543
555
 
544
- if saved && saved.any?
545
- options[:inputs_dir] = inputs_dir
546
- cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '--load_inputs', inputs_dir, '--log', (options[:log] || Log.severity).to_s]
547
- else
548
- cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '--log', (options[:log] || Log.severity).to_s]
549
- end
556
+ cmd = ['workflow', 'task', workflow.to_s, task.to_s, '--printpath', '--log', log_level.to_s]
557
+
558
+ cmd << "--procpath_performance='#{tmp_directory}/procpath##{procpath.gsub(',', '#')}'" if procpath
550
559
 
551
560
  cmd << "--override_deps='#{override_deps.gsub("'", '\'')}'" if override_deps and not override_deps.empty?
552
561
 
562
+ cmd << "--load_inputs='#{inputs_dir}'" if saved && saved.any?
563
+
553
564
  template = self.template(cmd, options)
554
565
  jobid = self.issue_template(template, options.merge(:slurm_basedir => slurm_basedir, :dry_run => dry_run, :slurm_dependencies => dependencies))
555
566
 
@@ -104,9 +104,6 @@ module Persist
104
104
  write(true) if closed? || ! write?
105
105
  res = begin
106
106
  yield
107
- rescue Exception
108
- Log.exception $!
109
- raise $!
110
107
  ensure
111
108
  close
112
109
  end
@@ -115,7 +112,6 @@ module Persist
115
112
  end
116
113
 
117
114
  def read_and_close
118
- #return yield if @locked
119
115
  if read? || write?
120
116
  begin
121
117
  return yield
@@ -134,6 +130,41 @@ module Persist
134
130
  end
135
131
  end
136
132
 
133
# Yields to the caller's block for a read operation and returns its value.
#
# `read` is called first when `closed?` is true. If `read?` then holds, the
# block is yielded to directly. Otherwise the work happens inside `lock`:
# `close`, then `read true`, then the yield.
# NOTE(review): nothing here calls `close` after yielding; presumably the
# handle is meant to stay open in read mode between calls -- confirm.
def read_lock
  read if closed?
  return yield if read?

  lock do
    close
    read true
    yield
  end
end
147
+
148
# Yields to the caller's block for a write operation and returns its value.
#
# `write` is called first when `closed?` is true. If `write?` then holds, the
# block is yielded to and `close` is always called afterwards, even when the
# block raises. Otherwise the work happens inside `lock`: `close`, then
# `write true`, then the yield (with no `close` afterwards on that path).
def write_lock
  write if closed?

  if write?
    begin
      yield
    ensure
      close
    end
  else
    lock do
      close
      write true
      yield
    end
  end
end
166
+
167
+
137
168
  def merge!(hash)
138
169
  hash.each do |key,values|
139
170
  self[key] = values
@@ -141,38 +172,38 @@ module Persist
141
172
  end
142
173
 
143
174
  def range(*args)
144
- self.read_and_close do
175
+ self.read_lock do
145
176
  super(*args)
146
177
  end
147
178
  end
148
179
 
149
180
  def include?(*args)
150
- self.read_and_close do
181
+ self.read_lock do
151
182
  super(*args) #- TSV::ENTRY_KEYS.to_a
152
183
  end
153
184
  end
154
185
 
155
186
  def [](*args)
156
- self.read_and_close do
187
+ self.read_lock do
157
188
  super(*args) #- TSV::ENTRY_KEYS.to_a
158
189
  end
159
190
  end
160
191
 
161
192
  def []=(*args)
162
- self.write_and_close do
193
+ self.write_lock do
163
194
  super(*args) #- TSV::ENTRY_KEYS.to_a
164
195
  end
165
196
  end
166
197
 
167
198
  def keys(*args)
168
- self.read_and_close do
199
+ self.read_lock do
169
200
  super(*args)
170
201
  end
171
202
  end
172
203
 
173
204
 
174
205
  def prefix(key)
175
- self.read_and_close do
206
+ self.read_lock do
176
207
  range(key, 1, key + MAX_CHAR, 1)
177
208
  end
178
209
  end
@@ -184,13 +215,13 @@ module Persist
184
215
 
185
216
 
186
217
  def size(*args)
187
- self.read_and_close do
218
+ self.read_lock do
188
219
  super(*args)
189
220
  end
190
221
  end
191
222
 
192
223
  def each(*args, &block)
193
- self.read_and_close do
224
+ self.read_lock do
194
225
  super(*args, &block)
195
226
  end
196
227
  end
@@ -208,7 +239,7 @@ module Persist
208
239
  end
209
240
 
210
241
  def values_at(*keys)
211
- self.read_and_close do
242
+ self.read_lock do
212
243
  keys.collect do |k|
213
244
  self[k]
214
245
  end
data/lib/rbbt/tsv.rb CHANGED
@@ -113,11 +113,12 @@ module TSV
113
113
 
114
114
  data.entity_options = entity_options
115
115
 
116
- if Path === source and data.identifiers
117
- data.identifiers = Path.setup(data.identifiers, source.pkgdir, source.resource)
116
+ if Path === source && data.identifiers
117
+ Path.setup(data.identifiers, source.pkgdir, source.resource)
118
118
  end
119
119
 
120
120
  if data.respond_to? :persistence_path
121
+ data.read
121
122
  data
122
123
  else
123
124
  h = data.dup
data/lib/rbbt/util/cmd.rb CHANGED
@@ -217,7 +217,7 @@ module CMD
217
217
  end
218
218
  end
219
219
 
220
- def self.cmd_log(*args)
220
+ def self.cmd_pid(*args)
221
221
  all_args = *args
222
222
 
223
223
  all_args << {} unless Hash === all_args.last
@@ -248,4 +248,9 @@ module CMD
248
248
  nil
249
249
  end
250
250
 
251
# Runs a command through `cmd_pid` with the same arguments and discards
# whatever it returns, so callers always get nil.
def self.cmd_log(*cmd_args)
  self.cmd_pid(*cmd_args)
  nil
end
255
+
251
256
  end
@@ -242,48 +242,6 @@ module Misc
242
242
 
243
243
  return options
244
244
 
245
- options = {}
246
- string.split(/#/).each do |str|
247
- if str.match(/(.*)=(.*)/)
248
- option, value = $1, $2
249
- else
250
- option, value = str, true
251
- end
252
-
253
- option = option.sub(":",'').to_sym if option.chars.first == ':'
254
- value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
255
-
256
- if value == true
257
- options[option] = option.to_s.chars.first != '!'
258
- else
259
- options[option] = Thread.start do
260
- $SAFE = 0;
261
- case
262
- when value =~ /^(?:true|T)$/i
263
- true
264
- when value =~ /^(?:false|F)$/i
265
- false
266
- when Symbol === value
267
- value
268
- when (String === value and value =~ /^\/(.*)\/$/)
269
- Regexp.new /#{$1}/
270
- else
271
- begin
272
- Kernel.const_get value
273
- rescue
274
- begin
275
- raise if value =~ /[a-z]/ and defined? value
276
- eval(value)
277
- rescue Exception
278
- value
279
- end
280
- end
281
- end
282
- end.value
283
- end
284
- end
285
-
286
- options
287
245
  end
288
246
 
289
247
  end
@@ -0,0 +1,49 @@
1
+ require 'rbbt/util/cmd'
2
# Helpers that drive the `procpath` command-line tool through CMD: record
# process statistics into a database file, plot them, or do both for a
# running process.
module ProcPath
  # Registers the tool with CMD; the block returns the install command.
  CMD.tool :procpath do
    'pip install procpath'
  end

  # Runs `procpath record` for the process tree rooted at +pid+, writing to
  # the database file +path+, and blocks until +pid+ exits.
  #
  # pid     - pid of the process to watch (anything responding to to_i).
  # path    - database file handed to --database-file.
  # options - Hash; only "interval", "recnum" and "reevalnum" are forwarded
  #           to the command ("interval" defaults to 30).
  def self.record(pid, path, options = {})
    IndiferentHash.setup(options)
    options = Misc.add_defaults options, "interval" => 30

    cmd_options = %w(interval recnum reevalnum).inject({}){|acc,k| acc[k] = options[k]; acc}

    Log.debug "ProcPath recording #{pid} in #{path} (#{Misc.fingerprint options})"
    procpath_thread = Thread.new do
      procpath_pid = nil
      begin
        procpath_pid = CMD.cmd_pid(:procpath, "record --database-file '#{path}' '$..children[?(@.stat.pid == #{pid})]'", cmd_options.merge(:nofail => true, :add_option_dashes => true))
      rescue Exception
        # Exception (not StandardError) is rescued on purpose: the Interrupt
        # raised into this thread below must land here.
        Log.exception $!
        # The interrupt can arrive before cmd_pid has returned, in which case
        # there is no pid to signal and Process.kill would raise TypeError.
        # NOTE(review): if cmd_pid only returns once the command finishes, the
        # recorder is never signalled from here -- confirm against CMD.
        Process.kill "INT", procpath_pid if procpath_pid
      end
    end

    # Failures inside the recorder thread are logged above, not re-reported.
    procpath_thread.report_on_exception = false

    Process.wait pid.to_i
    procpath_thread.raise Interrupt
  end

  # Runs `procpath plot` reading the database file +path+ and writing the
  # plot to +output+.
  #
  # options - Hash of plot options; defaults are "query-name" => 'rss',
  #           'epsilon' => 0.5 and "moving-average-window" => 10. Only the
  #           option names listed below are forwarded to the command.
  def self.plot(path, output, options = {})
    IndiferentHash.setup(options)
    options = Misc.add_defaults options, "query-name" => 'rss', 'epsilon' => 0.5, "moving-average-window" => 10

    # "moving-average-window" must match the default set above; the previous
    # spelling ("monitor-average-window") silently dropped the option.
    cmd_options = %w(query-name epsilon moving-average-window title logarithmic after before custom-query-file custom-value-expr).inject({}){|acc,k| acc[k] = options[k]; acc}
    CMD.cmd_log(:procpath, "plot --database-file '#{path}' --plot-file '#{output}' ", cmd_options.merge(:nofail => true, :add_option_dashes => true))
  end

  # Records +pid+ until it exits and then plots CPU and RSS.
  #
  # path - "<database>" or "<database>#<options>", where <options> is parsed
  #        with Misc.string2hash. Output goes to <database>.sqlite3,
  #        <database>.cpu.svg and <database>.rss.svg.
  def self.monitor(pid, path)
    # Split only on the first '#': the options part may itself contain
    # '#'-separated entries, which must all reach Misc.string2hash.
    database, options_str = path.split("#", 2)
    options = options_str.nil? ? {} : Misc.string2hash(options_str)

    database = File.expand_path database
    Log.low "ProcPath monitor #{pid} in #{database} (#{Misc.fingerprint options})"

    ProcPath.record(pid, database + '.sqlite3', options)
    ProcPath.plot(database + '.sqlite3', database + '.cpu.svg', options.merge("query-name" => 'cpu'))
    ProcPath.plot(database + '.sqlite3', database + '.rss.svg', options.merge("query-name" => 'rss'))
  end
end
49
+
data/lib/rbbt/workflow.rb CHANGED
@@ -190,7 +190,7 @@ module Workflow
190
190
  return Misc.string2const Misc.camel_case(wf_name)
191
191
  end
192
192
 
193
- Log.info{"Loading workflow #{wf_name}"}
193
+ Log.high{"Loading workflow #{wf_name}"}
194
194
  require_local_workflow(wf_name) or
195
195
  (Workflow.autoinstall and `rbbt workflow install #{Misc.snake_case(wf_name)} || rbbt workflow install #{wf_name}` and require_local_workflow(wf_name)) or raise("Workflow not found or could not be loaded: #{ wf_name }")
196
196
  begin
@@ -505,8 +505,8 @@ class Step
505
505
 
506
506
  def running?
507
507
  return false if ! (started? || status == :ending)
508
- pid = info[:pid]
509
- return nil if pid.nil?
508
+ return nil unless Open.exist?(self.pid_file)
509
+ pid = Open.read(self.pid_file).to_i
510
510
 
511
511
  return false if done? or error? or aborted?
512
512
 
@@ -530,8 +530,7 @@ class Step
530
530
  end
531
531
 
532
532
  def nopid?
533
- pid = info[:pid] || Open.exists?(pid_file)
534
- ! pid && ! (status.nil? || status == :aborted || status == :done || status == :error || status == :cleaned)
533
+ ! Open.exists?(pid_file) && ! (status.nil? || status == :aborted || status == :done || status == :error || status == :cleaned)
535
534
  end
536
535
 
537
536
  def aborted?
@@ -373,7 +373,6 @@ class Step
373
373
  Log.exception $!
374
374
  ensure
375
375
  Step.purge_stream_cache
376
- set_info :pid, nil
377
376
  Open.rm pid_file if Open.exist?(pid_file)
378
377
  end
379
378
  end
@@ -388,7 +387,6 @@ class Step
388
387
  _clean_finished
389
388
  rescue
390
389
  stop_dependencies
391
- set_info :pid, nil
392
390
  Open.rm pid_file if Open.exist?(pid_file)
393
391
  end
394
392
  end
@@ -449,7 +447,7 @@ class Step
449
447
  ensure
450
448
  no_load = false unless IO === result
451
449
  Open.rm pid_file if Open.exist?(pid_file) unless no_load
452
- set_info :pid, nil unless no_load
450
+ #set_info :pid, nil unless no_load
453
451
  end
454
452
  end
455
453
 
@@ -559,7 +557,7 @@ class Step
559
557
  RbbtSemaphore.post_semaphore(semaphore) if semaphore
560
558
  Kernel.exit! -1
561
559
  end
562
- set_info :pid, nil
560
+ #set_info :pid, nil
563
561
  ensure
564
562
  RbbtSemaphore.post_semaphore(semaphore) if semaphore
565
563
  end
@@ -57,7 +57,7 @@ module Task
57
57
  puts Log.color(:magenta, "Input select options")
58
58
  puts
59
59
  selects.collect{|p| p}.uniq.each do |input,options|
60
- puts Log.color(:blue, input.to_s + ": ") << Misc.format_paragraph(options.collect{|o| o.to_s} * ", ") << "\n"
60
+ puts Log.color(:blue, input.to_s + ": ") << Misc.format_paragraph(options.collect{|o| Array === o ? o.first.to_s : o.to_s} * ", ") << "\n"
61
61
  puts unless Log.compact
62
62
  end
63
63
  puts
@@ -232,7 +232,7 @@ puts resource[path].find(search_path)
232
232
  ppp Open.read(tmp_include_file)
233
233
  puts cmd
234
234
  else
235
- CMD.cmd_log(cmd)
235
+ CMD.cmd_log(cmd, :log => Log::INFO)
236
236
  end
237
237
  end
238
238
  end
@@ -78,6 +78,7 @@ class Step
78
78
  name = info[:name] || File.basename(path)
79
79
  status = :unsync if status == :done and not Open.exist?(path)
80
80
  status = :notfound if status == :noinfo and not Open.exist?(path)
81
+
81
82
  str = " " * offset
82
83
  str << prov_report_msg(status, name, path, info)
83
84
  step.dependencies.reverse.each do |dep|
@@ -90,7 +91,7 @@ class Step
90
91
  if expand_repeats
91
92
  str << Log.color(:green, Log.uncolor(prov_report(dep, offset+1, task)))
92
93
  else
93
- str << Log.color(:green, " " * (offset + 1) + Log.uncolor(prov_report_msg(status, name, path, info)))
94
+ str << Log.color(:green, " " * (offset + 1) + Log.uncolor(prov_report_msg(dep.status, dep.info[:name], dep.path, dep.info)))
94
95
  end
95
96
  end
96
97
  end if step.dependencies
@@ -107,6 +107,6 @@ real_paths.each do |source|
107
107
  puts cmd
108
108
  exit 0
109
109
  else
110
- CMD.cmd_log(cmd)
110
+ CMD.cmd_log(cmd, :log => Log::INFO)
111
111
  end
112
112
  end
@@ -21,6 +21,8 @@ $ rbbt mnl [options]
21
21
  -s--search* Regular expression
22
22
  -t--tail* Show the last lines of the STDERR
23
23
  -SBP--sbatch_parameters show sbatch parameters
24
+ -PERF--procpath_performance show Procpath performance summary
25
+ -sacct--sacct_peformance show sacct performance summary
24
26
  EOF
25
27
 
26
28
  if options[:help]
@@ -69,7 +71,7 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
69
71
  end
70
72
 
71
73
  if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
72
- exe = m[1]
74
+ exe = m[1].sub('step_path=$(','')
73
75
  else
74
76
  exe = nil
75
77
  end
@@ -96,15 +98,24 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
96
98
  if File.exists?(fstatus = File.join(dir, 'job.status'))
97
99
  nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
98
100
  elsif job_nodes[id]
99
- nodes = job_nodes[id]
101
+ nodes = job_nodes[id].reject{|n| n.include? "("}
100
102
  else
101
103
  nodes = []
102
104
  end
103
105
 
106
+ if File.exists?(File.join(dir, 'exit.status'))
107
+ now = File.ctime(File.join(dir, 'exit.status'))
108
+ else
109
+ now = Time.now
110
+ end
111
+
104
112
  if File.exists?(File.join(dir, 'std.out'))
113
+ cerrt = File.ctime File.join(dir, 'std.err')
114
+ coutt = File.ctime File.join(dir, 'std.out')
105
115
  outt = File.mtime File.join(dir, 'std.out')
106
116
  errt = File.mtime File.join(dir, 'std.err')
107
- time_diff = Time.now - [outt, errt].max
117
+ time_diff = now - [outt, errt].max
118
+ time_elapsed = now - [cerrt, coutt].min
108
119
  end
109
120
 
110
121
  fdep = File.join(dir, 'dependencies.list')
@@ -113,14 +124,19 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
113
124
  fcadep = File.join(dir, 'canfail_dependencies.list')
114
125
  cadeps = Open.read(fcadep).split("\n") if File.exists?(fcadep)
115
126
 
116
- if done || error || aborted || running || queued || jobid || search
127
+ if done || error || aborted || running || queued || jobid
117
128
  select = false
118
129
  select = true if done && exit_status == 0
119
130
  select = true if error && exit_status && exit_status != 0
120
131
  select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
121
- select = true if queued && deps && (running_jobs & deps).any?
122
- select = true if running && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
132
+ is_running = exit_status.nil? && running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)
133
+ select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
134
+ select = true if running && nodes.any? && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
123
135
  select = true if jobid && jobid.split(",").include?(id)
136
+ select &= search && cmd.match(/#{search}/) if search
137
+ next unless select
138
+ elsif search
139
+ select = false
124
140
  select = true if search && cmd.match(/#{search}/)
125
141
  next unless select
126
142
  end
@@ -128,6 +144,7 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
128
144
 
129
145
  puts Log.color :blue, dir
130
146
  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
147
+ puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err'))
131
148
  puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
132
149
  puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
133
150
  puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
@@ -136,13 +153,70 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
136
153
  puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
137
154
  puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
138
155
  puts Log.color(:magenta, "Nodes: ") << nodes * ", "
139
- puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
156
+ puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
157
+ puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exists?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
140
158
 
141
159
  if options[:sbatch_parameters]
142
160
  puts Log.color(:magenta, "SBATCH parameters: ")
143
- puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
161
+ text = CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
162
+ lines = text.split("\n").collect{|line| header, _sep, value = line.partition(/\s+/); Log.color(:yellow, header + ": ") + value}
163
+ puts Log.color :yellow, lines * "\n"
144
164
  end
145
165
 
166
+ fprocpath = File.join(dir, 'procpath.sqlite3')
167
+ if options[:procpath_performance] && Open.exists?(fprocpath)
168
+ puts Log.color(:magenta, "Procpath summary: ")
169
+ require 'rbbt/tsv/csv'
170
+ meta = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from meta;' "))
171
+ perf = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from record;' "))
172
+
173
+ page_size = meta["page_size"].first.to_f
174
+ clock_ticks = meta["clock_ticks"].first.to_f
175
+
176
+ cpu_average = {}
177
+ rss_average = {}
178
+ perf.through :key, ["ts", 'stat_pid', "stat_utime", "stat_stime", "stat_cutime", "stat_cstime", "stat_rss"] do |k, values|
179
+ time, stat_pid, ucpu, scpu, ccpu, cscpu, rss = values
180
+ time = time.to_f
181
+
182
+ cpu = Misc.sum([ucpu, scpu].collect{|v| v.to_f})
183
+ cpu_average[stat_pid] ||= {}
184
+ cpu_average[stat_pid][time] ||= []
185
+ cpu_average[stat_pid][time] << cpu.to_f
186
+ rss_average[time] ||= []
187
+ rss_average[time] << rss.to_f * page_size
188
+ end
189
+
190
+ ticks = 0
191
+ cpu_average.each do |stat_pid, cpu_average_pid|
192
+ start = cpu_average_pid.keys.sort.first
193
+ eend = cpu_average_pid.keys.sort.last
194
+ ticks += Misc.sum(cpu_average_pid[eend]) - Misc.sum(cpu_average_pid[start])
195
+ end
196
+ start = rss_average.keys.sort.first
197
+ eend = rss_average.keys.sort.last
198
+ time_elapsed = eend - start
199
+ ticks = 1 if ticks == 0
200
+ time_elapsed = 1 if time_elapsed == 0
201
+ puts Log.color(:yellow, "CPU average: ") + "%.2f" % ( ticks / clock_ticks / time_elapsed * 100).to_s
202
+ puts Log.color(:yellow, "RSS average: ") + "%.2f GB" % Misc.mean(rss_average.collect{|t,l| Misc.sum(l) / (1024 * 1024 * 1024)}).to_s
203
+ puts Log.color(:yellow, "Time: ") + Misc.format_seconds((eend - start))
204
+
205
+ end
206
+
207
+ if options[:sacct_peformance]
208
+ begin
209
+ tsv = TSV.open(CMD.cmd("sacct -j #{id} -o 'jobid,AveRSS,MaxRSS,MaxDiskRead,MaxDiskWrite' -P|grep 'JobID\\|\.batch'"), :header_hash => '', :sep => "|", :type => :list)
210
+ values = tsv[tsv.keys.first]
211
+ if values.compact.any?
212
+ puts Log.color(:magenta, "SACCT performance: ")
213
+ puts values.zip(values.fields).collect{|v,t| Log.color(:yellow, t + ": ") + v.to_s } * "\n"
214
+ end
215
+ rescue
216
+ end
217
+ end
218
+
219
+
146
220
  if tail && File.exists?(File.join(dir, 'std.err'))
147
221
  if exit_status && exit_status != 0
148
222
  puts Log.color(:magenta, "First error or exception found: ")
@@ -21,6 +21,7 @@ $slurm_options = SOPT.get <<EOF
21
21
  -CS--contain_and_sync Contain and sync to default locations
22
22
  -ci--copy_image When using a container directory, copy image there
23
23
  -t--tail Tail the logs
24
+ -SPERF--SLURM_procpath* Save Procpath performance for SLURM job; specify only options
24
25
  -q--queue* Queue
25
26
  -t--task_cpus* Tasks
26
27
  -W--workflows* Additional workflows
@@ -20,6 +20,7 @@ $slurm_options = SOPT.get <<EOF
20
20
  -CS--contain_and_sync Contain and sync to default locations
21
21
  -ci--copy_image When using a container directory, copy image there
22
22
  -t--tail Tail the logs
23
+ -SPERF--SLURM_procpath* Save Procpath performance for SLURM job; specify only options
23
24
  -q--queue* Queue
24
25
  -t--task_cpus* Tasks
25
26
  -W--workflows* Additional workflows
@@ -35,7 +35,7 @@ file = case file
35
35
  fields = options[:fields]
36
36
  raise ParameterException, "Please specify the fields to slice" if fields.nil?
37
37
 
38
- options[:header_hash] = options["header_hash"]
38
+ options[:header_hash] ||= options["header_hash"]
39
39
 
40
40
  case
41
41
  when options[:tokyocabinet]
@@ -45,8 +45,8 @@ when options[:tokyocabinet_bd]
45
45
  tsv = Persist.open_tokyocabinet(file, false, nil, TokyoCabinet::BDB)
46
46
  puts tsv.summary
47
47
  else
48
- stream = TSV.traverse file, options.merge(:into => :stream, :type => :list, :keys => fields, :unnamed => true) do |*p|
49
- p * "\t"
48
+ stream = TSV.traverse file, options.merge(:into => :stream, :type => :list, :fields => fields.split(","), :unnamed => true) do |k,fields,names|
49
+ [k,fields].flatten * "\t"
50
50
  end
51
51
  puts stream.read
52
52
  exit 0
@@ -86,7 +86,7 @@ messages = info[:messages]
86
86
  backtrace = info[:backtrace]
87
87
  pid = info[:pid]
88
88
  exception = info[:exception]
89
- rest = info.keys - [:inputs, :dependencies, :status, :time_elapsed, :messages, :backtrace, :exception, :pid, :archived_info]
89
+ rest = info.keys - [:inputs, :dependencies, :status, :time_elapsed, :messages, :backtrace, :exception, :archived_info]
90
90
 
91
91
 
92
92
  puts Log.color(:magenta, "File") << ": " << step.path
@@ -203,6 +203,7 @@ The `recursive_clean` cleans all the job dependency steps recursively.
203
203
  -prec--prepare_cpus* Number of dependencies prepared in parallel
204
204
  -rwt--remote_workflow_tasks* Load a yaml file describing remote workflow tasks
205
205
  -od--override_deps* Override deps using 'Workflow#task=<path>' array_separated
206
+ -PERF--procpath_performance* Measure performance using procpath
206
207
  EOF
207
208
 
208
209
  workflow = ARGV.shift
@@ -407,6 +408,23 @@ begin
407
408
  exit 0
408
409
  end
409
410
 
411
+ if options[:procpath_performance]
412
+ require 'rbbt/util/procpath'
413
+ current_pid = job.info[:pid]
414
+ job.fork
415
+ job.soft_grace
416
+ sleep 2 if job.info[:pid] == current_pid
417
+ if job.info[:pid] != current_pid
418
+ pid = job.info[:pid]
419
+ begin
420
+ ProcPath.monitor(pid, options[:procpath_performance])
421
+ rescue Errno::ECHILD
422
+ Log.warn "Procpath didn't find process #{pid} to monitor. Maybe it finished already"
423
+ rescue
424
+ Log.warn "Procpath failed: #{$!.message}"
425
+ end
426
+ end
427
+ end
410
428
 
411
429
  if do_fork
412
430
  ENV["RBBT_NO_PROGRESS"] = "true"
@@ -423,7 +441,6 @@ begin
423
441
  res = job
424
442
  end
425
443
 
426
-
427
444
  if options.delete(:printpath)
428
445
  job.join
429
446
  raise job.messages.last if (job.error? || job.aborted?) && job.messages
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt/workflow'
4
+
5
+ require 'rbbt-util'
6
+ require 'rbbt-util'
7
+ require 'rbbt/util/simpleopt'
8
+
9
# Command-line script: writes a key/value pair into the info of a job result
# and, with --recursive, into the info of its dependencies as well.

$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands

options = SOPT.setup <<EOF
Write a key and value into the info of a job result

$ rbbt workflow write_info <job-result> <key> <value>

-h--help Help
-f--force Write info even if key is already present
-r--recursive Write info for all dependencies as well
-p--check_pid Check that recursive jobs were created by the same process
EOF

SOPT.usage if options[:help]

file, key, value = ARGV

force, recursive, check_pid = options.values_at :force, :recursive, :check_pid

# Loads the step for +file+, after stripping a ".info" or ".files" component
# from the given path.
def get_step(file)
  file = file.sub(/\.(info|files)/,'')
  Workflow.load_step file
end

# All three positional arguments are required; without this check a missing
# job path would surface as a NoMethodError on nil inside get_step.
raise ParameterException if file.nil? || key.nil? || value.nil?

# "DELETE" and "nil" mean: clear the key, overwriting any existing value.
if %w(DELETE nil).include? value
  value = nil
  force = true
end

step = get_step file

step.set_info key, value if force || ! step.info.include?(key)

pid = step.info[:pid]
host = step.info[:pid_hostname]

if recursive
  step.rec_dependencies.each do |dep|
    begin
      next unless force || ! dep.info.include?(key)

      # With --check_pid, only touch dependencies recorded with the same pid
      # and hostname as the main step. Both pids are compared as strings so an
      # Integer/String mismatch cannot make the check fail, and the pid and
      # hostname tests are grouped explicitly: with the low-precedence `and`
      # the hostname test used to apply even without --check_pid.
      if check_pid
        same_process = ! pid.nil? && dep.info[:pid].to_s == pid.to_s && dep.info[:pid_hostname] == host
        next unless same_process
      end

      dep.set_info key, value
    rescue
      # Best effort: a failing dependency is reported and skipped.
      Log.warn "Could not set info #{key} for #{dep.path}: #{$!.message}"
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/util/procpath'
3
+
4
# Exercises ProcPath.record and ProcPath.plot against a forked child that
# keeps appending to a string while sleeping between iterations.
class TestProcPath < Test::Unit::TestCase
  def test_record_and_plot
    Log.with_severity 0 do
      # Child process whose activity gets recorded.
      child_pid = Process.fork do
        buffer = ""
        1001.times do
          chunk = (0..rand(10000)).map { |i| "TEST #{i}" }.join(" ")
          buffer << chunk
          sleep 0.1
        end
      end

      TmpFile.with_file(nil, false) do |db|
        ProcPath.record(child_pid, db, :interval => '1', "recnum" => 100)
        ProcPath.plot(db, db + '.svg', "moving-average-window" => 1 )
      end
    end
  end
end
23
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.29.4
4
+ version: 5.30.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-28 00:00:00.000000000 Z
11
+ date: 2021-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -300,6 +300,7 @@ files:
300
300
  - lib/rbbt/util/misc/system.rb
301
301
  - lib/rbbt/util/named_array.rb
302
302
  - lib/rbbt/util/open.rb
303
+ - lib/rbbt/util/procpath.rb
303
304
  - lib/rbbt/util/python.rb
304
305
  - lib/rbbt/util/semaphore.rb
305
306
  - lib/rbbt/util/simpleDSL.rb
@@ -438,6 +439,7 @@ files:
438
439
  - share/rbbt_commands/workflow/server
439
440
  - share/rbbt_commands/workflow/task
440
441
  - share/rbbt_commands/workflow/trace
442
+ - share/rbbt_commands/workflow/write_info
441
443
  - share/unicorn.rb
442
444
  - share/workflow_config.ru
443
445
  - test/rbbt/annotations/test_util.rb
@@ -516,6 +518,7 @@ files:
516
518
  - test/rbbt/util/test_log.rb
517
519
  - test/rbbt/util/test_misc.rb
518
520
  - test/rbbt/util/test_open.rb
521
+ - test/rbbt/util/test_procpath.rb
519
522
  - test/rbbt/util/test_python.rb
520
523
  - test/rbbt/util/test_semaphore.rb
521
524
  - test/rbbt/util/test_simpleDSL.rb
@@ -563,6 +566,7 @@ test_files:
563
566
  - test/rbbt/workflow/test_task.rb
564
567
  - test/rbbt/resource/test_path.rb
565
568
  - test/rbbt/util/test_colorize.rb
569
+ - test/rbbt/util/test_procpath.rb
566
570
  - test/rbbt/util/misc/test_omics.rb
567
571
  - test/rbbt/util/misc/test_pipes.rb
568
572
  - test/rbbt/util/misc/test_format.rb