rbbt-util 5.29.0 → 5.30.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rbbt/tsv.rb CHANGED
@@ -113,8 +113,8 @@ module TSV
113
113
 
114
114
  data.entity_options = entity_options
115
115
 
116
- if Path === source and data.identifiers
117
- data.identifiers = Path.setup(data.identifiers, source.pkgdir, source.resource)
116
+ if Path === source && data.identifiers
117
+ Path.setup(data.identifiers, source.pkgdir, source.resource)
118
118
  end
119
119
 
120
120
  if data.respond_to? :persistence_path
@@ -124,6 +124,10 @@ module TSV
124
124
  data.clear
125
125
  data.annotate h
126
126
  end
127
+
128
+ data.read
129
+
130
+ data
127
131
  end
128
132
 
129
133
  def self.parse_header(stream, options = {})
data/lib/rbbt/util/cmd.rb CHANGED
@@ -217,7 +217,7 @@ module CMD
217
217
  end
218
218
  end
219
219
 
220
- def self.cmd_log(*args)
220
+ def self.cmd_pid(*args)
221
221
  all_args = *args
222
222
 
223
223
  all_args << {} unless Hash === all_args.last
@@ -248,4 +248,9 @@ module CMD
248
248
  nil
249
249
  end
250
250
 
251
+ def self.cmd_log(*args)
252
+ cmd_pid(*args)
253
+ nil
254
+ end
255
+
251
256
  end
@@ -287,7 +287,9 @@ module Misc
287
287
  when Symbol
288
288
  obj.to_s
289
289
  when (defined?(Path) and Path)
290
- if Step === obj.resource
290
+ if defined?(Step) && Open.exists?(Step.info_file(obj))
291
+ obj2str(Workflow.load_step(obj))
292
+ elsif defined?(Step) && Step === obj.resource
291
293
  "Step file: " + obj
292
294
  else
293
295
  if obj.exists?
@@ -322,7 +324,11 @@ module Misc
322
324
  remove_long_items(obj)
323
325
  when File
324
326
  if obj.respond_to? :filename and obj.filename
325
- "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
327
+ if defined?(Step) && Open.exists?(Step.info_file(obj.filename))
328
+ obj2str(Workflow.load_step(obj.filename))
329
+ else
330
+ "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
331
+ end
326
332
  else
327
333
  "<IO:" << obj.path << "--" << mtime_str(obj.path) << ">"
328
334
  end
@@ -330,7 +336,11 @@ module Misc
330
336
  "<IO:" << obj.short_path << ">"
331
337
  when IO
332
338
  if obj.respond_to? :filename and obj.filename
333
- "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
339
+ if defined?(Step) && Open.exists?(Step.info_file(obj.filename))
340
+ obj2str(Workflow.load_step(obj.filename))
341
+ else
342
+ "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
343
+ end
334
344
  else
335
345
 
336
346
  if obj.respond_to? :obj2str
@@ -242,48 +242,6 @@ module Misc
242
242
 
243
243
  return options
244
244
 
245
- options = {}
246
- string.split(/#/).each do |str|
247
- if str.match(/(.*)=(.*)/)
248
- option, value = $1, $2
249
- else
250
- option, value = str, true
251
- end
252
-
253
- option = option.sub(":",'').to_sym if option.chars.first == ':'
254
- value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
255
-
256
- if value == true
257
- options[option] = option.to_s.chars.first != '!'
258
- else
259
- options[option] = Thread.start do
260
- $SAFE = 0;
261
- case
262
- when value =~ /^(?:true|T)$/i
263
- true
264
- when value =~ /^(?:false|F)$/i
265
- false
266
- when Symbol === value
267
- value
268
- when (String === value and value =~ /^\/(.*)\/$/)
269
- Regexp.new /#{$1}/
270
- else
271
- begin
272
- Kernel.const_get value
273
- rescue
274
- begin
275
- raise if value =~ /[a-z]/ and defined? value
276
- eval(value)
277
- rescue Exception
278
- value
279
- end
280
- end
281
- end
282
- end.value
283
- end
284
- end
285
-
286
- options
287
245
  end
288
246
 
289
247
  end
@@ -0,0 +1,49 @@
1
+ require 'rbbt/util/cmd'
2
+ module ProcPath
3
+ CMD.tool :procpath do
4
+ 'pip install procpath'
5
+ end
6
+
7
+ def self.record(pid, path, options = {})
8
+ IndiferentHash.setup(options)
9
+ options = Misc.add_defaults options, "interval" => 30
10
+
11
+ cmd_options = %w(interval recnum reevalnum).inject({}){|acc,k| acc[k] = options[k]; acc}
12
+
13
+ Log.debug "ProcPath recording #{pid} in #{path} (#{Misc.fingerprint options})"
14
+ procpath_thread = Thread.new do
15
+ begin
16
+ procpath_pid = CMD.cmd_pid(:procpath, "record --database-file '#{path}' '$..children[?(@.stat.pid == #{pid})]'", cmd_options.merge(:nofail => true, :add_option_dashes => true))
17
+ rescue Exception
18
+ Log.exceptions $!
19
+ Process.kill "INT", procpath_pid
20
+ end
21
+ end
22
+
23
+ procpath_thread.report_on_exception = false
24
+
25
+ Process.wait pid.to_i
26
+ procpath_thread.raise Interrupt
27
+ end
28
+
29
+ def self.plot(path, output, options = {})
30
+ IndiferentHash.setup(options)
31
+ options = Misc.add_defaults options, "query-name" => 'rss', 'epsilon' => 0.5, "moving-average-window" => 10
32
+
33
+ cmd_options = %w(query-name epsilon monitor-average-window title logarithmic after before custom-query-file custom-value-expr).inject({}){|acc,k| acc[k] = options[k]; acc}
34
+ CMD.cmd_log(:procpath, "plot --database-file '#{path}' --plot-file '#{output}' ", cmd_options.merge(:nofail => true, :add_option_dashes => true))
35
+ end
36
+
37
+ def self.monitor(pid, path)
38
+ database, options_str = path.split("#")
39
+ options = options_str.nil? ? {} : Misc.string2hash(options_str)
40
+
41
+ database = File.expand_path database
42
+ Log.low "ProcPath monitor #{pid} in #{database} (#{Misc.fingerprint options})"
43
+
44
+ ProcPath.record(pid, database + '.sqlite3', options)
45
+ ProcPath.plot(database + '.sqlite3', database + '.cpu.svg', options.merge("query-name" => 'cpu'))
46
+ ProcPath.plot(database + '.sqlite3', database + '.rss.svg', options.merge("query-name" => 'rss'))
47
+ end
48
+ end
49
+
@@ -16,6 +16,10 @@ end
16
16
 
17
17
  module Workflow
18
18
 
19
+ def self.job_path?(path)
20
+ path.split("/")[-4] == "jobs"
21
+ end
22
+
19
23
  def log(status, message = nil, &block)
20
24
  Step.log(status, message, nil, &block)
21
25
  end
@@ -301,8 +305,9 @@ module Workflow
301
305
 
302
306
  def setup_override_dependency(dep, workflow, task_name)
303
307
  dep = Step === dep ? dep : Workflow.load_step(dep)
308
+ dep.workflow = workflow
304
309
  dep.info[:name] = dep.name
305
- dep.original_task_name ||= dep.task_name
310
+ dep.original_task_name ||= dep.task_name if dep.workflow
306
311
  begin
307
312
  workflow = Kernel.const_get workflow if String === workflow
308
313
  dep.task = workflow.tasks[task_name] if dep.task.nil? && workflow.tasks.include?(task_name)
@@ -94,17 +94,21 @@ class Step
94
94
  Log.debug "Saving job input #{name} (#{type}) into #{path}"
95
95
  case
96
96
  when Step === value
97
- Open.link(value.path, path)
97
+ Open.ln_s(value.path, path)
98
98
  when type.to_s == "file"
99
99
  if String === value && File.exists?(value)
100
- Open.link(value, path)
100
+ Open.ln_s(value, path)
101
101
  else
102
102
  Open.write(path + '.yaml', value.to_yaml)
103
103
  end
104
104
  when Array === value
105
- Open.write(path, value * "\n")
105
+ Open.write(path, value.collect{|v| Step === v ? v.path : v.to_s} * "\n")
106
106
  when IO === value
107
- Open.write(path, value)
107
+ if value.filename && String === value.filename && File.exists?(value.filename)
108
+ Open.ln_s(value.filename, path)
109
+ else
110
+ Open.write(path, value)
111
+ end
108
112
  else
109
113
  Open.write(path, value.to_s)
110
114
  end
@@ -117,13 +121,17 @@ class Step
117
121
  task_name = Symbol === job.overriden ? job.overriden : job.task_name
118
122
  workflow = job.workflow
119
123
  workflow = Kernel.const_get workflow if String === workflow
120
- task_info = workflow.task_info(task_name)
121
- input_types = task_info[:input_types]
122
- task_inputs = task_info[:inputs]
123
- input_defaults = task_info[:input_defaults]
124
+ if workflow
125
+ task_info = workflow.task_info(task_name)
126
+ input_types = task_info[:input_types]
127
+ task_inputs = task_info[:inputs]
128
+ input_defaults = task_info[:input_defaults]
129
+ else
130
+ task_info = input_types = task_inputs = input_defaults = {}
131
+ end
124
132
 
125
133
  inputs = {}
126
- real_inputs = job.real_inputs || job.info[:real_inputs]
134
+ real_inputs = job.real_inputs || job.info[:real_inputs]
127
135
  job.recursive_inputs.zip(job.recursive_inputs.fields).each do |value,name|
128
136
  next unless task_inputs.include? name.to_sym
129
137
  next unless real_inputs.include? name.to_sym
@@ -497,8 +505,8 @@ class Step
497
505
 
498
506
  def running?
499
507
  return false if ! (started? || status == :ending)
500
- pid = info[:pid]
501
- return nil if pid.nil?
508
+ return nil unless Open.exist?(self.pid_file)
509
+ pid = Open.read(self.pid_file).to_i
502
510
 
503
511
  return false if done? or error? or aborted?
504
512
 
@@ -522,8 +530,7 @@ class Step
522
530
  end
523
531
 
524
532
  def nopid?
525
- pid = info[:pid] || Open.exists?(pid_file)
526
- ! pid && ! (status.nil? || status == :aborted || status == :done || status == :error || status == :cleaned)
533
+ ! Open.exists?(pid_file) && ! (status.nil? || status == :aborted || status == :done || status == :error || status == :cleaned)
527
534
  end
528
535
 
529
536
  def aborted?
@@ -103,7 +103,7 @@ class Step
103
103
  end
104
104
 
105
105
  job.dup_inputs unless status == 'done' or job.started?
106
- job.init_info(status == 'noinfo') unless status == 'waiting' || status == 'done' || job.started?
106
+ job.init_info(status == 'noinfo') unless status == 'waiting' || status == 'done' || job.started? || ! Workflow.job_path?(job.path)
107
107
 
108
108
  canfail = ComputeDependency === job && job.canfail?
109
109
  end
@@ -130,7 +130,6 @@ class Step
130
130
  (inputs.flatten.select{|i| Step === i} + inputs.flatten.select{|dep| Path === dep && Step === dep.resource}.collect{|dep| dep.resource})
131
131
  end
132
132
 
133
-
134
133
  def execute_dependency(dependency, log = true)
135
134
  task_name = self.task_name
136
135
  canfail_paths = self.canfail_paths
@@ -122,7 +122,6 @@ class Step
122
122
  reject{|dependency| (defined?(WorkflowRemoteClient) && WorkflowRemoteClient::RemoteStep === dependency) || Open.remote?(dependency.path) }.
123
123
  reject{|dependency| dependency.error? }.
124
124
  #select{|dependency| Open.exists?(dependency.path) || ((Open.exists?(dependency.info_file) && (dependency.status == :cleaned) || dependency.status == :waiting)) }.
125
- #select{|dependency| Open.exists?(dependency.path) || ((Open.exists?(dependency.info_file) && (dependency.status == :cleaned) || dependency.status == :waiting)) }.
126
125
  select{|dependency| dependency.updatable? }.
127
126
  collect{|dependency| Workflow.relocate_dependency(self, dependency)}
128
127
  end
@@ -374,7 +373,6 @@ class Step
374
373
  Log.exception $!
375
374
  ensure
376
375
  Step.purge_stream_cache
377
- set_info :pid, nil
378
376
  Open.rm pid_file if Open.exist?(pid_file)
379
377
  end
380
378
  end
@@ -389,7 +387,6 @@ class Step
389
387
  _clean_finished
390
388
  rescue
391
389
  stop_dependencies
392
- set_info :pid, nil
393
390
  Open.rm pid_file if Open.exist?(pid_file)
394
391
  end
395
392
  end
@@ -450,7 +447,7 @@ class Step
450
447
  ensure
451
448
  no_load = false unless IO === result
452
449
  Open.rm pid_file if Open.exist?(pid_file) unless no_load
453
- set_info :pid, nil unless no_load
450
+ #set_info :pid, nil unless no_load
454
451
  end
455
452
  end
456
453
 
@@ -560,7 +557,7 @@ class Step
560
557
  RbbtSemaphore.post_semaphore(semaphore) if semaphore
561
558
  Kernel.exit! -1
562
559
  end
563
- set_info :pid, nil
560
+ #set_info :pid, nil
564
561
  ensure
565
562
  RbbtSemaphore.post_semaphore(semaphore) if semaphore
566
563
  end
@@ -57,7 +57,7 @@ module Task
57
57
  puts Log.color(:magenta, "Input select options")
58
58
  puts
59
59
  selects.collect{|p| p}.uniq.each do |input,options|
60
- puts Log.color(:blue, input.to_s + ": ") << Misc.format_paragraph(options.collect{|o| o.to_s} * ", ") << "\n"
60
+ puts Log.color(:blue, input.to_s + ": ") << Misc.format_paragraph(options.collect{|o| Array === o ? o.first.to_s : o.to_s} * ", ") << "\n"
61
61
  puts unless Log.compact
62
62
  end
63
63
  puts
@@ -22,11 +22,14 @@ class Step
22
22
  end
23
23
 
24
24
  def self.prov_report_msg(status, name, path, info = nil)
25
- parts = path.sub(/\{.*/,'').sub(/#{Regexp.quote(name)}$/,'').split "/"
25
+ parts = path.sub(/\{.*/,'').split "/"
26
26
 
27
+ parts.pop
28
+
27
29
  task = Log.color(:yellow, parts.pop)
28
30
  workflow = Log.color(:magenta, parts.pop)
29
- if status.to_s == 'noinfo' and parts.last != 'jobs'
31
+ #if status.to_s == 'noinfo' && parts.last != 'jobs'
32
+ if ! Workflow.job_path?(path)
30
33
  task, status, workflow = Log.color(:yellow, info[:task_name]), Log.color(:green, "file"), Log.color(:magenta, "-")
31
34
  end
32
35
 
@@ -0,0 +1,165 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ #$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
+
8
+ options = SOPT.setup <<EOF
9
+
10
+ Clean error or aborted jobs
11
+
12
+ $ rbbt mnl [options]
13
+
14
+ -h--help Print this help
15
+ -d--done Done jobs only
16
+ -e--error Error jobs only
17
+ -a--aborted SLURM aboted jobs
18
+ -j--job* Job ids
19
+ -s--search* Regular expression
20
+ -t--tail* Show the last lines of the STDERR
21
+ -SBP--sbatch_parameters show sbatch parameters
22
+ -dr--dry_run Do not erase anything
23
+ EOF
24
+
25
+ if options[:help]
26
+ if defined? rbbt_usage
27
+ rbbt_usage
28
+ else
29
+ puts SOPT.doc
30
+ end
31
+ exit 0
32
+ end
33
+
34
+ Log.severity = 4
35
+ done, error, aborted, jobid, search, tail, sbatch_parameters, dry_run = options.values_at :done, :error, :aborted, :job, :search, :tail, :sbatch_parameters, :dry_run
36
+
37
+ workdir = File.expand_path('~/rbbt-slurm')
38
+ Path.setup(workdir)
39
+
40
+ running_jobs = begin
41
+ squeue_txt = CMD.cmd('squeue').read
42
+ squeue_txt.split("\n").collect{|l| l.to_i.to_s}
43
+ rescue
44
+ Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
45
+ squeue_txt = nil
46
+ $norunningjobs = true
47
+ []
48
+ end
49
+
50
+ if squeue_txt
51
+ job_nodes = {}
52
+ squeue_txt.split("\n").each do |line|
53
+ parts = line.strip.split(/\s+/)
54
+ job_nodes[parts.first] = parts.last.split(",")
55
+ end
56
+ else
57
+ job_nodes = nil
58
+ end
59
+
60
+ count = 0
61
+ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
62
+ dir = File.dirname(fcmd)
63
+
64
+ if m = Open.read(fcmd).match(/#CMD: (.*)/)
65
+ cmd = m[1]
66
+ else
67
+ cmd = nil
68
+ end
69
+
70
+ if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
71
+ exe = m[1]
72
+ else
73
+ exe = nil
74
+ end
75
+
76
+ if m = Open.read(fcmd).match(/^CONTAINER_DIR=(.*)/)
77
+ container_home = m[1]
78
+ else
79
+ container_home = nil
80
+ end
81
+
82
+
83
+ if File.exists?(fid = File.join(dir, 'job.id'))
84
+ id = Open.read(fid).chomp
85
+ else
86
+ id = nil
87
+ end
88
+
89
+ if File.exists?(fstatus = File.join(dir, 'exit.status'))
90
+ exit_status = Open.read(fstatus).to_i
91
+ else
92
+ exit_status = nil
93
+ end
94
+
95
+ if File.exists?(fstatus = File.join(dir, 'job.status'))
96
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
97
+ elsif job_nodes[id]
98
+ nodes = job_nodes[id]
99
+ else
100
+ nodes = []
101
+ end
102
+
103
+ if File.exists?(File.join(dir, 'std.out'))
104
+ outt = File.mtime File.join(dir, 'std.out')
105
+ errt = File.mtime File.join(dir, 'std.err')
106
+ time_diff = Time.now - [outt, errt].max
107
+ end
108
+
109
+ fdep = File.join(dir, 'dependencies.list')
110
+ deps = Open.read(fdep).split("\n") if File.exists?(fdep)
111
+
112
+ fcadep = File.join(dir, 'canfail_dependencies.list')
113
+ cadeps = Open.read(fcadep).split("\n") if File.exists?(fcadep)
114
+
115
+ aborted = error = true if aborted.nil? && error.nil?
116
+ if done || error || aborted || running || queued || jobid || search
117
+ select = false
118
+ select = true if done && exit_status && exit_status.to_i == 0
119
+ select = true if error && exit_status && exit_status.to_i != 0
120
+ select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
121
+ select = select && jobid.split(",").include?(id) if jobid
122
+ select = select && cmd.match(/#{search}/) if search
123
+ next unless select
124
+ end
125
+
126
+
127
+ puts Log.color(:yellow, "**ERASING**")
128
+ puts Log.color :blue, dir
129
+ puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
130
+ puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
131
+ puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
132
+ puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
133
+ puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
134
+ puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
135
+ puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
136
+ puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
137
+ puts Log.color(:magenta, "Nodes: ") << nodes * ", "
138
+ puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
139
+
140
+ if options[:sbatch_parameters]
141
+ puts Log.color(:magenta, "SBATCH parameters: ")
142
+ puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
143
+ end
144
+
145
+ if tail && File.exists?(File.join(dir, 'std.err'))
146
+ if exit_status && exit_status != 0
147
+ puts Log.color(:magenta, "First error or exception found: ")
148
+ puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
149
+ elsif exit_status
150
+ puts Log.color(:magenta, "Completed jobs: ")
151
+ puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
152
+ else
153
+ puts Log.color(:magenta, "Log tail: ")
154
+ puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
155
+ end
156
+ end
157
+
158
+ count += 1
159
+
160
+ Open.rm_rf dir unless dry_run
161
+ end
162
+
163
+ puts
164
+ puts "Found #{count} jobs"
165
+