rbbt-util 5.28.12 → 5.29.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -110,6 +110,8 @@ module Persist
110
110
  def self.load_file(path, type)
111
111
  begin
112
112
  case (type || :marshal).to_sym
113
+ when :path
114
+ path
113
115
  when :nil
114
116
  nil
115
117
  when :boolean
@@ -167,6 +169,8 @@ module Persist
167
169
  end
168
170
 
169
171
  case (type || :marshal).to_sym
172
+ when :path
173
+ nil
170
174
  when :nil
171
175
  nil
172
176
  when :boolean
@@ -243,6 +243,7 @@ module TSV
243
243
  Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
244
244
 
245
245
  if complete
246
+ Log.warn "Attaching through index and completing empty rows; keys with wrong format may appear (#{other.key_field} insted of #{self.key_field})" if index
246
247
  fill = TrueClass === complete ? nil : complete
247
248
  field_length = self.fields.length
248
249
  common_fields = (other.fields & self.fields)
@@ -255,11 +256,11 @@ module TSV
255
256
  case type
256
257
  when :single
257
258
  missing.each do |k|
258
- self[k] = nil
259
+ self[k] = fill
259
260
  end
260
261
  when :list
261
262
  missing.each do |k|
262
- values = [nil] * field_length
263
+ values = [fill] * field_length
263
264
  other_values = other[k]
264
265
  other_common_pos.zip(this_common_pos).each do |o,t|
265
266
  values[t] = other_values[o]
@@ -267,8 +268,9 @@ module TSV
267
268
  self[k] = values
268
269
  end
269
270
  when :double
271
+ fill = [] if fill.nil?
270
272
  missing.each do |k|
271
- values = [[]] * field_length
273
+ values = [fill] * field_length
272
274
  other_values = other[k]
273
275
  other_common_pos.zip(this_common_pos).each do |o,t|
274
276
  values[t] = other_values[o]
@@ -276,8 +278,9 @@ module TSV
276
278
  self[k] = values
277
279
  end
278
280
  when :flat
281
+ fill = [] if fill.nil?
279
282
  missing.each do |k|
280
- self[k] = []
283
+ self[k] = fill
281
284
  end
282
285
  end
283
286
  end
@@ -2,6 +2,3 @@ require 'rbbt/util/concurrency'
2
2
 
3
3
  require 'rbbt/tsv/parallel/through'
4
4
  require 'rbbt/tsv/parallel/traverse'
5
-
6
- module TSV
7
- end
@@ -287,7 +287,9 @@ module Misc
287
287
  when Symbol
288
288
  obj.to_s
289
289
  when (defined?(Path) and Path)
290
- if Step === obj.resource
290
+ if defined?(Step) && Open.exists?(Step.info_file(obj))
291
+ obj2str(Workflow.load_step(obj))
292
+ elsif defined?(Step) && Step === obj.resource
291
293
  "Step file: " + obj
292
294
  else
293
295
  if obj.exists?
@@ -322,7 +324,11 @@ module Misc
322
324
  remove_long_items(obj)
323
325
  when File
324
326
  if obj.respond_to? :filename and obj.filename
325
- "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
327
+ if defined?(Step) && Open.exists?(Step.info_file(obj.filename))
328
+ obj2str(Workflow.load_step(obj.filename))
329
+ else
330
+ "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
331
+ end
326
332
  else
327
333
  "<IO:" << obj.path << "--" << mtime_str(obj.path) << ">"
328
334
  end
@@ -330,7 +336,11 @@ module Misc
330
336
  "<IO:" << obj.short_path << ">"
331
337
  when IO
332
338
  if obj.respond_to? :filename and obj.filename
333
- "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
339
+ if defined?(Step) && Open.exists?(Step.info_file(obj.filename))
340
+ obj2str(Workflow.load_step(obj.filename))
341
+ else
342
+ "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
343
+ end
334
344
  else
335
345
 
336
346
  if obj.respond_to? :obj2str
@@ -385,7 +385,7 @@ module Workflow
385
385
  next if default == v
386
386
  next if (String === default and Symbol === v and v.to_s == default)
387
387
  next if (Symbol === default and String === v and v == default.to_s)
388
- real_inputs[k] = v
388
+ real_inputs[k.to_sym] = v
389
389
  end
390
390
 
391
391
  jobname_input_value = inputs[jobname_input] || all_defaults[jobname_input]
@@ -410,6 +410,7 @@ module Workflow
410
410
  job.workflow = self
411
411
  job.clean_name = jobname
412
412
  job.overriden = overriden
413
+ job.real_inputs = real_inputs.keys
413
414
  job
414
415
  end
415
416
 
@@ -16,6 +16,10 @@ end
16
16
 
17
17
  module Workflow
18
18
 
19
+ def self.job_path?(path)
20
+ path.split("/")[-4] == "jobs"
21
+ end
22
+
19
23
  def log(status, message = nil, &block)
20
24
  Step.log(status, message, nil, &block)
21
25
  end
@@ -301,7 +305,9 @@ module Workflow
301
305
 
302
306
  def setup_override_dependency(dep, workflow, task_name)
303
307
  dep = Step === dep ? dep : Workflow.load_step(dep)
308
+ dep.workflow = workflow
304
309
  dep.info[:name] = dep.name
310
+ dep.original_task_name ||= dep.task_name if dep.workflow
305
311
  begin
306
312
  workflow = Kernel.const_get workflow if String === workflow
307
313
  dep.task = workflow.tasks[task_name] if dep.task.nil? && workflow.tasks.include?(task_name)
@@ -309,7 +315,7 @@ module Workflow
309
315
  Log.exception $!
310
316
  end
311
317
  dep.task_name = task_name
312
- dep.overriden = true
318
+ dep.overriden = dep.original_task_name.to_sym
313
319
  dep
314
320
  end
315
321
 
@@ -77,6 +77,7 @@ module Workflow
77
77
  task name do
78
78
  raise RbbtException, "dependency not found in dep_task" if dependencies.empty?
79
79
  dep = dependencies.last.join
80
+ raise dep.get_exception if dep.error?
80
81
  set_info :result_type, dep.info[:result_type]
81
82
  forget = config :forget_dep_tasks, :forget_dep_tasks, :default => FORGET_DEP_TASKS
82
83
  if forget
@@ -50,8 +50,8 @@ module Workflow
50
50
  case input_types[input]
51
51
  when :file
52
52
  Log.debug "Pointing #{ input } to #{file}"
53
- if file =~ /\.read$/
54
- inputs[input.to_sym] = Open.read(file)
53
+ if file =~ /\.yaml/
54
+ inputs[input.to_sym] = YAML.load(Open.read(file))
55
55
  else
56
56
  inputs[input.to_sym] = Open.realpath(file)
57
57
  end
@@ -12,6 +12,7 @@ class Step
12
12
  attr_accessor :exec
13
13
  attr_accessor :relocated
14
14
  attr_accessor :result, :mutex, :seen
15
+ attr_accessor :real_inputs, :original_task_name
15
16
 
16
17
  RBBT_DEBUG_CLEAN = ENV["RBBT_DEBUG_CLEAN"] == 'true'
17
18
 
@@ -145,11 +146,13 @@ class Step
145
146
  seen = []
146
147
  while path = deps.pop
147
148
  dep_info = archived_info[path]
148
- dep_info[:inputs].each do |k,v|
149
- all_inputs[k] = v unless all_inputs.include?(k)
150
- end if dep_info[:inputs]
151
- deps.concat(dep_info[:dependencies].collect{|p| p.last } - seen) if dep_info[:dependencies]
152
- deps.concat(dep_info[:archived_dependencies].collect{|p| p.last } - seen) if dep_info[:archived_dependencies]
149
+ if dep_info
150
+ dep_info[:inputs].each do |k,v|
151
+ all_inputs[k] = v unless all_inputs.include?(k)
152
+ end if dep_info[:inputs]
153
+ deps.concat(dep_info[:dependencies].collect{|p| p.last } - seen) if dep_info[:dependencies]
154
+ deps.concat(dep_info[:archived_dependencies].collect{|p| p.last } - seen) if dep_info[:archived_dependencies]
155
+ end
153
156
  seen << path
154
157
  end
155
158
 
@@ -93,18 +93,22 @@ class Step
93
93
 
94
94
  Log.debug "Saving job input #{name} (#{type}) into #{path}"
95
95
  case
96
+ when Step === value
97
+ Open.ln_s(value.path, path)
98
+ when type.to_s == "file"
99
+ if String === value && File.exists?(value)
100
+ Open.ln_s(value, path)
101
+ else
102
+ Open.write(path + '.yaml', value.to_yaml)
103
+ end
96
104
  when Array === value
97
- Open.write(path, value * "\n")
105
+ Open.write(path, value.collect{|v| Step === v ? v.path : v.to_s} * "\n")
98
106
  when IO === value
99
- Open.write(path, value)
100
- when type == "file"
101
- if String === value && File.exists?(value)
102
- Open.link(value, path)
107
+ if value.filename && String === value.filename && File.exists?(value.filename)
108
+ Open.ln_s(value.filename, path)
103
109
  else
104
- Open.write(path + '.read', value.to_s)
110
+ Open.write(path, value)
105
111
  end
106
- when Step === value
107
- value = value.produce.load
108
112
  else
109
113
  Open.write(path, value.to_s)
110
114
  end
@@ -114,18 +118,24 @@ class Step
114
118
  def self.save_job_inputs(job, dir, options = nil)
115
119
  options = IndiferentHash.setup options.dup if options
116
120
 
117
- task_name = job.task_name
121
+ task_name = Symbol === job.overriden ? job.overriden : job.task_name
118
122
  workflow = job.workflow
119
123
  workflow = Kernel.const_get workflow if String === workflow
120
- task_info = workflow.task_info(task_name)
121
- input_types = task_info[:input_types]
122
- task_inputs = task_info[:inputs]
123
- input_defaults = task_info[:input_defaults]
124
+ if workflow
125
+ task_info = workflow.task_info(task_name)
126
+ input_types = task_info[:input_types]
127
+ task_inputs = task_info[:inputs]
128
+ input_defaults = task_info[:input_defaults]
129
+ else
130
+ task_info = input_types = task_inputs = input_defaults = {}
131
+ end
124
132
 
125
133
  inputs = {}
134
+ real_inputs = job.real_inputs || job.info[:real_inputs]
126
135
  job.recursive_inputs.zip(job.recursive_inputs.fields).each do |value,name|
127
136
  next unless task_inputs.include? name.to_sym
128
- next if options and ! options.include?(name)
137
+ next unless real_inputs.include? name.to_sym
138
+ next if options && ! options.include?(name)
129
139
  next if value.nil?
130
140
  next if input_defaults[name] == value
131
141
  inputs[name] = value
@@ -222,7 +232,7 @@ class Step
222
232
  def init_info(force = false)
223
233
  return nil if @exec || info_file.nil? || (Open.exists?(info_file) && ! force)
224
234
  Open.lock(info_file, :lock => info_lock) do
225
- i = {:status => :waiting, :pid => Process.pid, :path => path}
235
+ i = {:status => :waiting, :pid => Process.pid, :path => path, :real_inputs => real_inputs}
226
236
  i[:dependencies] = dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]} if dependencies
227
237
  Misc.sensiblewrite(info_file, Step.serialize_info(i), :force => true, :lock => false)
228
238
  @info_cache = IndiferentHash.setup(i)
@@ -103,7 +103,7 @@ class Step
103
103
  end
104
104
 
105
105
  job.dup_inputs unless status == 'done' or job.started?
106
- job.init_info(status == 'noinfo') unless status == 'waiting' || status == 'done' || job.started?
106
+ job.init_info(status == 'noinfo') unless status == 'waiting' || status == 'done' || job.started? || ! Workflow.job_path?(job.path)
107
107
 
108
108
  canfail = ComputeDependency === job && job.canfail?
109
109
  end
@@ -130,7 +130,6 @@ class Step
130
130
  (inputs.flatten.select{|i| Step === i} + inputs.flatten.select{|dep| Path === dep && Step === dep.resource}.collect{|dep| dep.resource})
131
131
  end
132
132
 
133
-
134
133
  def execute_dependency(dependency, log = true)
135
134
  task_name = self.task_name
136
135
  canfail_paths = self.canfail_paths
@@ -122,7 +122,6 @@ class Step
122
122
  reject{|dependency| (defined?(WorkflowRemoteClient) && WorkflowRemoteClient::RemoteStep === dependency) || Open.remote?(dependency.path) }.
123
123
  reject{|dependency| dependency.error? }.
124
124
  #select{|dependency| Open.exists?(dependency.path) || ((Open.exists?(dependency.info_file) && (dependency.status == :cleaned) || dependency.status == :waiting)) }.
125
- #select{|dependency| Open.exists?(dependency.path) || ((Open.exists?(dependency.info_file) && (dependency.status == :cleaned) || dependency.status == :waiting)) }.
126
125
  select{|dependency| dependency.updatable? }.
127
126
  collect{|dependency| Workflow.relocate_dependency(self, dependency)}
128
127
  end
@@ -26,15 +26,25 @@ module Workflow
26
26
  workload
27
27
  end
28
28
 
29
+ def self.workload(jobs)
30
+ jobs.inject({}) do |acc,job|
31
+ Orchestrator.job_workload(job).each do |j,d|
32
+ acc[j] = d unless acc.keys.collect{|k| k.path }.include? j.path
33
+ end
34
+ acc
35
+ end
36
+ end
37
+
29
38
  def self.job_rules(rules, job)
30
39
  workflow = job.workflow.to_s
31
40
  task_name = job.task_name.to_s
41
+ defaults = rules["defaults"] || {}
32
42
 
33
- return IndiferentHash.setup(rules["defaults"]) unless rules[workflow]
34
- return IndiferentHash.setup(rules["defaults"]) unless rules[workflow][task_name]
43
+ return IndiferentHash.setup(defaults) unless rules[workflow]
44
+ return IndiferentHash.setup(defaults) unless rules[workflow][task_name]
35
45
 
36
46
  job_rules = IndiferentHash.setup(rules[workflow][task_name])
37
- rules["defaults"].each{|k,v| job_rules[k] = v if job_rules[k].nil? } if rules["defaults"]
47
+ defaults.each{|k,v| job_rules[k] = v if job_rules[k].nil? } if defaults
38
48
  job_rules
39
49
  end
40
50
 
@@ -169,12 +179,7 @@ module Workflow
169
179
  def process(rules, jobs)
170
180
  begin
171
181
 
172
- workload = jobs.inject({}) do |acc,job|
173
- Orchestrator.job_workload(job).each do |j,d|
174
- acc[j] = d unless acc.keys.collect{|k| k.path }.include? j.path
175
- end
176
- acc
177
- end
182
+ workload = Orchestrator.workload(jobs)
178
183
  all_jobs = workload.keys
179
184
 
180
185
  top_level_jobs = jobs.collect{|job| job.path }
@@ -22,11 +22,14 @@ class Step
22
22
  end
23
23
 
24
24
  def self.prov_report_msg(status, name, path, info = nil)
25
- parts = path.sub(/\{.*/,'').sub(/#{Regexp.quote(name)}$/,'').split "/"
25
+ parts = path.sub(/\{.*/,'').split "/"
26
26
 
27
+ parts.pop
28
+
27
29
  task = Log.color(:yellow, parts.pop)
28
30
  workflow = Log.color(:magenta, parts.pop)
29
- if status.to_s == 'noinfo' and parts.last != 'jobs'
31
+ #if status.to_s == 'noinfo' && parts.last != 'jobs'
32
+ if ! Workflow.job_path?(path)
30
33
  task, status, workflow = Log.color(:yellow, info[:task_name]), Log.color(:green, "file"), Log.color(:magenta, "-")
31
34
  end
32
35
 
@@ -0,0 +1,165 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ #$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
+
8
+ options = SOPT.setup <<EOF
9
+
10
+ Clean error or aborted jobs
11
+
12
+ $ rbbt mnl [options]
13
+
14
+ -h--help Print this help
15
+ -d--done Done jobs only
16
+ -e--error Error jobs only
17
+ -a--aborted SLURM aboted jobs
18
+ -j--job* Job ids
19
+ -s--search* Regular expression
20
+ -t--tail* Show the last lines of the STDERR
21
+ -SBP--sbatch_parameters show sbatch parameters
22
+ -dr--dry_run Do not erase anything
23
+ EOF
24
+
25
+ if options[:help]
26
+ if defined? rbbt_usage
27
+ rbbt_usage
28
+ else
29
+ puts SOPT.doc
30
+ end
31
+ exit 0
32
+ end
33
+
34
+ Log.severity = 4
35
+ done, error, aborted, jobid, search, tail, sbatch_parameters, dry_run = options.values_at :done, :error, :aborted, :job, :search, :tail, :sbatch_parameters, :dry_run
36
+
37
+ workdir = File.expand_path('~/rbbt-slurm')
38
+ Path.setup(workdir)
39
+
40
+ running_jobs = begin
41
+ squeue_txt = CMD.cmd('squeue').read
42
+ squeue_txt.split("\n").collect{|l| l.to_i.to_s}
43
+ rescue
44
+ Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
45
+ squeue_txt = nil
46
+ $norunningjobs = true
47
+ []
48
+ end
49
+
50
+ if squeue_txt
51
+ job_nodes = {}
52
+ squeue_txt.split("\n").each do |line|
53
+ parts = line.strip.split(/\s+/)
54
+ job_nodes[parts.first] = parts.last.split(",")
55
+ end
56
+ else
57
+ job_nodes = nil
58
+ end
59
+
60
+ count = 0
61
+ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
62
+ dir = File.dirname(fcmd)
63
+
64
+ if m = Open.read(fcmd).match(/#CMD: (.*)/)
65
+ cmd = m[1]
66
+ else
67
+ cmd = nil
68
+ end
69
+
70
+ if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
71
+ exe = m[1]
72
+ else
73
+ exe = nil
74
+ end
75
+
76
+ if m = Open.read(fcmd).match(/^CONTAINER_DIR=(.*)/)
77
+ container_home = m[1]
78
+ else
79
+ container_home = nil
80
+ end
81
+
82
+
83
+ if File.exists?(fid = File.join(dir, 'job.id'))
84
+ id = Open.read(fid).chomp
85
+ else
86
+ id = nil
87
+ end
88
+
89
+ if File.exists?(fstatus = File.join(dir, 'exit.status'))
90
+ exit_status = Open.read(fstatus).to_i
91
+ else
92
+ exit_status = nil
93
+ end
94
+
95
+ if File.exists?(fstatus = File.join(dir, 'job.status'))
96
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
97
+ elsif job_nodes[id]
98
+ nodes = job_nodes[id]
99
+ else
100
+ nodes = []
101
+ end
102
+
103
+ if File.exists?(File.join(dir, 'std.out'))
104
+ outt = File.mtime File.join(dir, 'std.out')
105
+ errt = File.mtime File.join(dir, 'std.err')
106
+ time_diff = Time.now - [outt, errt].max
107
+ end
108
+
109
+ fdep = File.join(dir, 'dependencies.list')
110
+ deps = Open.read(fdep).split("\n") if File.exists?(fdep)
111
+
112
+ fcadep = File.join(dir, 'canfail_dependencies.list')
113
+ cadeps = Open.read(fcadep).split("\n") if File.exists?(fcadep)
114
+
115
+ aborted = error = true if aborted.nil? && error.nil?
116
+ if done || error || aborted || running || queued || jobid || search
117
+ select = false
118
+ select = true if done && exit_status && exit_status.to_i == 0
119
+ select = true if error && exit_status && exit_status.to_i != 0
120
+ select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
121
+ select = select && jobid.split(",").include?(id) if jobid
122
+ select = select && cmd.match(/#{search}/) if search
123
+ next unless select
124
+ end
125
+
126
+
127
+ puts Log.color(:yellow, "**ERASING**")
128
+ puts Log.color :blue, dir
129
+ puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
130
+ puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
131
+ puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
132
+ puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
133
+ puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
134
+ puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
135
+ puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
136
+ puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
137
+ puts Log.color(:magenta, "Nodes: ") << nodes * ", "
138
+ puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
139
+
140
+ if options[:sbatch_parameters]
141
+ puts Log.color(:magenta, "SBATCH parameters: ")
142
+ puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
143
+ end
144
+
145
+ if tail && File.exists?(File.join(dir, 'std.err'))
146
+ if exit_status && exit_status != 0
147
+ puts Log.color(:magenta, "First error or exception found: ")
148
+ puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
149
+ elsif exit_status
150
+ puts Log.color(:magenta, "Completed jobs: ")
151
+ puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
152
+ else
153
+ puts Log.color(:magenta, "Log tail: ")
154
+ puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
155
+ end
156
+ end
157
+
158
+ count += 1
159
+
160
+ Open.rm_rf dir unless dry_run
161
+ end
162
+
163
+ puts
164
+ puts "Found #{count} jobs"
165
+