rbbt-util 5.28.14 → 5.29.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -385,7 +385,7 @@ module Workflow
385
385
  next if default == v
386
386
  next if (String === default and Symbol === v and v.to_s == default)
387
387
  next if (Symbol === default and String === v and v == default.to_s)
388
- real_inputs[k] = v
388
+ real_inputs[k.to_sym] = v
389
389
  end
390
390
 
391
391
  jobname_input_value = inputs[jobname_input] || all_defaults[jobname_input]
@@ -410,6 +410,7 @@ module Workflow
410
410
  job.workflow = self
411
411
  job.clean_name = jobname
412
412
  job.overriden = overriden
413
+ job.real_inputs = real_inputs.keys
413
414
  job
414
415
  end
415
416
 
@@ -302,6 +302,7 @@ module Workflow
302
302
  def setup_override_dependency(dep, workflow, task_name)
303
303
  dep = Step === dep ? dep : Workflow.load_step(dep)
304
304
  dep.info[:name] = dep.name
305
+ dep.original_task_name ||= dep.task_name
305
306
  begin
306
307
  workflow = Kernel.const_get workflow if String === workflow
307
308
  dep.task = workflow.tasks[task_name] if dep.task.nil? && workflow.tasks.include?(task_name)
@@ -309,7 +310,7 @@ module Workflow
309
310
  Log.exception $!
310
311
  end
311
312
  dep.task_name = task_name
312
- dep.overriden = true
313
+ dep.overriden = dep.original_task_name.to_sym
313
314
  dep
314
315
  end
315
316
 
@@ -50,8 +50,8 @@ module Workflow
50
50
  case input_types[input]
51
51
  when :file
52
52
  Log.debug "Pointing #{ input } to #{file}"
53
- if file =~ /\.read$/
54
- inputs[input.to_sym] = Open.read(file)
53
+ if file =~ /\.yaml/
54
+ inputs[input.to_sym] = YAML.load(Open.read(file))
55
55
  else
56
56
  inputs[input.to_sym] = Open.realpath(file)
57
57
  end
@@ -12,6 +12,7 @@ class Step
12
12
  attr_accessor :exec
13
13
  attr_accessor :relocated
14
14
  attr_accessor :result, :mutex, :seen
15
+ attr_accessor :real_inputs, :original_task_name
15
16
 
16
17
  RBBT_DEBUG_CLEAN = ENV["RBBT_DEBUG_CLEAN"] == 'true'
17
18
 
@@ -145,11 +146,13 @@ class Step
145
146
  seen = []
146
147
  while path = deps.pop
147
148
  dep_info = archived_info[path]
148
- dep_info[:inputs].each do |k,v|
149
- all_inputs[k] = v unless all_inputs.include?(k)
150
- end if dep_info[:inputs]
151
- deps.concat(dep_info[:dependencies].collect{|p| p.last } - seen) if dep_info[:dependencies]
152
- deps.concat(dep_info[:archived_dependencies].collect{|p| p.last } - seen) if dep_info[:archived_dependencies]
149
+ if dep_info
150
+ dep_info[:inputs].each do |k,v|
151
+ all_inputs[k] = v unless all_inputs.include?(k)
152
+ end if dep_info[:inputs]
153
+ deps.concat(dep_info[:dependencies].collect{|p| p.last } - seen) if dep_info[:dependencies]
154
+ deps.concat(dep_info[:archived_dependencies].collect{|p| p.last } - seen) if dep_info[:archived_dependencies]
155
+ end
153
156
  seen << path
154
157
  end
155
158
 
@@ -93,18 +93,18 @@ class Step
93
93
 
94
94
  Log.debug "Saving job input #{name} (#{type}) into #{path}"
95
95
  case
96
- when Array === value
97
- Open.write(path, value * "\n")
98
- when IO === value
99
- Open.write(path, value)
100
- when type == "file"
96
+ when Step === value
97
+ Open.link(value.path, path)
98
+ when type.to_s == "file"
101
99
  if String === value && File.exists?(value)
102
100
  Open.link(value, path)
103
101
  else
104
- Open.write(path + '.read', value.to_s)
102
+ Open.write(path + '.yaml', value.to_yaml)
105
103
  end
106
- when Step === value
107
- value = value.produce.load
104
+ when Array === value
105
+ Open.write(path, value * "\n")
106
+ when IO === value
107
+ Open.write(path, value)
108
108
  else
109
109
  Open.write(path, value.to_s)
110
110
  end
@@ -114,7 +114,7 @@ class Step
114
114
  def self.save_job_inputs(job, dir, options = nil)
115
115
  options = IndiferentHash.setup options.dup if options
116
116
 
117
- task_name = job.task_name
117
+ task_name = Symbol === job.overriden ? job.overriden : job.task_name
118
118
  workflow = job.workflow
119
119
  workflow = Kernel.const_get workflow if String === workflow
120
120
  task_info = workflow.task_info(task_name)
@@ -123,9 +123,11 @@ class Step
123
123
  input_defaults = task_info[:input_defaults]
124
124
 
125
125
  inputs = {}
126
+ real_inputs = job.real_inputs || job.info[:real_inputs]
126
127
  job.recursive_inputs.zip(job.recursive_inputs.fields).each do |value,name|
127
128
  next unless task_inputs.include? name.to_sym
128
- next if options and ! options.include?(name)
129
+ next unless real_inputs.include? name.to_sym
130
+ next if options && ! options.include?(name)
129
131
  next if value.nil?
130
132
  next if input_defaults[name] == value
131
133
  inputs[name] = value
@@ -222,7 +224,7 @@ class Step
222
224
  def init_info(force = false)
223
225
  return nil if @exec || info_file.nil? || (Open.exists?(info_file) && ! force)
224
226
  Open.lock(info_file, :lock => info_lock) do
225
- i = {:status => :waiting, :pid => Process.pid, :path => path}
227
+ i = {:status => :waiting, :pid => Process.pid, :path => path, :real_inputs => real_inputs}
226
228
  i[:dependencies] = dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]} if dependencies
227
229
  Misc.sensiblewrite(info_file, Step.serialize_info(i), :force => true, :lock => false)
228
230
  @info_cache = IndiferentHash.setup(i)
@@ -26,15 +26,25 @@ module Workflow
26
26
  workload
27
27
  end
28
28
 
29
# Builds the combined workload for a list of jobs.
#
# Merges the hashes returned by Orchestrator.job_workload for each job into
# a single hash. An entry is only added when no previously added key has the
# same path, so the first entry seen for a given path wins.
#
# jobs - enumerable of jobs accepted by Orchestrator.job_workload; the keys
#        of the hashes it returns must respond to #path.
#
# Returns a Hash with the merged entries (empty when jobs is empty).
def self.workload(jobs)
  # Paths already present in the result. Tracking them here avoids
  # rebuilding the list of key paths for every candidate entry.
  seen_paths = {}

  jobs.each_with_object({}) do |job, acc|
    Orchestrator.job_workload(job).each do |j, d|
      path = j.path
      next if seen_paths.key?(path)

      seen_paths[path] = true
      acc[j] = d
    end
  end
end
37
+
29
38
  def self.job_rules(rules, job)
30
39
  workflow = job.workflow.to_s
31
40
  task_name = job.task_name.to_s
41
+ defaults = rules["defaults"] || {}
32
42
 
33
- return IndiferentHash.setup(rules["defaults"]) unless rules[workflow]
34
- return IndiferentHash.setup(rules["defaults"]) unless rules[workflow][task_name]
43
+ return IndiferentHash.setup(defaults) unless rules[workflow]
44
+ return IndiferentHash.setup(defaults) unless rules[workflow][task_name]
35
45
 
36
46
  job_rules = IndiferentHash.setup(rules[workflow][task_name])
37
- rules["defaults"].each{|k,v| job_rules[k] = v if job_rules[k].nil? } if rules["defaults"]
47
+ defaults.each{|k,v| job_rules[k] = v if job_rules[k].nil? } if defaults
38
48
  job_rules
39
49
  end
40
50
 
@@ -169,12 +179,7 @@ module Workflow
169
179
  def process(rules, jobs)
170
180
  begin
171
181
 
172
- workload = jobs.inject({}) do |acc,job|
173
- Orchestrator.job_workload(job).each do |j,d|
174
- acc[j] = d unless acc.keys.collect{|k| k.path }.include? j.path
175
- end
176
- acc
177
- end
182
+ workload = Orchestrator.workload(jobs)
178
183
  all_jobs = workload.keys
179
184
 
180
185
  top_level_jobs = jobs.collect{|job| job.path }
@@ -0,0 +1,141 @@
1
#!/usr/bin/env ruby

# Lists the SLURM jobs recorded under ~/rbbt-slurm.
#
# Every directory containing a command.slurm file is treated as one job. For
# each job the script reports the recorded command, job id, exit status,
# dependencies, nodes and log freshness, optionally filtered by state, job id
# or a regular expression over the recorded command.

require 'rbbt-util'
require 'rbbt/util/simpleopt'

#$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands

options = SOPT.setup <<EOF

List SLURM jobs

$ rbbt slurm list [options]

-h--help Print this help
-d--done Done jobs only
-e--error Error jobs only
-a--aborted SLURM aborted jobs
-r--running Running jobs only
-q--queued Queued jobs only
-j--job* Job ids
-s--search* Regular expression
-t--tail* Show the last lines of the STDERR
EOF

if options[:help]
  if defined? rbbt_usage
    rbbt_usage
  else
    puts SOPT.doc
  end
  exit 0
end

Log.severity = 4
done, error, running, queued, aborted, jobid, search, tail = options.values_at :done, :error, :running, :queued, :aborted, :job, :search, :tail

workdir = File.expand_path('~/rbbt-slurm')
Path.setup(workdir)

# Ids reported by squeue. When squeue cannot be run the list is empty and
# $norunningjobs makes every job id print in green further down.
running_jobs = begin
                 CMD.cmd('squeue').read.split("\n").collect{|l| l.to_i.to_s}
               rescue
                 Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
                 $norunningjobs = true
                 []
               end

count = 0
workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
  dir = File.dirname(fcmd)

  # Read the submission script once; the next three fields are parsed from it
  script = Open.read(fcmd)

  m = script.match(/#CMD: (.*)/)
  cmd = m ? m[1] : nil

  m = script.match(/# Run command\n(.*?)\n/im)
  exe = m ? m[1] : nil

  m = script.match(/^CONTAINER_DIR=(.*)/)
  container_home = m ? m[1] : nil

  fid = File.join(dir, 'job.id')
  id = File.exist?(fid) ? Open.read(fid).chomp : nil

  fexit = File.join(dir, 'exit.status')
  exit_status = File.exist?(fexit) ? Open.read(fexit).to_i : nil

  # Nodes are the comma separated last field of the last line of job.status;
  # an empty file yields no nodes instead of raising
  fstatus = File.join(dir, 'job.status')
  nodes = []
  if File.exist?(fstatus)
    last_line = Open.read(fstatus).split("\n").last
    last_field = last_line && last_line.split(/\s+/).last
    nodes = last_field.split(",") if last_field
  end

  # Seconds since the most recent write to the logs. Only computed when
  # std.out exists; std.err is taken into account only if it is there too
  fout = File.join(dir, 'std.out')
  ferr = File.join(dir, 'std.err')
  time_diff = nil
  if File.exist?(fout)
    mtimes = [fout, ferr].select{|f| File.exist?(f) }.collect{|f| File.mtime(f) }
    time_diff = Time.now - mtimes.max
  end

  fdep = File.join(dir, 'dependencies.list')
  deps = Open.read(fdep).split("\n") if File.exist?(fdep)

  if done || error || aborted || running || queued || jobid || search
    selected = false
    selected = true if done && exit_status == 0
    selected = true if error && exit_status && exit_status != 0
    selected = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
    selected = true if queued && deps && (running_jobs & deps).any?
    selected = true if running && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
    selected = true if jobid && jobid.split(",").include?(id)
    # Jobs without a recorded command cannot match a search
    selected = true if search && cmd && cmd.match(/#{search}/)
    next unless selected
  end

  puts Log.color :blue, dir
  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
  puts Log.color(:magenta, "Done: ") << File.mtime(fexit).to_s if File.exist?(fexit)
  puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
  puts Log.color(:magenta, "CMD: ") << (cmd ? Log.color(:yellow, cmd) : "Missing")
  puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
  puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) : ((running_jobs.include?(id) || $norunningjobs) ? Log.color(:green, id) : Log.color(:red, id) ))
  puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
  puts Log.color(:magenta, "Nodes: ") << nodes * ", "
  puts Log.color(:magenta, "Output: ") << File.exist?(fout).to_s << ((id.nil? || time_diff.nil?) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")

  if tail && File.exist?(ferr)
    if exit_status && exit_status != 0
      puts Log.color(:magenta, "First error or exception found: ")
      puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{ferr} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
    elsif exit_status
      puts Log.color(:magenta, "Completed jobs: ")
      puts CMD.cmd("grep -i -w 'Completed step' #{ferr} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
    else
      puts Log.color(:magenta, "Log tail: ")
      puts CMD.cmd("tail -n #{tail.to_i} #{ferr}").read
    end
  end

  count += 1

end

puts
puts "Found #{count} jobs"
141
+
@@ -0,0 +1,47 @@
1
#!/usr/bin/env ruby

# Runs the regular workflow task command with Step#run replaced, so that a
# job that is not done yet is handed to HPC::SLURM.orchestrate_job.

require 'rbbt/util/simpleopt'
require 'rbbt/workflow'
require 'rbbt/workflow/usage'
require 'rbbt/hpc'
require 'rbbt/hpc/orchestrate'
require 'time'

# Options specific to this command, read with SOPT.get before the task
# command is loaded at the end of the script
$slurm_options = SOPT.get <<EOF
-dr--dry_run Print only the template
-cj--clean_job Clean job
--drbbt* Use development version of rbbt
-sing--singularity Use Singularity
-ug--user_group* Use alternative user group for group project directory
-c--contain* Contain in directory (using Singularity)
-s--sync* Contain in directory and sync jobs
-e--exclusive Make exclusive use of the node
-hm--highmem Make use of highmem cores
-wc--wipe_container* Wipe the jobs from the contain directory
-CS--contain_and_sync Contain and sync to default locations
-ci--copy_image When using a container directory, copy image there
-t--tail Tail the logs
-q--queue* Queue
-t--task_cpus* Tasks
-W--workflows* Additional workflows
-tm--time* Time
-R--rules* Orchestration rules
-rmb--remove_slurm_basedir Remove the SLURM working directory (command, STDIN, exit status, ...)
EOF

class Step
  # Loads the result when the step is already done; otherwise issues the
  # step through HPC::SLURM.orchestrate_job with the merged options.
  def run(*args)
    return self.load if done?

    begin
      Log.debug "Issuing SLURM job for #{self.path}"
      HPC::SLURM.orchestrate_job(self, SOPT::GOT_OPTIONS.merge($slurm_options))
    rescue HPC::SBATCH
      # Deliberately ignored. NOTE(review): presumably raised once the job has
      # been handed to sbatch (or on a dry run) - confirm
    end
  end
end

# Hand the additional workflows over to the task command through ARGV
extra_workflows = $slurm_options[:workflows]
ARGV.push("-W", extra_workflows) if extra_workflows

load Rbbt.share.rbbt_commands.workflow.task.find
@@ -9,8 +9,9 @@ require 'time'
9
9
  $slurm_options = SOPT.get <<EOF
10
10
  -dr--dry_run Print only the template
11
11
  -cj--clean_job Clean job
12
- --drbbt Use development version of rbbt
12
+ --drbbt* Use development version of rbbt
13
13
  -sing--singularity Use Singularity
14
+ -ug--user_group* Use alternative user group for group project directory
14
15
  -c--contain* Contain in directory (using Singularity)
15
16
  -s--sync* Contain in directory and sync jobs
16
17
  -e--exclusive Make exclusive use of the node
@@ -21,8 +22,9 @@ $slurm_options = SOPT.get <<EOF
21
22
  -t--tail Tail the logs
22
23
  -q--queue* Queue
23
24
  -t--task_cpus* Tasks
25
+ -W--workflows* Additional workflows
24
26
  -tm--time* Time
25
- -S--server* SLURM login node
27
+ -rmb--remove_slurm_basedir Remove the SLURM working directory (command, STDIN, exit status, ...)
26
28
  EOF
27
29
 
28
30
  class Step
@@ -30,9 +32,14 @@ class Step
30
32
  if done?
31
33
  self.load
32
34
  else
33
- Marenostrum::SLURM.run_job(self, SOPT::GOT_OPTIONS.merge($slurm_options))
35
+ begin
36
+ Log.debug "Issuing SLURM job for #{self.path}"
37
+ HPC::SLURM.run_job(self, SOPT::GOT_OPTIONS.merge($slurm_options))
38
+ rescue HPC::SBATCH
39
+ end
34
40
  end
35
41
  end
36
42
  end
37
43
 
44
+ ARGV.concat ["-W", $slurm_options[:workflows]] if $slurm_options[:workflows]
38
45
  load Rbbt.share.rbbt_commands.workflow.task.find
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.28.14
4
+ version: 5.29.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-18 00:00:00.000000000 Z
11
+ date: 2021-01-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -211,6 +211,8 @@ files:
211
211
  - lib/rbbt/entity/identifiers.rb
212
212
  - lib/rbbt/fix_width_table.rb
213
213
  - lib/rbbt/hpc.rb
214
+ - lib/rbbt/hpc/orchestrate.rb
215
+ - lib/rbbt/hpc/slurm.rb
214
216
  - lib/rbbt/knowledge_base.rb
215
217
  - lib/rbbt/knowledge_base/enrichment.rb
216
218
  - lib/rbbt/knowledge_base/entity.rb
@@ -376,6 +378,9 @@ files:
376
378
  - share/rbbt_commands/resource/produce
377
379
  - share/rbbt_commands/resource/read
378
380
  - share/rbbt_commands/rsync
381
+ - share/rbbt_commands/slurm/list
382
+ - share/rbbt_commands/slurm/orchestrate
383
+ - share/rbbt_commands/slurm/task
379
384
  - share/rbbt_commands/stat/abs
380
385
  - share/rbbt_commands/stat/boxplot
381
386
  - share/rbbt_commands/stat/compare_lists
@@ -430,7 +435,6 @@ files:
430
435
  - share/rbbt_commands/workflow/remote/list
431
436
  - share/rbbt_commands/workflow/remote/remove
432
437
  - share/rbbt_commands/workflow/server
433
- - share/rbbt_commands/workflow/slurm
434
438
  - share/rbbt_commands/workflow/task
435
439
  - share/rbbt_commands/workflow/trace
436
440
  - share/unicorn.rb