rbbt-util 5.30.9 → 5.31.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/hpc.rb +3 -0
  3. data/lib/rbbt/hpc/batch.rb +623 -0
  4. data/lib/rbbt/hpc/lsf.rb +119 -0
  5. data/lib/rbbt/hpc/orchestrate.rb +24 -19
  6. data/lib/rbbt/hpc/slurm.rb +62 -559
  7. data/lib/rbbt/resource/path.rb +3 -1
  8. data/lib/rbbt/tsv/accessor.rb +5 -2
  9. data/lib/rbbt/tsv/dumper.rb +1 -0
  10. data/lib/rbbt/tsv/parallel/traverse.rb +1 -1
  11. data/lib/rbbt/tsv/stream.rb +5 -6
  12. data/lib/rbbt/util/cmd.rb +15 -1
  13. data/lib/rbbt/util/config.rb +2 -2
  14. data/lib/rbbt/util/log.rb +22 -1
  15. data/lib/rbbt/util/log/progress.rb +17 -2
  16. data/lib/rbbt/util/log/progress/report.rb +36 -3
  17. data/lib/rbbt/util/misc/development.rb +2 -2
  18. data/lib/rbbt/util/misc/inspect.rb +17 -1
  19. data/lib/rbbt/util/misc/omics.rb +60 -1
  20. data/lib/rbbt/util/misc/options.rb +5 -0
  21. data/lib/rbbt/workflow/accessor.rb +7 -2
  22. data/lib/rbbt/workflow/definition.rb +7 -3
  23. data/lib/rbbt/workflow/step/accessor.rb +1 -1
  24. data/lib/rbbt/workflow/step/run.rb +9 -0
  25. data/lib/rbbt/workflow/usage.rb +13 -13
  26. data/lib/rbbt/workflow/util/archive.rb +5 -3
  27. data/lib/rbbt/workflow/util/provenance.rb +26 -21
  28. data/share/config.ru +3 -3
  29. data/share/rbbt_commands/{slurm → hpc}/clean +91 -18
  30. data/share/rbbt_commands/{slurm → hpc}/list +119 -31
  31. data/share/rbbt_commands/hpc/orchestrate +81 -0
  32. data/share/rbbt_commands/hpc/tail +81 -0
  33. data/share/rbbt_commands/hpc/task +80 -0
  34. data/test/rbbt/hpc/test_batch.rb +65 -0
  35. data/test/rbbt/hpc/test_slurm.rb +30 -0
  36. data/test/rbbt/util/misc/test_development.rb +11 -0
  37. data/test/rbbt/util/test_config.rb +13 -3
  38. data/test/test_helper.rb +3 -1
  39. metadata +16 -7
  40. data/share/rbbt_commands/slurm/orchestrate +0 -48
  41. data/share/rbbt_commands/slurm/task +0 -46
@@ -135,9 +135,14 @@ module Misc
135
135
  end
136
136
 
137
137
  new_options
138
+
139
+ options.replace new_options
138
140
  end
139
141
 
140
142
  def self.process_options(hash, *keys)
143
+ defaults = keys.pop if Hash === keys.last
144
+ hahs = Misc.add_defaults hash, defaults if defaults
145
+
141
146
  if keys.length == 1
142
147
  hash.include?(keys.first.to_sym) ? hash.delete(keys.first.to_sym) : hash.delete(keys.first.to_s)
143
148
  else
@@ -250,6 +250,8 @@ module Workflow
250
250
  end
251
251
 
252
252
  def assign_dep_inputs(_inputs, options, all_d, task_info)
253
+ IndiferentHash.setup(_inputs)
254
+
253
255
  options.each{|i,v|
254
256
  next if i == :compute or i == "compute"
255
257
  case v
@@ -259,13 +261,16 @@ module Workflow
259
261
  rec_dependency = all_d.flatten.select{|d| d.task_name.to_sym == v }.first
260
262
 
261
263
  if rec_dependency.nil?
262
- if _inputs.include? v
263
- _inputs[i] = _inputs.delete(v)
264
+ if _inputs.include?(v)
265
+ #_inputs[i] = _inputs.delete(v)
266
+ _inputs[i] = _inputs[v] unless _inputs.include? i #_inputs.delete(v)
264
267
  else
265
268
  _inputs[i] = v unless _inputs.include? i
266
269
  end
267
270
  else
268
271
  input_options = task_info[:input_options][i] || {}
272
+
273
+ #ToDo why was this always true?
269
274
  if input_options[:stream] or true
270
275
  #rec_dependency.run(true).grace unless rec_dependency.done? or rec_dependency.running?
271
276
  _inputs[i] = rec_dependency
@@ -79,9 +79,10 @@ module Workflow
79
79
  dep = dependencies.last.join
80
80
  raise dep.get_exception if dep.error?
81
81
  set_info :result_type, dep.info[:result_type]
82
- forget = config :forget_dep_tasks, :forget_dep_tasks, :default => FORGET_DEP_TASKS
82
+ forget = config :forget_dep_tasks, "forget_dep_tasks", "key:forget_dep_tasks", :default => FORGET_DEP_TASKS
83
83
  if forget
84
- remove = config :remove_dep_tasks, :remove_dep_tasks, :default => REMOVE_DEP_TASKS
84
+ remove = config :remove_dep_tasks, "remove_dep_tasks", "key:remove_dep_tasks", :default => REMOVE_DEP_TASKS
85
+
85
86
  self.archive_deps
86
87
  self.copy_files_dir
87
88
  self.dependencies = self.dependencies - [dep]
@@ -92,7 +93,10 @@ module Workflow
92
93
  when 'true'
93
94
  dep.clean
94
95
  when 'recursive'
95
- dep.recursive_clean
96
+ dep.rec_dependencies.each do |d|
97
+ d.clean unless config(:remove_dep, d.task_signature, d.task_name, d.workflow.to_s, :default => true).to_s == 'false'
98
+ end
99
+ dep.clean unless config(:remove_dep, dep.task_signature, dep.task_name, dep.workflow.to_s, :default => true).to_s == 'false'
96
100
  end
97
101
  else
98
102
  if Open.exists?(dep.files_dir)
@@ -99,7 +99,7 @@ class Step
99
99
  if String === value && File.exists?(value)
100
100
  Open.ln_s(value, path)
101
101
  else
102
- Open.write(path + '.yaml', value.to_yaml)
102
+ Open.write(path + '.yaml', value.to_s.to_yaml)
103
103
  end
104
104
  when Array === value
105
105
  Open.write(path, value.collect{|v| Step === v ? v.path : v.to_s} * "\n")
@@ -623,6 +623,15 @@ class Step
623
623
  Log.warn "Exception removing result of aborted job: #{$!.message}"
624
624
  end
625
625
  end
626
+
627
+ if Open.exists?(tmp_path) && status != :done
628
+ Log.warn "Aborted job had finished. Removing tmp result -- #{ tmp_path }"
629
+ begin
630
+ Open.rm tmp_path
631
+ rescue Exception
632
+ Log.warn "Exception removing tmp result of aborted job: #{$!.message}"
633
+ end
634
+ end
626
635
  end
627
636
 
628
637
  def _abort
@@ -6,19 +6,6 @@ module Task
6
6
  puts "\n" << Misc.format_paragraph(description.strip) << "\n" if description and not description.empty?
7
7
  puts
8
8
 
9
- case
10
- when (input_types.values & [:array]).any?
11
- puts Log.color(:green, Misc.format_paragraph("Lists are specified as arguments using ',' or '|'. When specified as files the '\\n'
12
- also works in addition to the others. You may use the '--array_separator' option
13
- the change this default. Whenever a file is specified it may also accept STDIN using
14
- the '-' character."))
15
- puts
16
-
17
- when (input_types.values & [:text, :tsv]).any?
18
- puts Log.color(:green, Misc.format_paragraph("Whenever a file is specified it may also accept STDIN using the '-' character."))
19
- puts
20
- end
21
-
22
9
  selects = []
23
10
  if inputs.any?
24
11
  inputs.zip(input_types.values_at(*inputs)).select{|i,t| t.to_sym == :select && input_options[i] && input_options[i][:select_options] }.each{|i,t| selects << [i, input_options[i][:select_options]] }
@@ -50,6 +37,19 @@ module Task
50
37
  puts
51
38
  end
52
39
 
40
+ case
41
+ when (input_types.values & [:array]).any?
42
+ puts Log.color(:green, Misc.format_paragraph("Lists are specified as arguments using ',' or '|'. When specified as files the '\\n'
43
+ also works in addition to the others. You may use the '--array_separator' option
44
+ the change this default. Whenever a file is specified it may also accept STDIN using
45
+ the '-' character."))
46
+ puts
47
+
48
+ when (input_types.values & [:text, :tsv]).any?
49
+ puts Log.color(:green, Misc.format_paragraph("Whenever a file is specified it may also accept STDIN using the '-' character."))
50
+ puts
51
+ end
52
+
53
53
  puts Log.color(:magenta, "Returns: ") << Log.color(:blue, result_type.to_s) << "\n"
54
54
  puts
55
55
 
@@ -51,7 +51,7 @@ class Step
51
51
  end
52
52
  end
53
53
 
54
- def self.job_files_for_archive(files, recursive = false)
54
+ def self.job_files_for_archive(files, recursive = false, skip_overriden = false)
55
55
  job_files = Set.new
56
56
 
57
57
  jobs = files.collect do |file|
@@ -65,6 +65,8 @@ class Step
65
65
 
66
66
  jobs.each do |step|
67
67
  next unless File.exists?(step.path)
68
+ next if skip_overriden && step.overriden
69
+
68
70
  job_files << step.path
69
71
  job_files << step.info_file if File.exists?(step.info_file)
70
72
  job_files << Step.md5_file(step.path) if File.exists?(Step.md5_file step.path)
@@ -258,9 +260,9 @@ puts resource[path].find(search_path)
258
260
  end
259
261
  end
260
262
 
261
- def self.purge(path, recursive = false)
263
+ def self.purge(path, recursive = false, skip_overriden = true)
262
264
  path = [path] if String === path
263
- job_files = job_files_for_archive path, recursive
265
+ job_files = job_files_for_archive path, recursive, skip_overriden
264
266
 
265
267
  job_files.each do |file|
266
268
  begin
@@ -1,23 +1,28 @@
1
1
  class Step
2
+
3
+ def self.status_color(status)
4
+ case status.to_sym
5
+ when :error, :aborted, :missing, :dead, :unsync
6
+ :red
7
+ when :streaming, :started
8
+ :cyan
9
+ when :done, :noinfo
10
+ :green
11
+ when :dependencies, :waiting, :setup
12
+ :yellow
13
+ when :notfound, :cleaned
14
+ :blue
15
+ else
16
+ if status.to_s.index ">"
17
+ :cyan
18
+ else
19
+ :cyan
20
+ end
21
+ end
22
+ end
23
+
2
24
  def self.prov_status_msg(status)
3
- color = case status.to_sym
4
- when :error, :aborted, :missing, :dead, :unsync
5
- :red
6
- when :streaming, :started
7
- :cyan
8
- when :done, :noinfo
9
- :green
10
- when :dependencies, :waiting, :setup
11
- :yellow
12
- when :notfound, :cleaned
13
- :blue
14
- else
15
- if status.to_s.index ">"
16
- :cyan
17
- else
18
- :cyan
19
- end
20
- end
25
+ color = status_color(status)
21
26
  Log.color(color, status.to_s)
22
27
  end
23
28
 
@@ -25,7 +30,7 @@ class Step
25
30
  parts = path.sub(/\{.*/,'').split "/"
26
31
 
27
32
  parts.pop
28
-
33
+
29
34
  task = Log.color(:yellow, parts.pop)
30
35
  workflow = Log.color(:magenta, parts.pop)
31
36
  #if status.to_s == 'noinfo' && parts.last != 'jobs'
@@ -89,7 +94,7 @@ class Step
89
94
  str << prov_report(dep, offset + 1, task, seen, expand_repeats)
90
95
  else
91
96
  if expand_repeats
92
- str << Log.color(:green, Log.uncolor(prov_report(dep, offset+1, task)))
97
+ str << Log.color(Step.status_color(dep.status), Log.uncolor(prov_report(dep, offset+1, task)))
93
98
  else
94
99
  info = dep.info || {}
95
100
  status = info[:status] || :missing
@@ -98,7 +103,7 @@ class Step
98
103
  status = :unsync if status == :done and not Open.exist?(path)
99
104
  status = :notfound if status == :noinfo and not Open.exist?(path)
100
105
 
101
- str << Log.color(status == :notfound ? :blue : :green, " " * (offset + 1) + Log.uncolor(prov_report_msg(status, name, path, info)))
106
+ str << Log.color(Step.status_color(status), " " * (offset + 1) + Log.uncolor(prov_report_msg(status, name, path, info)))
102
107
  end
103
108
  end
104
109
  end if step.dependencies
data/share/config.ru CHANGED
@@ -43,6 +43,9 @@ app_eval app, Rbbt.etc['app.d/remote_workflow_tasks.rb'].find_all
43
43
  #{{{ BASE
44
44
  app_eval app, Rbbt.etc['app.d/base.rb'].find
45
45
 
46
+ #{{{ SINATRA
47
+ load_file Rbbt.lib['sinatra.rb'].find_all
48
+
46
49
  #{{{ RESOURCES
47
50
  load_file Rbbt.etc['app.d/resources.rb'].find
48
51
 
@@ -70,9 +73,6 @@ load_file Rbbt.etc['app.d/preload.rb'].find_all
70
73
  #{{{ PRELOAD
71
74
  load_file Rbbt.etc['app.d/semaphores.rb'].find_all
72
75
 
73
- #{{{ SINATRA
74
- load_file Rbbt.lib['sinatra.rb'].find_all
75
-
76
76
  Entity.entity_list_cache = Rbbt.var.sinatra.app[app_name].find.entity_lists
77
77
  Entity.entity_map_cache = Rbbt.var.sinatra.app[app_name].find.entity_maps
78
78
  Entity.entity_property_cache = Rbbt.var.sinatra.app[app_name].find.entity_properties
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'rbbt-util'
4
4
  require 'rbbt/util/simpleopt'
5
+ require 'rbbt/hpc'
5
6
 
6
7
  #$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
8
 
@@ -9,16 +10,17 @@ options = SOPT.setup <<EOF
9
10
 
10
11
  Clean error or aborted jobs
11
12
 
12
- $ rbbt mnl [options]
13
+ $ rbbt slurm clean [options]
13
14
 
14
15
  -h--help Print this help
15
16
  -d--done Done jobs only
16
17
  -e--error Error jobs only
17
18
  -a--aborted SLURM aboted jobs
19
+ -q--queued Queued jobs only
18
20
  -j--job* Job ids
19
21
  -s--search* Regular expression
20
22
  -t--tail* Show the last lines of the STDERR
21
- -SBP--sbatch_parameters show sbatch parameters
23
+ -BP--batch_parameters show batch parameters
22
24
  -dr--dry_run Do not erase anything
23
25
  EOF
24
26
 
@@ -31,14 +33,47 @@ if options[:help]
31
33
  exit 0
32
34
  end
33
35
 
36
+ batch_system = options.delete :batch_system
37
+ batch_system ||= 'auto'
38
+
39
+ HPC::BATCH_MODULE = case batch_system.to_s.downcase
40
+ when 'slurm'
41
+ HPC::SLURM
42
+ when 'lsf'
43
+ HPC::LSF
44
+ when 'auto'
45
+ case $previous_commands.last
46
+ when 'slurm'
47
+ HPC::SLURM
48
+ when 'lsf'
49
+ HPC::LSF
50
+ else
51
+ case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
52
+ when 'slurm'
53
+ HPC::SLURM
54
+ when 'lsf'
55
+ HPC::LSF
56
+ else
57
+ case ENV["BATCH_SYSTEM"].to_s.downcase
58
+ when 'slurm'
59
+ HPC::SLURM
60
+ when 'lsf'
61
+ HPC::LSF
62
+ end
63
+ end
64
+ end
65
+ end
66
+
67
+ raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
68
+
34
69
  Log.severity = 4
35
- done, error, aborted, jobid, search, tail, sbatch_parameters, dry_run = options.values_at :done, :error, :aborted, :job, :search, :tail, :sbatch_parameters, :dry_run
70
+ done, error, aborted, queued, jobid, search, tail, batch_parameters, dry_run = options.values_at :done, :error, :aborted, :queued, :job, :search, :tail, :batch_parameters, :dry_run
36
71
 
37
- workdir = File.expand_path('~/rbbt-slurm')
72
+ workdir = File.expand_path('~/rbbt-batch')
38
73
  Path.setup(workdir)
39
74
 
40
75
  running_jobs = begin
41
- squeue_txt = CMD.cmd('squeue').read
76
+ squeue_txt = HPC::BATCH_MODULE.job_status
42
77
  squeue_txt.split("\n").collect{|l| l.to_i.to_s}
43
78
  rescue
44
79
  Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
@@ -58,27 +93,35 @@ else
58
93
  end
59
94
 
60
95
  count = 0
61
- workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
96
+ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
62
97
  dir = File.dirname(fcmd)
98
+ command_txt = Open.read(fcmd)
63
99
 
64
- if m = Open.read(fcmd).match(/#CMD: (.*)/)
100
+ if m = command_txt.match(/#CMD: (.*)/)
65
101
  cmd = m[1]
66
102
  else
67
103
  cmd = nil
68
104
  end
69
105
 
70
- if m = Open.read(fcmd).match(/# Run command\n(.*?)\n/im)
106
+ if m = command_txt.match(/# Run command\n(.*?)\n/im)
71
107
  exe = m[1]
72
108
  else
73
109
  exe = nil
74
110
  end
75
111
 
76
- if m = Open.read(fcmd).match(/^CONTAINER_DIR=(.*)/)
112
+ if m = command_txt.match(/^CONTAINER_DIR=(.*)/)
77
113
  container_home = m[1]
78
114
  else
79
115
  container_home = nil
80
116
  end
81
117
 
118
+ if m = command_txt.match(/^BATCH_SYSTEM=(.*)/)
119
+ job_batch_system = m[1].downcase
120
+ else
121
+ job_batch_system = nil
122
+ end
123
+
124
+ different_system = job_batch_system != batch_system
82
125
 
83
126
  if File.exists?(fid = File.join(dir, 'job.id'))
84
127
  id = Open.read(fid).chomp
@@ -93,7 +136,16 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
93
136
  end
94
137
 
95
138
  if File.exists?(fstatus = File.join(dir, 'job.status'))
96
- nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
139
+ fstatus_txt = Open.read(fstatus)
140
+ begin
141
+ if job_batch_system == "lsf"
142
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/)[5].split(",")
143
+ else
144
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
145
+ end
146
+ rescue
147
+ nodes = []
148
+ end
97
149
  elsif job_nodes[id]
98
150
  nodes = job_nodes[id]
99
151
  else
@@ -113,20 +165,36 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
113
165
  cadeps = Open.read(fcadep).split("\n") if File.exists?(fcadep)
114
166
 
115
167
  aborted = error = true if aborted.nil? && error.nil?
116
- if done || error || aborted || running || queued || jobid || search
168
+ #if done || error || aborted || running || queued || jobid || search
169
+ # select = false
170
+ # select = true if done && exit_status && exit_status.to_i == 0
171
+ # select = true if error && exit_status && exit_status.to_i != 0
172
+ # select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
173
+ # select = select && jobid.split(",").include?(id) if jobid
174
+ # select = select && cmd.match(/#{search}/) if search
175
+ # next unless select
176
+ #end
177
+
178
+ if done || error || aborted || queued || jobid
117
179
  select = false
118
- select = true if done && exit_status && exit_status.to_i == 0
119
- select = true if error && exit_status && exit_status.to_i != 0
180
+ select = true if done && exit_status == 0
181
+ select = true if error && exit_status && exit_status != 0
120
182
  select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
121
- select = select && jobid.split(",").include?(id) if jobid
183
+ is_running = exit_status.nil? && ( (running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)) || different_system )
184
+ select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
185
+ select = true if jobid && jobid.split(",").include?(id)
122
186
  select = select && cmd.match(/#{search}/) if search
123
187
  next unless select
188
+ elsif search
189
+ select = false
190
+ select = true if search && cmd.match(/#{search}/)
191
+ next unless select
124
192
  end
125
193
 
126
194
 
127
195
  puts Log.color(:yellow, "**ERASING**")
128
196
  puts Log.color :blue, dir
129
- puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.slurm')).to_s
197
+ puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
130
198
  puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
131
199
  puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
132
200
  puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
@@ -137,9 +205,14 @@ workdir.glob("**/command.slurm").sort_by{|f| File.mtime(f)}.each do |fcmd|
137
205
  puts Log.color(:magenta, "Nodes: ") << nodes * ", "
138
206
  puts Log.color(:magenta, "Output: ") << File.exists?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
139
207
 
140
- if options[:sbatch_parameters]
141
- puts Log.color(:magenta, "SBATCH parameters: ")
142
- puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
208
+ if options[:batch_parameters]
209
+ puts Log.color(:magenta, "BATCH parameters: ")
210
+ case job_batch_system
211
+ when 'slurm'
212
+ puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
213
+ when 'lsf'
214
+ puts Log.color :blue, CMD.cmd('grep "^#BSUB" |tail -n +6', :in => Open.read(fcmd)).read.strip
215
+ end
143
216
  end
144
217
 
145
218
  if tail && File.exists?(File.join(dir, 'std.err'))