rbbt-util 5.29.1 → 5.30.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9b88fc549c1c1dc5cd56f06d933776d56540e3d0f4bacf77f04a449abcda974f
4
- data.tar.gz: caf80ab624418c6c0a744038d98569c3a32d18dc8d9de162b42363e1c281e8c2
3
+ metadata.gz: 649eff7bb5d00dd4bf47e39c8e7aba41279f0318d97289aae42e2f404eee7969
4
+ data.tar.gz: cb215144a707557db37b8e160c860622676ef2f29aa395d15813d28b7c8ba233
5
5
  SHA512:
6
- metadata.gz: 8944f1d996afa5610f70046f4e61cf326461050f06f347529cd7e403440e0d9b47d0c33b862bc83e89d4479ce889e773aef0bbabfaf0dae797c3a2164f0e8a8c
7
- data.tar.gz: fa0051835f35e23873bde6a075d81805d2878643e35e96e20c5d7423f0c7c48eaf25b2c5afad48215481b0ddd17b9d251555eae5b5c4a7fa420d9796dba9130b
6
+ metadata.gz: 34b782a247f1816cd57b6acfef861a84f1d0003376e048b1c441c841cee7abe09b2a7d9d858eaa99bef8f2dfdbdb88352d0c8b6bef5cfec4a72a79e75f24db61
7
+ data.tar.gz: 5a1bc971d33e35eb7e9b57e258d705393dfac17896ad68dabd58f0584ede460bcccbced20a68ac7a5250fc03932357ef185d1f9908b798691dde85d3cf173c35
@@ -1,23 +1,110 @@
1
1
  require 'rbbt/workflow/util/orchestrator'
2
2
  module HPC
3
3
  module SLURM
4
- def self.orchestrate_job(job, options, seen = {})
4
+
5
+ def self.job_rules(rules, job)
6
+ workflow = job.workflow.to_s
7
+ task_name = job.task_name.to_s
8
+ defaults = rules["defaults"] || {}
9
+
10
+ job_rules = IndiferentHash.setup(defaults.dup)
11
+
12
+ rules["chains"].each do |name,info|
13
+ IndiferentHash.setup(info)
14
+ chain_tasks = info[:tasks].split(/,\s*/)
15
+
16
+ chain_tasks.each do |task|
17
+ task_workflow, chain_task = task.split("#")
18
+ chain_task, task_workflow = task_workflow, info[:workflow] if chain_task.nil? or chain_tasks.empty?
19
+ job_rules["chain_tasks"] ||= {}
20
+ job_rules["chain_tasks"][task_workflow] ||= []
21
+ job_rules["chain_tasks"][task_workflow] << chain_task
22
+ next unless task_name == chain_task.to_s && workflow == task_workflow.to_s
23
+ config_keys = job_rules.delete :config_keys
24
+ job_rules = IndiferentHash.setup(job_rules.merge(info))
25
+ if config_keys
26
+ config_keys.gsub!(/,\s+/,',')
27
+ job_rules[:config_keys] = job_rules[:config_keys] ? config_keys + "," + job_rules[:config_keys] : config_keys
28
+ end
29
+ end
30
+
31
+ if job_rules["chain_tasks"][workflow] && job_rules["chain_tasks"][workflow].include?(task_name)
32
+ break
33
+ else
34
+ job_rules.delete "chain_tasks"
35
+ end
36
+ end if rules["chains"]
37
+
38
+ config_keys = job_rules.delete :config_keys
39
+ job_rules = IndiferentHash.setup(job_rules.merge(rules[workflow][task_name])) if rules[workflow] && rules[workflow][task_name]
40
+
41
+ if config_keys
42
+ config_keys.gsub!(/,\s+/,',')
43
+ job_rules[:config_keys] = job_rules[:config_keys] ? config_keys + "," + job_rules[:config_keys] : config_keys
44
+ end
45
+
46
+ if rules["skip"] && rules["skip"][workflow]
47
+ job_rules["skip"] = true if rules["skip"][workflow].split(/,\s*/).include? task_name
48
+ end
49
+
50
+ job_rules
51
+ end
52
+
53
+ def self.get_job_dependencies(job, job_rules)
54
+ deps = job.dependencies || []
55
+ deps += job.input_dependencies || []
56
+ deps
57
+ end
58
+
59
+ def self.orchestrate_job(job, options, skip = false, seen = {})
5
60
  return if job.done?
6
61
  return unless job.path.split("/")[-4] == "jobs"
62
+ seen[:orchestration_target_job] ||= job
63
+
7
64
  options.delete "recursive_clean"
65
+ options.delete "clean_task"
66
+ options.delete "clean"
8
67
  options.delete "tail"
9
- rules = YAML.load(Open.read(options[:rules])) if options[:rules]
68
+ options.delete "printfile"
69
+ options.delete "detach"
70
+
71
+ rules = YAML.load(Open.read(options[:orchestration_rules])) if options[:orchestration_rules]
10
72
  rules ||= {}
73
+ IndiferentHash.setup(rules)
11
74
 
12
- deps = job.dependencies || []
13
- deps += job.input_dependencies || []
75
+ job_rules = self.job_rules(rules, job)
76
+
77
+ deps = get_job_dependencies(job, job_rules)
14
78
 
15
79
  dep_ids = deps.collect do |dep|
16
- seen[dep.path] ||= self.orchestrate_job(dep, options.dup, seen)
17
- end.compact
80
+ skip_dep = job_rules["chain_tasks"] &&
81
+ job_rules["chain_tasks"][job.workflow.to_s] && job_rules["chain_tasks"][job.workflow.to_s].include?(job.task_name.to_s) &&
82
+ job_rules["chain_tasks"][dep.workflow.to_s] && job_rules["chain_tasks"][dep.workflow.to_s].include?(dep.task_name.to_s)
83
+
84
+ deps = seen[dep.path] ||= self.orchestrate_job(dep, options, skip_dep, seen)
85
+ if job.canfail_paths.include? dep.path
86
+ [deps].flatten.compact.collect{|id| ['canfail', id] * ":"}
87
+ else
88
+ deps
89
+ end
90
+ end.flatten.compact.uniq
91
+
92
+ skip = true if job_rules[:skip]
93
+ return dep_ids if skip and seen[:orchestration_target_job] != job
94
+
95
+ job_rules.delete :chain_tasks
96
+ job_rules.delete :tasks
97
+ job_rules.delete :workflow
98
+
99
+ config_keys = job_rules.delete(:config_keys)
100
+
101
+ job_options = IndiferentHash.setup(options.merge(job_rules).merge(:slurm_dependencies => dep_ids))
102
+ job_options.delete :orchestration_rules
103
+ if config_keys
104
+ config_keys.gsub!(/,\s+/,',')
105
+ job_options[:config_keys] = job_options[:config_keys] ? config_keys + "," + job_options[:config_keys] : config_keys
106
+ end
18
107
 
19
- job_rules = Workflow::Orchestrator.job_rules(rules, job)
20
- job_options = options.merge(job_rules).merge(:slurm_dependencies => dep_ids)
21
108
  run_job(job, job_options)
22
109
  end
23
110
  end
@@ -33,7 +33,8 @@ module HPC
33
33
  group = File.basename(File.dirname(ENV['HOME']))
34
34
 
35
35
  if contain_and_sync
36
- contain = "/scratch/tmp/rbbt-#{user}" if contain.nil?
36
+ random_file = TmpFile.random_name
37
+ contain = "/scratch/tmp/rbbt-#{user}/#{random_file}" if contain.nil?
37
38
  sync = "~/.rbbt/var/jobs" if sync.nil?
38
39
  wipe_container = "post" if wipe_container.nil?
39
40
  end
@@ -58,11 +59,11 @@ module HPC
58
59
  when FalseClass
59
60
  '--' << o << "=false"
60
61
  else
61
- ['--' << o, "'#{v}'"] * " "
62
+ ['--' << o, "'#{v.to_s.gsub("'", '\'')}'"] * " "
62
63
  end
63
64
  end * " "
64
65
 
65
- rbbt_cmd << " --config_keys='#{config_keys}'" if config_keys and not config_keys.empty?
66
+ rbbt_cmd << " --config_keys='#{config_keys.gsub("'", '\'')}'" if config_keys and not config_keys.empty?
66
67
 
67
68
  time = Misc.format_seconds Misc.timespan(time) unless time.include? ":"
68
69
 
@@ -76,6 +77,7 @@ module HPC
76
77
  fjob = File.join(slurm_basedir, 'job.id')
77
78
  fexit = File.join(slurm_basedir, 'exit.status')
78
79
  fsync = File.join(slurm_basedir, 'sync.log')
80
+ fsyncexit = File.join(slurm_basedir, 'sync.status')
79
81
  fcmd = File.join(slurm_basedir, 'command.slurm')
80
82
 
81
83
  #{{{ GENERATE TEMPLATE
@@ -107,10 +109,6 @@ module HPC
107
109
  EOF
108
110
  end
109
111
 
110
- header +=<<-EOF
111
- #CMD: #{rbbt_cmd}
112
- EOF
113
-
114
112
  # ENV
115
113
  env = ""
116
114
  env +=<<-EOF
@@ -246,7 +244,7 @@ EOF
246
244
  end
247
245
 
248
246
  if contain
249
- rbbt_cmd << " " << %(--workdir_all='#{contain}')
247
+ rbbt_cmd << " " << %(--workdir_all='#{contain.gsub("'", '\'')}/workdir')
250
248
  end
251
249
  end
252
250
 
@@ -254,16 +252,27 @@ EOF
254
252
  cmd =<<-EOF
255
253
  #{exec_cmd} \\
256
254
  #{rbbt_cmd}
255
+ EOF
256
+ annotate_cmd =<<-EOF
257
+ #{exec_cmd} \\
258
+ workflow write_info --recursive --force=false --check_pid "$step_path" slurm_job $SLURM_JOB_ID
257
259
  EOF
258
260
 
261
+ header +=<<-EOF
262
+ #CMD: #{rbbt_cmd}
263
+ EOF
264
+
259
265
  run +=<<-EOF
260
266
 
261
267
  # Run command
262
- #{cmd}
268
+ step_path=$(#{cmd})
263
269
 
264
270
  # Save exit status
265
271
  exit_status=$?
266
272
 
273
+ # Annotate info with SLURM job_info
274
+ #{annotate_cmd}
275
+
267
276
  EOF
268
277
 
269
278
  # CODA
@@ -273,10 +282,10 @@ EOF
273
282
  coda +=<<-EOF
274
283
  singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean all -q &>> #{fsync}
275
284
  EOF
276
- else
277
- coda +=<<-EOF
278
- rbbt system clean all -q &>> #{fsync}
279
- EOF
285
+ # else
286
+ # coda +=<<-EOF
287
+ #rbbt system clean all -q &>> #{fsync}
288
+ #EOF
280
289
  end
281
290
 
282
291
  if sync.include?("=>")
@@ -285,7 +294,7 @@ EOF
285
294
  sync = sync.strip
286
295
  source = File.join(File.expand_path(contain), source)
287
296
  else
288
- source = File.join(File.expand_path(contain), '.rbbt/var/jobs')
297
+ source = File.join(File.expand_path(contain), 'workdir/var/jobs')
289
298
  end
290
299
 
291
300
  target = File.expand_path(sync)
@@ -295,6 +304,7 @@ EOF
295
304
  mkdir -p "$(dirname '#{target}')"
296
305
  rsync -avztAXHP --copy-unsafe-links "#{source}/" "#{target}/" &>> #{fsync}
297
306
  sync_es="$?"
307
+ echo $sync_es > #{fsyncexit}
298
308
  find '#{target}' -type l -ls | awk '$13 ~ /^#{target.gsub('/','\/')}/ { sub("#{source}", "#{target}", $13); print $11, $13 }' | while read A B; do rm $A; ln -s $B $A; done
299
309
  EOF
300
310
 
@@ -320,23 +330,24 @@ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -v /dev/shm/sem
320
330
  EOF
321
331
  else
322
332
  coda +=<<-EOF
323
- #{exec_cmd} system clean
333
+ ##{exec_cmd} system clean
324
334
  if [ $exit_status == '0' -a $sync_es == '0' ]; then
325
335
  rm -Rfv #{contain} &>> #{fsync}
326
336
  else
327
337
  echo "ERROR: Process failed or results could not sync correctly. Contain directory not purged" &>> #{fsync}
328
338
  fi
329
- unset sync_es
330
339
  EOF
331
340
 
332
341
  end
333
342
  end
334
343
  end
344
+
335
345
  coda +=<<-EOF
336
346
 
337
347
  # Write exit status to file
338
348
  echo $exit_status > #{fexit}
339
349
  EOF
350
+
340
351
  if sync
341
352
  coda +=<<-EOF
342
353
  if [ "$sync_es" == '0' ]; then
@@ -361,6 +372,11 @@ EOF
361
372
 
362
373
  slurm_basedir = options[:slurm_basedir]
363
374
  dependencies = options.delete :slurm_dependencies
375
+ dependencies = [] if dependencies.nil?
376
+
377
+ canfail_dependencies = dependencies.select{|dep| dep =~ /^canfail:(\d+)/ }.collect{|dep| dep.partition(":").last}
378
+ dependencies = dependencies.reject{|dep| dep =~ /^canfail:(\d+)/ }
379
+
364
380
  Open.mkdir slurm_basedir
365
381
 
366
382
  dry_run = options.delete :dry_run
@@ -369,6 +385,7 @@ EOF
369
385
  ferr = File.join(slurm_basedir, 'std.err')
370
386
  fjob = File.join(slurm_basedir, 'job.id')
371
387
  fdep = File.join(slurm_basedir, 'dependencies.list')
388
+ fcfdep = File.join(slurm_basedir, 'canfail_dependencies.list')
372
389
  fexit = File.join(slurm_basedir, 'exit.status')
373
390
  fsync = File.join(slurm_basedir, 'sync.log')
374
391
  fcmd = File.join(slurm_basedir, 'command.slurm')
@@ -400,8 +417,21 @@ EOF
400
417
  Open.rm fexit
401
418
  Open.rm fout
402
419
  Open.rm ferr
420
+
403
421
  Open.write(fdep, dependencies * "\n") if dependencies.any?
404
- dep_str = dependencies.any? ? "--dependency=afterok:" + dependencies * ":" : ''
422
+ Open.write(fcfdep, canfail_dependencies * "\n") if canfail_dependencies.any?
423
+
424
+
425
+ dep_str = '--dependency='
426
+ normal_dep_str = dependencies.any? ? "afterok:" + dependencies * ":" : nil
427
+ canfail_dep_str = canfail_dependencies.any? ? "afterany:" + canfail_dependencies * ":" : nil
428
+
429
+ if normal_dep_str.nil? && canfail_dep_str.nil?
430
+ dep_str = ""
431
+ else
432
+ dep_str += [normal_dep_str, canfail_dep_str].compact * ","
433
+ end
434
+
405
435
  job = CMD.cmd("sbatch #{dep_str} '#{fcmd}'").read.scan(/\d+/).first.to_i
406
436
  Log.debug "SBATCH job id: #{job}"
407
437
  Open.write(fjob, job.to_s)
@@ -494,7 +524,11 @@ EOF
494
524
  dry_run = options.delete :dry_run
495
525
  tail = options.delete :tail
496
526
  dependencies = options.delete :slurm_dependencies
527
+ procpath = options.delete :SLURM_procpath
528
+
497
529
  options[:jobname] = job.clean_name
530
+ log_level = options.delete :log
531
+ log_level ||= Log.severity
498
532
 
499
533
  workflow = job.workflow
500
534
 
@@ -519,14 +553,13 @@ EOF
519
553
  inputs_dir = File.join(tmp_directory, 'inputs_dir')
520
554
  saved = Step.save_job_inputs(job, inputs_dir)
521
555
 
522
- if saved && saved.any?
523
- options[:inputs_dir] = inputs_dir
524
- cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '--load_inputs', inputs_dir, '--log', (options[:log] || Log.severity).to_s]
525
- else
526
- cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '--log', (options[:log] || Log.severity).to_s]
527
- end
556
+ cmd = ['workflow', 'task', workflow.to_s, task.to_s, '--printpath', '--log', log_level.to_s]
557
+
558
+ cmd << "--procpath_performance='#{tmp_directory}/procpath##{procpath.gsub(',', '#')}'" if procpath
559
+
560
+ cmd << "--override_deps='#{override_deps.gsub("'", '\'')}'" if override_deps and not override_deps.empty?
528
561
 
529
- cmd << "--override_deps='#{override_deps}'" if override_deps and not override_deps.empty?
562
+ cmd << "--load_inputs='#{inputs_dir}'" if saved && saved.any?
530
563
 
531
564
  template = self.template(cmd, options)
532
565
  jobid = self.issue_template(template, options.merge(:slurm_basedir => slurm_basedir, :dry_run => dry_run, :slurm_dependencies => dependencies))
data/lib/rbbt/persist.rb CHANGED
@@ -110,6 +110,8 @@ module Persist
110
110
  def self.load_file(path, type)
111
111
  begin
112
112
  case (type || :marshal).to_sym
113
+ when :path
114
+ path
113
115
  when :nil
114
116
  nil
115
117
  when :boolean
@@ -167,6 +169,8 @@ module Persist
167
169
  end
168
170
 
169
171
  case (type || :marshal).to_sym
172
+ when :path
173
+ nil
170
174
  when :nil
171
175
  nil
172
176
  when :boolean
@@ -104,9 +104,6 @@ module Persist
104
104
  write(true) if closed? || ! write?
105
105
  res = begin
106
106
  yield
107
- rescue Exception
108
- Log.exception $!
109
- raise $!
110
107
  ensure
111
108
  close
112
109
  end
@@ -115,7 +112,6 @@ module Persist
115
112
  end
116
113
 
117
114
  def read_and_close
118
- #return yield if @locked
119
115
  if read? || write?
120
116
  begin
121
117
  return yield
@@ -134,6 +130,41 @@ module Persist
134
130
  end
135
131
  end
136
132
 
133
+ def read_lock
134
+ read if closed?
135
+ if read?
136
+ return yield
137
+ end
138
+
139
+ lock do
140
+ close
141
+ read true
142
+ begin
143
+ yield
144
+ end
145
+ end
146
+ end
147
+
148
+ def write_lock
149
+ write if closed?
150
+ if write?
151
+ begin
152
+ return yield
153
+ ensure
154
+ close
155
+ end
156
+ end
157
+
158
+ lock do
159
+ close
160
+ write true
161
+ begin
162
+ yield
163
+ end
164
+ end
165
+ end
166
+
167
+
137
168
  def merge!(hash)
138
169
  hash.each do |key,values|
139
170
  self[key] = values
@@ -141,38 +172,38 @@ module Persist
141
172
  end
142
173
 
143
174
  def range(*args)
144
- self.read_and_close do
175
+ self.read_lock do
145
176
  super(*args)
146
177
  end
147
178
  end
148
179
 
149
180
  def include?(*args)
150
- self.read_and_close do
181
+ self.read_lock do
151
182
  super(*args) #- TSV::ENTRY_KEYS.to_a
152
183
  end
153
184
  end
154
185
 
155
186
  def [](*args)
156
- self.read_and_close do
187
+ self.read_lock do
157
188
  super(*args) #- TSV::ENTRY_KEYS.to_a
158
189
  end
159
190
  end
160
191
 
161
192
  def []=(*args)
162
- self.write_and_close do
193
+ self.write_lock do
163
194
  super(*args) #- TSV::ENTRY_KEYS.to_a
164
195
  end
165
196
  end
166
197
 
167
198
  def keys(*args)
168
- self.read_and_close do
199
+ self.read_lock do
169
200
  super(*args)
170
201
  end
171
202
  end
172
203
 
173
204
 
174
205
  def prefix(key)
175
- self.read_and_close do
206
+ self.read_lock do
176
207
  range(key, 1, key + MAX_CHAR, 1)
177
208
  end
178
209
  end
@@ -184,13 +215,13 @@ module Persist
184
215
 
185
216
 
186
217
  def size(*args)
187
- self.read_and_close do
218
+ self.read_lock do
188
219
  super(*args)
189
220
  end
190
221
  end
191
222
 
192
223
  def each(*args, &block)
193
- self.read_and_close do
224
+ self.read_lock do
194
225
  super(*args, &block)
195
226
  end
196
227
  end
@@ -208,7 +239,7 @@ module Persist
208
239
  end
209
240
 
210
241
  def values_at(*keys)
211
- self.read_and_close do
242
+ self.read_lock do
212
243
  keys.collect do |k|
213
244
  self[k]
214
245
  end