rbbt-util 5.29.1 → 5.30.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9b88fc549c1c1dc5cd56f06d933776d56540e3d0f4bacf77f04a449abcda974f
4
- data.tar.gz: caf80ab624418c6c0a744038d98569c3a32d18dc8d9de162b42363e1c281e8c2
3
+ metadata.gz: 649eff7bb5d00dd4bf47e39c8e7aba41279f0318d97289aae42e2f404eee7969
4
+ data.tar.gz: cb215144a707557db37b8e160c860622676ef2f29aa395d15813d28b7c8ba233
5
5
  SHA512:
6
- metadata.gz: 8944f1d996afa5610f70046f4e61cf326461050f06f347529cd7e403440e0d9b47d0c33b862bc83e89d4479ce889e773aef0bbabfaf0dae797c3a2164f0e8a8c
7
- data.tar.gz: fa0051835f35e23873bde6a075d81805d2878643e35e96e20c5d7423f0c7c48eaf25b2c5afad48215481b0ddd17b9d251555eae5b5c4a7fa420d9796dba9130b
6
+ metadata.gz: 34b782a247f1816cd57b6acfef861a84f1d0003376e048b1c441c841cee7abe09b2a7d9d858eaa99bef8f2dfdbdb88352d0c8b6bef5cfec4a72a79e75f24db61
7
+ data.tar.gz: 5a1bc971d33e35eb7e9b57e258d705393dfac17896ad68dabd58f0584ede460bcccbced20a68ac7a5250fc03932357ef185d1f9908b798691dde85d3cf173c35
@@ -1,23 +1,110 @@
1
1
  require 'rbbt/workflow/util/orchestrator'
2
2
  module HPC
3
3
  module SLURM
4
- def self.orchestrate_job(job, options, seen = {})
4
+
5
+ def self.job_rules(rules, job)
6
+ workflow = job.workflow.to_s
7
+ task_name = job.task_name.to_s
8
+ defaults = rules["defaults"] || {}
9
+
10
+ job_rules = IndiferentHash.setup(defaults.dup)
11
+
12
+ rules["chains"].each do |name,info|
13
+ IndiferentHash.setup(info)
14
+ chain_tasks = info[:tasks].split(/,\s*/)
15
+
16
+ chain_tasks.each do |task|
17
+ task_workflow, chain_task = task.split("#")
18
+ chain_task, task_workflow = task_workflow, info[:workflow] if chain_task.nil? or chain_tasks.empty?
19
+ job_rules["chain_tasks"] ||= {}
20
+ job_rules["chain_tasks"][task_workflow] ||= []
21
+ job_rules["chain_tasks"][task_workflow] << chain_task
22
+ next unless task_name == chain_task.to_s && workflow == task_workflow.to_s
23
+ config_keys = job_rules.delete :config_keys
24
+ job_rules = IndiferentHash.setup(job_rules.merge(info))
25
+ if config_keys
26
+ config_keys.gsub!(/,\s+/,',')
27
+ job_rules[:config_keys] = job_rules[:config_keys] ? config_keys + "," + job_rules[:config_keys] : config_keys
28
+ end
29
+ end
30
+
31
+ if job_rules["chain_tasks"][workflow] && job_rules["chain_tasks"][workflow].include?(task_name)
32
+ break
33
+ else
34
+ job_rules.delete "chain_tasks"
35
+ end
36
+ end if rules["chains"]
37
+
38
+ config_keys = job_rules.delete :config_keys
39
+ job_rules = IndiferentHash.setup(job_rules.merge(rules[workflow][task_name])) if rules[workflow] && rules[workflow][task_name]
40
+
41
+ if config_keys
42
+ config_keys.gsub!(/,\s+/,',')
43
+ job_rules[:config_keys] = job_rules[:config_keys] ? config_keys + "," + job_rules[:config_keys] : config_keys
44
+ end
45
+
46
+ if rules["skip"] && rules["skip"][workflow]
47
+ job_rules["skip"] = true if rules["skip"][workflow].split(/,\s*/).include? task_name
48
+ end
49
+
50
+ job_rules
51
+ end
52
+
53
+ def self.get_job_dependencies(job, job_rules)
54
+ deps = job.dependencies || []
55
+ deps += job.input_dependencies || []
56
+ deps
57
+ end
58
+
59
+ def self.orchestrate_job(job, options, skip = false, seen = {})
5
60
  return if job.done?
6
61
  return unless job.path.split("/")[-4] == "jobs"
62
+ seen[:orchestration_target_job] ||= job
63
+
7
64
  options.delete "recursive_clean"
65
+ options.delete "clean_task"
66
+ options.delete "clean"
8
67
  options.delete "tail"
9
- rules = YAML.load(Open.read(options[:rules])) if options[:rules]
68
+ options.delete "printfile"
69
+ options.delete "detach"
70
+
71
+ rules = YAML.load(Open.read(options[:orchestration_rules])) if options[:orchestration_rules]
10
72
  rules ||= {}
73
+ IndiferentHash.setup(rules)
11
74
 
12
- deps = job.dependencies || []
13
- deps += job.input_dependencies || []
75
+ job_rules = self.job_rules(rules, job)
76
+
77
+ deps = get_job_dependencies(job, job_rules)
14
78
 
15
79
  dep_ids = deps.collect do |dep|
16
- seen[dep.path] ||= self.orchestrate_job(dep, options.dup, seen)
17
- end.compact
80
+ skip_dep = job_rules["chain_tasks"] &&
81
+ job_rules["chain_tasks"][job.workflow.to_s] && job_rules["chain_tasks"][job.workflow.to_s].include?(job.task_name.to_s) &&
82
+ job_rules["chain_tasks"][dep.workflow.to_s] && job_rules["chain_tasks"][dep.workflow.to_s].include?(dep.task_name.to_s)
83
+
84
+ deps = seen[dep.path] ||= self.orchestrate_job(dep, options, skip_dep, seen)
85
+ if job.canfail_paths.include? dep.path
86
+ [deps].flatten.compact.collect{|id| ['canfail', id] * ":"}
87
+ else
88
+ deps
89
+ end
90
+ end.flatten.compact.uniq
91
+
92
+ skip = true if job_rules[:skip]
93
+ return dep_ids if skip and seen[:orchestration_target_job] != job
94
+
95
+ job_rules.delete :chain_tasks
96
+ job_rules.delete :tasks
97
+ job_rules.delete :workflow
98
+
99
+ config_keys = job_rules.delete(:config_keys)
100
+
101
+ job_options = IndiferentHash.setup(options.merge(job_rules).merge(:slurm_dependencies => dep_ids))
102
+ job_options.delete :orchestration_rules
103
+ if config_keys
104
+ config_keys.gsub!(/,\s+/,',')
105
+ job_options[:config_keys] = job_options[:config_keys] ? config_keys + "," + job_options[:config_keys] : config_keys
106
+ end
18
107
 
19
- job_rules = Workflow::Orchestrator.job_rules(rules, job)
20
- job_options = options.merge(job_rules).merge(:slurm_dependencies => dep_ids)
21
108
  run_job(job, job_options)
22
109
  end
23
110
  end
@@ -33,7 +33,8 @@ module HPC
33
33
  group = File.basename(File.dirname(ENV['HOME']))
34
34
 
35
35
  if contain_and_sync
36
- contain = "/scratch/tmp/rbbt-#{user}" if contain.nil?
36
+ random_file = TmpFile.random_name
37
+ contain = "/scratch/tmp/rbbt-#{user}/#{random_file}" if contain.nil?
37
38
  sync = "~/.rbbt/var/jobs" if sync.nil?
38
39
  wipe_container = "post" if wipe_container.nil?
39
40
  end
@@ -58,11 +59,11 @@ module HPC
58
59
  when FalseClass
59
60
  '--' << o << "=false"
60
61
  else
61
- ['--' << o, "'#{v}'"] * " "
62
+ ['--' << o, "'#{v.to_s.gsub("'", '\'')}'"] * " "
62
63
  end
63
64
  end * " "
64
65
 
65
- rbbt_cmd << " --config_keys='#{config_keys}'" if config_keys and not config_keys.empty?
66
+ rbbt_cmd << " --config_keys='#{config_keys.gsub("'", '\'')}'" if config_keys and not config_keys.empty?
66
67
 
67
68
  time = Misc.format_seconds Misc.timespan(time) unless time.include? ":"
68
69
 
@@ -76,6 +77,7 @@ module HPC
76
77
  fjob = File.join(slurm_basedir, 'job.id')
77
78
  fexit = File.join(slurm_basedir, 'exit.status')
78
79
  fsync = File.join(slurm_basedir, 'sync.log')
80
+ fsyncexit = File.join(slurm_basedir, 'sync.status')
79
81
  fcmd = File.join(slurm_basedir, 'command.slurm')
80
82
 
81
83
  #{{{ GENERATE TEMPLATE
@@ -107,10 +109,6 @@ module HPC
107
109
  EOF
108
110
  end
109
111
 
110
- header +=<<-EOF
111
- #CMD: #{rbbt_cmd}
112
- EOF
113
-
114
112
  # ENV
115
113
  env = ""
116
114
  env +=<<-EOF
@@ -246,7 +244,7 @@ EOF
246
244
  end
247
245
 
248
246
  if contain
249
- rbbt_cmd << " " << %(--workdir_all='#{contain}')
247
+ rbbt_cmd << " " << %(--workdir_all='#{contain.gsub("'", '\'')}/workdir')
250
248
  end
251
249
  end
252
250
 
@@ -254,16 +252,27 @@ EOF
254
252
  cmd =<<-EOF
255
253
  #{exec_cmd} \\
256
254
  #{rbbt_cmd}
255
+ EOF
256
+ annotate_cmd =<<-EOF
257
+ #{exec_cmd} \\
258
+ workflow write_info --recursive --force=false --check_pid "$step_path" slurm_job $SLURM_JOB_ID
257
259
  EOF
258
260
 
261
+ header +=<<-EOF
262
+ #CMD: #{rbbt_cmd}
263
+ EOF
264
+
259
265
  run +=<<-EOF
260
266
 
261
267
  # Run command
262
- #{cmd}
268
+ step_path=$(#{cmd})
263
269
 
264
270
  # Save exit status
265
271
  exit_status=$?
266
272
 
273
+ # Annotate info with SLURM job_info
274
+ #{annotate_cmd}
275
+
267
276
  EOF
268
277
 
269
278
  # CODA
@@ -273,10 +282,10 @@ EOF
273
282
  coda +=<<-EOF
274
283
  singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean all -q &>> #{fsync}
275
284
  EOF
276
- else
277
- coda +=<<-EOF
278
- rbbt system clean all -q &>> #{fsync}
279
- EOF
285
+ # else
286
+ # coda +=<<-EOF
287
+ #rbbt system clean all -q &>> #{fsync}
288
+ #EOF
280
289
  end
281
290
 
282
291
  if sync.include?("=>")
@@ -285,7 +294,7 @@ EOF
285
294
  sync = sync.strip
286
295
  source = File.join(File.expand_path(contain), source)
287
296
  else
288
- source = File.join(File.expand_path(contain), '.rbbt/var/jobs')
297
+ source = File.join(File.expand_path(contain), 'workdir/var/jobs')
289
298
  end
290
299
 
291
300
  target = File.expand_path(sync)
@@ -295,6 +304,7 @@ EOF
295
304
  mkdir -p "$(dirname '#{target}')"
296
305
  rsync -avztAXHP --copy-unsafe-links "#{source}/" "#{target}/" &>> #{fsync}
297
306
  sync_es="$?"
307
+ echo $sync_es > #{fsyncexit}
298
308
  find '#{target}' -type l -ls | awk '$13 ~ /^#{target.gsub('/','\/')}/ { sub("#{source}", "#{target}", $13); print $11, $13 }' | while read A B; do rm $A; ln -s $B $A; done
299
309
  EOF
300
310
 
@@ -320,23 +330,24 @@ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -v /dev/shm/sem
320
330
  EOF
321
331
  else
322
332
  coda +=<<-EOF
323
- #{exec_cmd} system clean
333
+ ##{exec_cmd} system clean
324
334
  if [ $exit_status == '0' -a $sync_es == '0' ]; then
325
335
  rm -Rfv #{contain} &>> #{fsync}
326
336
  else
327
337
  echo "ERROR: Process failed or results could not sync correctly. Contain directory not purged" &>> #{fsync}
328
338
  fi
329
- unset sync_es
330
339
  EOF
331
340
 
332
341
  end
333
342
  end
334
343
  end
344
+
335
345
  coda +=<<-EOF
336
346
 
337
347
  # Write exit status to file
338
348
  echo $exit_status > #{fexit}
339
349
  EOF
350
+
340
351
  if sync
341
352
  coda +=<<-EOF
342
353
  if [ "$sync_es" == '0' ]; then
@@ -361,6 +372,11 @@ EOF
361
372
 
362
373
  slurm_basedir = options[:slurm_basedir]
363
374
  dependencies = options.delete :slurm_dependencies
375
+ dependencies = [] if dependencies.nil?
376
+
377
+ canfail_dependencies = dependencies.select{|dep| dep =~ /^canfail:(\d+)/ }.collect{|dep| dep.partition(":").last}
378
+ dependencies = dependencies.reject{|dep| dep =~ /^canfail:(\d+)/ }
379
+
364
380
  Open.mkdir slurm_basedir
365
381
 
366
382
  dry_run = options.delete :dry_run
@@ -369,6 +385,7 @@ EOF
369
385
  ferr = File.join(slurm_basedir, 'std.err')
370
386
  fjob = File.join(slurm_basedir, 'job.id')
371
387
  fdep = File.join(slurm_basedir, 'dependencies.list')
388
+ fcfdep = File.join(slurm_basedir, 'canfail_dependencies.list')
372
389
  fexit = File.join(slurm_basedir, 'exit.status')
373
390
  fsync = File.join(slurm_basedir, 'sync.log')
374
391
  fcmd = File.join(slurm_basedir, 'command.slurm')
@@ -400,8 +417,21 @@ EOF
400
417
  Open.rm fexit
401
418
  Open.rm fout
402
419
  Open.rm ferr
420
+
403
421
  Open.write(fdep, dependencies * "\n") if dependencies.any?
404
- dep_str = dependencies.any? ? "--dependency=afterok:" + dependencies * ":" : ''
422
+ Open.write(fcfdep, canfail_dependencies * "\n") if canfail_dependencies.any?
423
+
424
+
425
+ dep_str = '--dependency='
426
+ normal_dep_str = dependencies.any? ? "afterok:" + dependencies * ":" : nil
427
+ canfail_dep_str = canfail_dependencies.any? ? "afterany:" + canfail_dependencies * ":" : nil
428
+
429
+ if normal_dep_str.nil? && canfail_dep_str.nil?
430
+ dep_str = ""
431
+ else
432
+ dep_str += [normal_dep_str, canfail_dep_str].compact * ","
433
+ end
434
+
405
435
  job = CMD.cmd("sbatch #{dep_str} '#{fcmd}'").read.scan(/\d+/).first.to_i
406
436
  Log.debug "SBATCH job id: #{job}"
407
437
  Open.write(fjob, job.to_s)
@@ -494,7 +524,11 @@ EOF
494
524
  dry_run = options.delete :dry_run
495
525
  tail = options.delete :tail
496
526
  dependencies = options.delete :slurm_dependencies
527
+ procpath = options.delete :SLURM_procpath
528
+
497
529
  options[:jobname] = job.clean_name
530
+ log_level = options.delete :log
531
+ log_level ||= Log.severity
498
532
 
499
533
  workflow = job.workflow
500
534
 
@@ -519,14 +553,13 @@ EOF
519
553
  inputs_dir = File.join(tmp_directory, 'inputs_dir')
520
554
  saved = Step.save_job_inputs(job, inputs_dir)
521
555
 
522
- if saved && saved.any?
523
- options[:inputs_dir] = inputs_dir
524
- cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '--load_inputs', inputs_dir, '--log', (options[:log] || Log.severity).to_s]
525
- else
526
- cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '--log', (options[:log] || Log.severity).to_s]
527
- end
556
+ cmd = ['workflow', 'task', workflow.to_s, task.to_s, '--printpath', '--log', log_level.to_s]
557
+
558
+ cmd << "--procpath_performance='#{tmp_directory}/procpath##{procpath.gsub(',', '#')}'" if procpath
559
+
560
+ cmd << "--override_deps='#{override_deps.gsub("'", '\'')}'" if override_deps and not override_deps.empty?
528
561
 
529
- cmd << "--override_deps='#{override_deps}'" if override_deps and not override_deps.empty?
562
+ cmd << "--load_inputs='#{inputs_dir}'" if saved && saved.any?
530
563
 
531
564
  template = self.template(cmd, options)
532
565
  jobid = self.issue_template(template, options.merge(:slurm_basedir => slurm_basedir, :dry_run => dry_run, :slurm_dependencies => dependencies))
data/lib/rbbt/persist.rb CHANGED
@@ -110,6 +110,8 @@ module Persist
110
110
  def self.load_file(path, type)
111
111
  begin
112
112
  case (type || :marshal).to_sym
113
+ when :path
114
+ path
113
115
  when :nil
114
116
  nil
115
117
  when :boolean
@@ -167,6 +169,8 @@ module Persist
167
169
  end
168
170
 
169
171
  case (type || :marshal).to_sym
172
+ when :path
173
+ nil
170
174
  when :nil
171
175
  nil
172
176
  when :boolean
@@ -104,9 +104,6 @@ module Persist
104
104
  write(true) if closed? || ! write?
105
105
  res = begin
106
106
  yield
107
- rescue Exception
108
- Log.exception $!
109
- raise $!
110
107
  ensure
111
108
  close
112
109
  end
@@ -115,7 +112,6 @@ module Persist
115
112
  end
116
113
 
117
114
  def read_and_close
118
- #return yield if @locked
119
115
  if read? || write?
120
116
  begin
121
117
  return yield
@@ -134,6 +130,41 @@ module Persist
134
130
  end
135
131
  end
136
132
 
133
+ def read_lock
134
+ read if closed?
135
+ if read?
136
+ return yield
137
+ end
138
+
139
+ lock do
140
+ close
141
+ read true
142
+ begin
143
+ yield
144
+ end
145
+ end
146
+ end
147
+
148
+ def write_lock
149
+ write if closed?
150
+ if write?
151
+ begin
152
+ return yield
153
+ ensure
154
+ close
155
+ end
156
+ end
157
+
158
+ lock do
159
+ close
160
+ write true
161
+ begin
162
+ yield
163
+ end
164
+ end
165
+ end
166
+
167
+
137
168
  def merge!(hash)
138
169
  hash.each do |key,values|
139
170
  self[key] = values
@@ -141,38 +172,38 @@ module Persist
141
172
  end
142
173
 
143
174
  def range(*args)
144
- self.read_and_close do
175
+ self.read_lock do
145
176
  super(*args)
146
177
  end
147
178
  end
148
179
 
149
180
  def include?(*args)
150
- self.read_and_close do
181
+ self.read_lock do
151
182
  super(*args) #- TSV::ENTRY_KEYS.to_a
152
183
  end
153
184
  end
154
185
 
155
186
  def [](*args)
156
- self.read_and_close do
187
+ self.read_lock do
157
188
  super(*args) #- TSV::ENTRY_KEYS.to_a
158
189
  end
159
190
  end
160
191
 
161
192
  def []=(*args)
162
- self.write_and_close do
193
+ self.write_lock do
163
194
  super(*args) #- TSV::ENTRY_KEYS.to_a
164
195
  end
165
196
  end
166
197
 
167
198
  def keys(*args)
168
- self.read_and_close do
199
+ self.read_lock do
169
200
  super(*args)
170
201
  end
171
202
  end
172
203
 
173
204
 
174
205
  def prefix(key)
175
- self.read_and_close do
206
+ self.read_lock do
176
207
  range(key, 1, key + MAX_CHAR, 1)
177
208
  end
178
209
  end
@@ -184,13 +215,13 @@ module Persist
184
215
 
185
216
 
186
217
  def size(*args)
187
- self.read_and_close do
218
+ self.read_lock do
188
219
  super(*args)
189
220
  end
190
221
  end
191
222
 
192
223
  def each(*args, &block)
193
- self.read_and_close do
224
+ self.read_lock do
194
225
  super(*args, &block)
195
226
  end
196
227
  end
@@ -208,7 +239,7 @@ module Persist
208
239
  end
209
240
 
210
241
  def values_at(*keys)
211
- self.read_and_close do
242
+ self.read_lock do
212
243
  keys.collect do |k|
213
244
  self[k]
214
245
  end