rbbt-util 5.28.10 → 5.29.1

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
@@ -0,0 +1,24 @@
+ require 'rbbt/workflow/util/orchestrator'
+ module HPC
+   module SLURM
+     def self.orchestrate_job(job, options, seen = {})
+       return if job.done?
+       return unless job.path.split("/")[-4] == "jobs"
+       options.delete "recursive_clean"
+       options.delete "tail"
+       rules = YAML.load(Open.read(options[:rules])) if options[:rules]
+       rules ||= {}
+
+       deps = job.dependencies || []
+       deps += job.input_dependencies || []
+
+       dep_ids = deps.collect do |dep|
+         seen[dep.path] ||= self.orchestrate_job(dep, options.dup, seen)
+       end.compact
+
+       job_rules = Workflow::Orchestrator.job_rules(rules, job)
+       job_options = options.merge(job_rules).merge(:slurm_dependencies => dep_ids)
+       run_job(job, job_options)
+     end
+   end
+ end
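
For orientation, a minimal sketch of how this entry point might be driven from user code; `MyWorkflow`, its `:my_task` task, and the rules file path are hypothetical, and the rules YAML follows whatever layout `Workflow::Orchestrator.job_rules` expects:

    require 'rbbt/workflow'
    Workflow.require_workflow "MyWorkflow"                    # hypothetical workflow
    job = MyWorkflow.job(:my_task, nil, :input1 => "value")   # hypothetical task/inputs

    # Submits the job's unfinished dependencies first, then the job itself;
    # each dependency's SLURM id is threaded into the parent's
    # :slurm_dependencies option, which becomes an --dependency=afterok list.
    HPC::SLURM.orchestrate_job(job, :rules => "etc/slurm_rules.yaml")

Since `seen` memoizes submissions by job path, a dependency shared by several jobs is only issued once.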
@@ -0,0 +1,570 @@
+ module HPC
+   class SBATCH < Exception
+     attr_accessor :directory
+     def initialize(directory)
+       @directory = directory
+     end
+   end
+
+   module SLURM
+
+     def self.template(args, options = {})
+
+       development = options.delete :drbbt
+       singularity = options.delete :singularity
+       contain = options.delete :contain
+       sync = options.delete :sync
+       user_group = options.delete :user_group
+       contain_and_sync = options.delete :contain_and_sync
+       wipe_container = options.delete :wipe_container
+       copy_image = options.delete :copy_image
+       exclusive = options.delete :exclusive
+       highmem = options.delete :highmem
+
+       queue = options.delete(:queue) || 'bsc_ls'
+       task_cpus = options.delete(:task_cpus) || 1
+       nodes = options.delete(:nodes) || 1
+       time = options.delete(:time) || "0:00:10"
+
+       inputs_dir = options.delete :inputs_dir
+       config_keys = options.delete :config_keys
+
+       user = ENV['USER'] || `whoami`.strip
+       group = File.basename(File.dirname(ENV['HOME']))
+
+       if contain_and_sync
+         contain = "/scratch/tmp/rbbt-#{user}" if contain.nil?
+         sync = "~/.rbbt/var/jobs" if sync.nil?
+         wipe_container = "post" if wipe_container.nil?
+       end
+
+       contain = nil if contain == "" || contain == "none"
+       sync = nil if sync == "" || sync == "none"
+
+       contain = File.expand_path(contain) if contain
+
+       name = options[:name] ||= Misc.obj2digest({:options => options.collect{|k,v| [k,v]}.sort_by{|k,v| k.to_s }, :args => args})
+       options.delete(:name)
+       slurm_basedir = options[:slurm_basedir] ||= File.expand_path(File.join('~/rbbt-slurm', name)) if slurm_basedir.nil?
+       options.delete(:slurm_basedir)
+
+       rbbt_cmd = args.reject{|e| e == '--' }.collect{|e| e.include?(" ") ? '"' + e + '"' : e } * " "
+
+       rbbt_cmd += " " << options.collect do |o,v|
+         o = o.to_s
+         case v
+         when TrueClass
+           '--' << o
+         when FalseClass
+           '--' << o << "=false"
+         else
+           ['--' << o, "'#{v}'"] * " "
+         end
+       end * " "
+
+       rbbt_cmd << " --config_keys='#{config_keys}'" if config_keys and not config_keys.empty?
+
+       time = Misc.format_seconds Misc.timespan(time) unless time.include? ":"
+
+
+       #{{{ PREPARE LOCAL LOGFILES
+
+       Open.mkdir slurm_basedir
+
+       fout = File.join(slurm_basedir, 'std.out')
+       ferr = File.join(slurm_basedir, 'std.err')
+       fjob = File.join(slurm_basedir, 'job.id')
+       fexit = File.join(slurm_basedir, 'exit.status')
+       fsync = File.join(slurm_basedir, 'sync.log')
+       fcmd = File.join(slurm_basedir, 'command.slurm')
+
+       #{{{ GENERATE TEMPLATE
+
+       # HEADER
+       header = <<-EOF
+ #!/bin/bash
+ #SBATCH --qos="#{queue}"
+ #SBATCH --job-name="#{name}"
+ #SBATCH --workdir="#{Dir.pwd}"
+ #SBATCH --output="#{fout}"
+ #SBATCH --error="#{ferr}"
+ #SBATCH --cpus-per-task="#{task_cpus}"
+ #SBATCH --time="#{time}"
+ #SBATCH --nodes="#{nodes}"
+ EOF
+
+       prep = ""
+
+       if highmem
+         header += <<-EOF
+ #SBATCH --constraint=highmem
+ EOF
+       end
+
+       if exclusive
+         header += <<-EOF
+ #SBATCH --exclusive
+ EOF
+       end
+
+       header += <<-EOF
+ #CMD: #{rbbt_cmd}
+ EOF
+
+       # ENV
+       env = ""
+       env += <<-EOF
+ # Prepare env
+ [[ -f ~/config/load.sh ]] && source ~/config/load.sh
+ module load java
+
+ # Calculate max available memory
+ let "MAX_MEMORY=$SLURM_MEM_PER_CPU * $SLURM_CPUS_PER_TASK" || let MAX_MEMORY="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / 1024"
+ EOF
+
+
+       # RUN
+       run = ""
+       exec_cmd = %(env _JAVA_OPTIONS="-Xms1g -Xmx${MAX_MEMORY}m")
+
+
+       if singularity
+         #{{{ SINGULARITY
+
+         singularity_exec = %(singularity exec -e -B $SINGULARITY_OPT_DIR:/singularity_opt/ -B /apps/)
+
+         env += <<-EOF
+ module load intel/2018.1
+ module load singularity
+ PROJECTS_ROOT="/gpfs/projects/bsc26/"
+ SINGULARITY_IMG="$PROJECTS_ROOT/rbbt.singularity.img"
+ SINGULARITY_OPT_DIR="$PROJECTS_ROOT/singularity_opt/"
+ SINGULARITY_RUBY_INLINE="$HOME/.singularity_ruby_inline"
+ mkdir -p "$SINGULARITY_RUBY_INLINE"
+ EOF
+
+         if contain
+           scratch_group_dir = File.join('/gpfs/scratch/', group)
+           projects_group_dir = File.join('/gpfs/projects/', group)
+
+           prep += <<-EOF
+
+ # Prepare container dir
+ CONTAINER_DIR="#{contain}"
+ mkdir -p $CONTAINER_DIR/.rbbt/etc/
+
+ for dir in .ruby_inline git home; do
+ mkdir -p $CONTAINER_DIR/$dir
+ done
+
+ for tmpd in persist_locks produce_locks R_sockets sensiblewrite sensiblewrite_locks step_info_locks tsv_open_locks; do
+ mkdir -p $CONTAINER_DIR/.rbbt/tmp/$tmpd
+ done
+
+ # Copy environment
+ cp ~/.rbbt/etc/environment $CONTAINER_DIR/.rbbt/etc/
+
+ # Set search_paths
+ echo "singularity: /singularity_opt/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" > $CONTAINER_DIR/.rbbt/etc/search_paths
+ echo "rbbt_user: /home/rbbt/.rbbt/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
+ echo "outside_home: $CONTAINER_DIR/home/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
+ echo "group_projects: #{projects_group_dir}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
+ echo "group_scratch: #{scratch_group_dir}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
+ echo "user_projects: #{projects_group_dir}/#{user}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
+ echo "user_scratch: #{scratch_group_dir}/#{user}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
+ EOF
+
+           if user_group && group != user_group
+             prep += <<-EOF
+
+ # Add user_group search_path
+ echo "#{user_group}: /gpfs/projects/#{user_group}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
+ EOF
+           end
+
+           if inputs_dir
+             prep += <<-EOF
+
+ # Copy inputs
+ [[ -d '#{inputs_dir}' ]] && cp -R '#{inputs_dir}' $CONTAINER_DIR/inputs
+ EOF
+             rbbt_cmd = rbbt_cmd.sub(inputs_dir, "#{contain}/inputs")
+           end
+
+           if copy_image
+             prep += <<EOF
+
+ # Copy image
+ rsync -avz "$SINGULARITY_IMG" "$CONTAINER_DIR/rbbt.singularity.img" 1>&2
+ SINGULARITY_IMG="$CONTAINER_DIR/rbbt.singularity.img"
+ EOF
+           end
+
+           if wipe_container == "pre" || wipe_container == "both"
+             if singularity
+               prep += <<-EOF
+
+ # Clean container pre
+ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -Rfv .rbbt/var/jobs &>> #{fsync}
+ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean -f &>> #{fsync}
+ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -Rfv tmp/ &>> #{fsync}
+ EOF
+             else
+               prep = ""
+             end
+           end
+         end
+
+         if contain
+           singularity_exec << %( -C -H "$CONTAINER_DIR" \
+ -B /scratch/tmp \
+ #{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
+ -B #{scratch_group_dir} \
+ -B #{projects_group_dir} \
+ -B "$SINGULARITY_RUBY_INLINE":"$CONTAINER_DIR/.ruby_inline":rw \
+ -B ~/git:"$CONTAINER_DIR/git":ro \
+ #{Open.exists?('~/.rbbt/software/opt/') ? '-B ~/.rbbt/software/opt/:"/opt/":ro' : '' } \
+ -B ~/.rbbt:"$CONTAINER_DIR/home/":ro \
+ "$SINGULARITY_IMG")
+           exec_cmd << ' TMPDIR="$CONTAINER_DIR/.rbbt/tmp" '
+         else
+           singularity_exec += %( -B "$SINGULARITY_RUBY_INLINE":"$HOME/.ruby_inline":rw "$SINGULARITY_IMG" )
+         end
+
+         if development
+           exec_cmd += " rbbt --dev='#{development}'"
+         else
+           exec_cmd += ' rbbt'
+         end
+
+         exec_cmd = singularity_exec + " " + exec_cmd
+       else
+         if development
+           exec_cmd << " " << %(~/git/rbbt-util/bin/rbbt --dev=#{development})
+         else
+           exec_cmd << " " << 'rbbt'
+         end
+
+         if contain
+           rbbt_cmd << " " << %(--workdir_all='#{contain}')
+         end
+       end
+
+
+       cmd = <<-EOF
+ #{exec_cmd} \\
+ #{rbbt_cmd}
+ EOF
+
+       run += <<-EOF
+
+ # Run command
+ #{cmd}
+
+ # Save exit status
+ exit_status=$?
+
+ EOF
+
+       # CODA
+       coda = ""
+       if sync
+         if singularity
+           coda += <<-EOF
+ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean all -q &>> #{fsync}
+ EOF
+         else
+           coda += <<-EOF
+ rbbt system clean all -q &>> #{fsync}
+ EOF
+         end
+
+         if sync.include?("=>")
+           source, _sep, sync = sync.partition("=>")
+           source = source.strip
+           sync = sync.strip
+           source = File.join(File.expand_path(contain), source)
+         else
+           source = File.join(File.expand_path(contain), '.rbbt/var/jobs')
+         end
+
+         target = File.expand_path(sync)
+         coda += <<-EOF
+
+ # Sync data to target location
+ mkdir -p "$(dirname '#{target}')"
+ rsync -avztAXHP --copy-unsafe-links "#{source}/" "#{target}/" &>> #{fsync}
+ sync_es="$?"
+ find '#{target}' -type l -ls | awk '$13 ~ /^#{target.gsub('/','\/')}/ { sub("#{source}", "#{target}", $13); print $11, $13 }' | while read A B; do rm $A; ln -s $B $A; done
+ EOF
+
+         if contain && (wipe_container == "post" || wipe_container == "both")
+           prep = <<-EOF + prep
+ if ls -A '#{contain}' &> /dev/null ; then
+ echo "ERROR: Container directory not empty, refusing to wipe. #{contain}" &>> #{fsync}
+ fi
+ EOF
+           if singularity
+             coda += <<-EOF
+ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -v /dev/shm/sem.*.{in,out,process} /dev/shm/sem.Session-PID.*.sem 2> /dev/null >> #{fsync}
+
+
+ # Clean container directory
+ #if [ $exit_status == '0' -a $sync_es == '0' ]; then
+ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean -f &>> #{fsync}
+ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -Rfv .rbbt/var/jobs &>> #{fsync}
+ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -Rfv tmp/ &>> #{fsync}
+ #else
+ # echo "ERROR: Process failed or results could not sync correctly. Contain directory not purged" &>> #{fsync}
+ #fi
+ EOF
+           else
+             coda += <<-EOF
+ #{exec_cmd} system clean
+ if [ $exit_status == '0' -a $sync_es == '0' ]; then
+ rm -Rfv #{contain} &>> #{fsync}
+ else
+ echo "ERROR: Process failed or results could not sync correctly. Contain directory not purged" &>> #{fsync}
+ fi
+ unset sync_es
+ EOF
+
+           end
+         end
+       end
+       coda += <<-EOF
+
+ # Write exit status to file
+ echo $exit_status > #{fexit}
+ EOF
+       if sync
+         coda += <<-EOF
+ if [ "$sync_es" == '0' ]; then
+ unset sync_es
+ exit $exit_status
+ else
+ exit $sync_es
+ fi
+ EOF
+       else
+         coda += <<-EOF
+ exit $exit_status
+ EOF
+       end
+
+       template = [header, env, prep, run, coda] * "\n"
+
+       template
+     end
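
As a hedged illustration of what `template` assembles (all values invented; the args mirror what `run_job` builds below):

    args = %w(workflow task MyWorkflow my_task -pf)   # hypothetical task
    script = HPC::SLURM.template(args, :queue => 'debug', :task_cpus => 4, :time => '2:00:00')
    # script begins, approximately:
    #   #!/bin/bash
    #   #SBATCH --qos="debug"
    #   #SBATCH --job-name="<options digest>"
    #   ...
    #   #SBATCH --cpus-per-task="4"
    #   #SBATCH --time="2:00:00"
    # followed by the env, prep, run, and coda sections joined with blank lines.

Any option not consumed by `template` itself is appended to the `rbbt` command line, so resource options and task inputs travel through the same hash.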
+
+     def self.issue_template(template, options = {})
+
+       slurm_basedir = options[:slurm_basedir]
+       dependencies = options.delete(:slurm_dependencies) || []
+       Open.mkdir slurm_basedir
+
+       dry_run = options.delete :dry_run
+
+       fout = File.join(slurm_basedir, 'std.out')
+       ferr = File.join(slurm_basedir, 'std.err')
+       fjob = File.join(slurm_basedir, 'job.id')
+       fdep = File.join(slurm_basedir, 'dependencies.list')
+       fexit = File.join(slurm_basedir, 'exit.status')
+       fsync = File.join(slurm_basedir, 'sync.log')
+       fcmd = File.join(slurm_basedir, 'command.slurm')
+
+       job = nil
+       if options[:clean_job]
+         [fcmd, fjob, fout, ferr, fsync, fexit].each do |file|
+           Open.rm file if Open.exists? file
+         end
+       end
+
+       return if Open.exists?(fexit)
+
+       STDERR.puts Log.color(:magenta, "Issuing SLURM file: #{fcmd}")
+       STDERR.puts template
+
+       Open.write(fcmd, template) unless File.exists? fcmd
+       if File.exists?(fjob)
+         job = Open.read(fjob).to_i
+       else
+         if File.exists?(fout)
+           return
+         elsif dry_run
+           STDERR.puts Log.color(:magenta, "To execute run: ") + Log.color(:blue, "sbatch '#{slurm_basedir}/command.slurm'")
+           STDERR.puts Log.color(:magenta, "To monitor progress run (needs local rbbt): ") + Log.color(:blue, "rbbt mn --tail -w '#{slurm_basedir}'")
+           raise HPC::SBATCH, slurm_basedir
+         else
+           Open.rm fsync
+           Open.rm fexit
+           Open.rm fout
+           Open.rm ferr
+           Open.write(fdep, dependencies * "\n") if dependencies.any?
+           dep_str = dependencies.any? ? "--dependency=afterok:" + dependencies * ":" : ''
+           job = CMD.cmd("sbatch #{dep_str} '#{fcmd}'").read.scan(/\d+/).first.to_i
+           Log.debug "SBATCH job id: #{job}"
+           Open.write(fjob, job.to_s)
+           job
+         end
+       end
+     end
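
Submission is deliberately idempotent: an existing `exit.status` short-circuits, an existing `job.id` is reused, and under `dry_run` the method raises `HPC::SBATCH` so the caller can print instructions instead of submitting. The dependency string is plain `sbatch` syntax; a worked example with invented job ids:

    dependencies = [123456, 123457]
    "--dependency=afterok:" + dependencies * ":"
    # => "--dependency=afterok:123456:123457"
    # final submission: sbatch --dependency=afterok:123456:123457 '<slurm_basedir>/command.slurm'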
+
+     def self.follow_job(slurm_basedir, tail = true)
+       fjob = File.join(slurm_basedir, 'job.id')
+       fout = File.join(slurm_basedir, 'std.out')
+       ferr = File.join(slurm_basedir, 'std.err')
+       fstatus = File.join(slurm_basedir, 'job.status')
+
+       job = Open.read(fjob).strip if Open.exists?(fjob)
+
+       if job
+         status_txt = CMD.cmd("squeue --job #{job}").read
+         STDERR.puts Log.color(:magenta, "Status [#{job.to_i}]:")
+         STDERR.puts status_txt
+         lines = status_txt.split("\n").length
+       end
+
+       if tail
+         Log.severity = 10
+         while ! File.exists? fout
+           if job
+             STDERR.puts
+             Log.clear_line(STDERR)
+             STDERR.write Log.color(:magenta, "Waiting for Output")
+             3.times do
+               STDERR.write Log.color(:magenta, ".")
+               sleep 1
+             end
+             status_txt = CMD.cmd("squeue --job #{job}").read
+             lines.times do
+               Log.clear_line(STDERR)
+             end
+             Log.clear_line(STDERR)
+             STDERR.puts Log.color(:magenta, "Status [#{job.to_i}]:")
+             STDERR.puts status_txt
+             lines = status_txt.split("\n").length
+           end
+         end
+         STDERR.puts
+         Log.clear_line(STDERR)
+         STDERR.puts Log.color(:magenta, "Output:")
+         begin
+           CMD.cmd("squeue --job #{job} > #{fstatus}")
+           out = CMD.cmd("tail -f '#{fout}'", :pipe => true) if File.exists?(fout) and not tail == :STDERR
+           err = CMD.cmd("tail -f '#{ferr}'", :pipe => true) if File.exists?(ferr)
+
+           terr = Misc.consume_stream(err, true, STDERR) if err
+           tout = Misc.consume_stream(out, true, STDOUT) if out
+
+           sleep 3 while CMD.cmd("squeue --job #{job}").read.include? job.to_s
+         rescue Aborted
+         ensure
+           begin
+             terr.exit if terr
+             tout.exit if tout
+             err.close if err
+             err.join if err
+           rescue Exception
+           end
+
+           begin
+             out.close if out
+             out.join if out
+           rescue Exception
+           end
+         end
+       end
+     end
+
+     def self.wait_for_job(slurm_basedir, time = 1)
+       fexit = File.join(slurm_basedir, 'exit.status')
+       fjob = File.join(slurm_basedir, 'job.id')
+       job = Open.read(fjob) if Open.exists?(fjob)
+
+
+       while ! Open.exists?(fexit)
+         sleep time
+       end
+     end
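
Both monitoring helpers operate purely on the staging directory, so they can run from a separate thread or process; a hedged sketch mirroring how `run_job` uses them below (directory name invented):

    basedir = File.expand_path("~/rbbt-slurm/SLURM_rbbt_job-abc123")  # hypothetical
    monitor = Thread.new { HPC::SLURM.follow_job(basedir, :STDERR) }  # tail output to stderr
    HPC::SLURM.wait_for_job(basedir)   # poll until exit.status appears
    monitor.raise Aborted              # stop the tail, as run_job does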
+
+     def self.run_job(job, options = {})
+       options = IndiferentHash.setup(options.dup)
+
+       dry_run = options.delete :dry_run
+       tail = options.delete :tail
+       dependencies = options.delete :slurm_dependencies
+       options[:jobname] = job.clean_name
+
+       workflow = job.workflow
+
+       task = Symbol === job.overriden ? job.overriden : job.task_name
+
+       if job.overriden
+         override_deps = job.rec_dependencies.
+           select{|dep| Symbol === dep.overriden }.
+           collect do |dep|
+
+             name = [dep.workflow.to_s, dep.task_name] * "#"
+             [name, dep.path] * "="
+           end * ","
+       end
+
+       remove_slurm_basedir = options.delete :remove_slurm_basedir
+       slurm_basedir = options.delete :SLURM_basedir
+       slurm_basedir = "~/rbbt-slurm" if slurm_basedir.nil?
+       TmpFile.with_file(nil, remove_slurm_basedir, :tmpdir => slurm_basedir, :prefix => "SLURM_rbbt_job-") do |tmp_directory|
+         options[:slurm_basedir] ||= tmp_directory
+         slurm_basedir = options[:slurm_basedir]
+         inputs_dir = File.join(tmp_directory, 'inputs_dir')
+         saved = Step.save_job_inputs(job, inputs_dir)
+
+         if saved && saved.any?
+           options[:inputs_dir] = inputs_dir
+           cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '--load_inputs', inputs_dir, '--log', (options[:log] || Log.severity).to_s]
+         else
+           cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '--log', (options[:log] || Log.severity).to_s]
+         end
+
+         cmd << "--override_deps='#{override_deps}'" if override_deps and not override_deps.empty?
+
+         template = self.template(cmd, options)
+         jobid = self.issue_template(template, options.merge(:slurm_basedir => slurm_basedir, :dry_run => dry_run, :slurm_dependencies => dependencies))
+
+         return jobid unless tail
+
+         t_monitor = Thread.new do
+           self.follow_job(slurm_basedir, :STDERR)
+         end
+         self.wait_for_job(slurm_basedir)
+         t_monitor.raise Aborted
+         return unless Open.read(File.join(slurm_basedir, 'exit.status')).strip == '0'
+         path = Open.read(File.join(slurm_basedir, 'std.out')).strip
+         if Open.exists?(path) && job.path != path
+           Log.info "Path of SLURM job #{path} is different from original job #{job.path}. Establishing link."
+           Open.ln path, job.path
+           Open.ln path + '.info', job.path + '.info' if Open.exists?(path + '.info')
+           Open.ln path + '.files', job.path + '.files' if Open.exists?(path + '.files')
+         end
+         jobid
+       end
+     end
+   end
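
End to end, `run_job` is the piece user code would call for a single job; a hedged sketch with invented workflow and option values:

    job = MyWorkflow.job(:my_task, nil, :input1 => "value")   # hypothetical
    jobid = HPC::SLURM.run_job(job,
                               :queue => 'bsc_ls', :time => '4:00:00', :task_cpus => 8,
                               :tail => true)                 # follow output, link results back

With `:tail` unset the call returns the SLURM job id right after submission; with it set, the method waits for `exit.status` and links the remote result path back to the local job path.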
+
+   def self.relay(job, options = {})
+     options = Misc.add_defaults options, :target => 'mn1', :search_path => 'user'
+     done_deps = job.dependencies.select do |dep|
+       dep.done?
+     end
+
+     error_deps = job.dependencies.select do |dep|
+       dep.error? && ! dep.recoverable_error?
+     end
+
+     (done_deps + error_deps).each do |dep|
+       Step.migrate(dep.path, options[:search_path], options)
+     end
+
+   end
+ end
+