scout-gear 10.9.0 → 10.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/.vimproject +25 -0
  3. data/VERSION +1 -1
  4. data/bin/scout +4 -1
  5. data/lib/scout/knowledge_base/registry.rb +2 -3
  6. data/lib/scout/workflow/definition.rb +11 -0
  7. data/lib/scout/workflow/deployment/local.rb +288 -0
  8. data/lib/scout/workflow/deployment/orchestrator/batches.rb +130 -0
  9. data/lib/scout/workflow/deployment/orchestrator/chains.rb +104 -0
  10. data/lib/scout/workflow/deployment/orchestrator/rules.rb +256 -0
  11. data/lib/scout/workflow/deployment/orchestrator/workload.rb +67 -0
  12. data/lib/scout/workflow/deployment/scheduler/job.rb +740 -0
  13. data/lib/scout/workflow/deployment/scheduler/lfs.rb +125 -0
  14. data/lib/scout/workflow/deployment/scheduler/pbs.rb +176 -0
  15. data/lib/scout/workflow/deployment/scheduler/slurm.rb +158 -0
  16. data/lib/scout/workflow/deployment/scheduler.rb +73 -0
  17. data/lib/scout/workflow/deployment.rb +10 -1
  18. data/lib/scout/workflow/exceptions.rb +2 -0
  19. data/lib/scout/workflow/step/config.rb +3 -0
  20. data/lib/scout/workflow/step/info.rb +2 -2
  21. data/lib/scout/workflow/step/progress.rb +52 -0
  22. data/lib/scout/workflow/step.rb +30 -1
  23. data/lib/scout/workflow/task.rb +2 -0
  24. data/scout-gear.gemspec +23 -4
  25. data/scout_commands/batch/list +1 -1
  26. data/scout_commands/workflow/cmd +5 -13
  27. data/scout_commands/workflow/info +1 -1
  28. data/scout_commands/workflow/task +61 -25
  29. data/test/scout/workflow/deployment/orchestrator/test_batches.rb +138 -0
  30. data/test/scout/workflow/deployment/orchestrator/test_chains.rb +171 -0
  31. data/test/scout/workflow/deployment/orchestrator/test_rules.rb +219 -0
  32. data/test/scout/workflow/deployment/orchestrator/test_workload.rb +117 -0
  33. data/test/scout/workflow/deployment/scheduler/test_job.rb +31 -0
  34. data/test/scout/workflow/deployment/scheduler/test_lfs.rb +32 -0
  35. data/test/scout/workflow/deployment/scheduler/test_pbs.rb +32 -0
  36. data/test/scout/workflow/deployment/scheduler/test_slurm.rb +32 -0
  37. data/test/scout/workflow/deployment/{test_orchestrator.rb → test_local.rb} +161 -33
  38. data/test/scout/workflow/deployment/test_scheduler.rb +75 -0
  39. data/test/scout/workflow/deployment/test_trace.rb +1 -1
  40. data/test/scout/workflow/step/test_progress.rb +27 -0
  41. data/test/scout/workflow/task/test_inputs.rb +17 -0
  42. data/test/test_helper.rb +2 -1
  43. metadata +22 -3
  44. data/lib/scout/workflow/deployment/orchestrator.rb +0 -292
@@ -0,0 +1,740 @@
1
# SchedulerJob holds the scheduler-agnostic machinery to run Step jobs
# through a batch system: it renders the batch script (header, environment
# preparation, execution, sync and cleanup sections), prepares the
# submission directory, submits, and monitors the job.  Concrete backends
# (SLURM, PBS, LSF) extend this module and supply the system-specific
# pieces: `system`, `header`, `run_template`, `job_status`, ...
module SchedulerJob
  @batch_base_dir = File.expand_path(File.join('~/scout-batch'))
  self.singleton_class.attr_accessor :batch_base_dir

  module_function

  public

  # Batch system identifier; overridden by each concrete scheduler.
  def system
    :batch
  end

  # Build the command prefix used inside the batch script to invoke scout,
  # optionally wrapped in `env`, a launcher (e.g. srun under SLURM) and a
  # singularity container.  Returns a String shell-command prefix.
  def exec_cmd(job, options = {})
    options = IndiferentHash.add_defaults options, :launcher => :srun if self.system == :slurm

    launcher, env_cmd, development = IndiferentHash.process_options options, :launcher, :env_cmd, :development

    if contain = options[:contain]
      contain = File.expand_path(contain)
      env_cmd ||= ""
      env_cmd << " TMPDIR='#{contain}/.scout/tmp' "
    end

    if options[:singularity]

      group, user, user_group, scratch_group_dir, projects_group_dir = options.values_at :group, :user, :user_group, :scratch_group_dir, :projects_group_dir

      singularity_img, singularity_opt_dir, singularity_ruby_inline, singularity_mounts = options.values_at :singularity_img, :singularity_opt_dir, :singularity_ruby_inline, :singularity_mounts

      singularity_cmd = %(singularity exec -e -B "#{File.expand_path singularity_opt_dir}":/singularity_opt/ -B "#{File.expand_path singularity_ruby_inline}":"/.singularity_ruby_inline":rw )

      if singularity_mounts
        singularity_mounts.split(",").each do |mount|
          singularity_cmd += "-B #{ mount } "
        end
      end

      if contain && options[:hardened]
        # Every line needs a trailing backslash so the shell sees a single
        # command line (the original was missing one continuation after the
        # ruby_inline bind, which injected a raw newline into the command).
        singularity_cmd << %( -C -H "#{contain}" \
-B "/.singularity_ruby_inline":"#{contain}/.singularity_ruby_inline":rw \
-B "#{options[:batch_dir]}" \
-B /scratch/tmp \
#{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
-B #{scratch_group_dir} \
-B #{projects_group_dir} \
-B /apps/ \
-B ~/git:"#{contain}/git":ro \
#{Open.exists?('~/.scout/software/opt/')? '-B ~/.scout/software/opt/:"/opt/":ro' : '' } \
-B ~/.scout:"#{contain}/home/":ro)
      end

      singularity_cmd << " #{singularity_img} "
    end

    base_cmd = if launcher
                 %(#{launcher} scout)
               else
                 %(scout)
               end

    exec_cmd = if env_cmd
                 %(env #{env_cmd} #{ base_cmd })
               else
                 base_cmd
               end

    # Leading space required: without it the flag glues onto 'scout'
    exec_cmd << " --dev '#{development}'" if development

    exec_cmd = singularity_cmd + exec_cmd if singularity_cmd

    exec_cmd
  end

  # Render the `scout workflow task ...` command line that re-creates `job`:
  # serializes overriden dependencies into --override_deps and, when an
  # :inputs_dir option is given, saves the job inputs there and points the
  # command at them with --load_inputs.
  def scout_job_exec_cmd(job, options)

    jobname = job.clean_name
    workflow = job.workflow
    task = job.task_name

    IndiferentHash.add_defaults options, :jobname => jobname

    if job.recursive_overriden_deps.any?
      override_deps = job.recursive_overriden_deps.
        select do |dep| Symbol === dep.overriden end.
        collect do |dep|
          o_workflow = dep.overriden_workflow || dep.workflow
          o_workflow = o_workflow.name if o_workflow.respond_to?(:name)
          o_task_name = dep.overriden_task || dep.task.name
          name = [o_workflow, o_task_name] * "#"
          [name, dep.path] * "="
        end.uniq * ","
      options[:override_deps] = override_deps unless override_deps.empty?
    end

    # Save inputs into inputs_dir (only if provided); inputs that were
    # saved are removed from the option list so they are not passed twice
    inputs_dir = IndiferentHash.process_options options, :inputs_dir
    if inputs_dir
      saved = job.save_inputs(inputs_dir)
      options[:load_inputs] = inputs_dir if saved && !saved.empty?

      saved.each do |input|
        options.delete input
      end if saved
    end

    cmds = CMD.process_cmd_options options.merge(:add_option_dashes => true)

    <<-EOF.strip
workflow task #{workflow} #{task} #{cmds}
    EOF
  end

  # Script header; concrete schedulers override this to add their
  # directives (#SBATCH, #PBS, #BSUB, ...) after the shebang.
  def header(options)
    header =<<-EOF
#!/bin/bash
    EOF

    header
  end

  # Split `options` into the subset that configures the batch submission
  # itself and fill in defaults (paths for the standard files inside
  # batch_dir, resource defaults, singularity defaults, contain/sync
  # behavior).  Mutates `options` (keys are consumed via process_options)
  # and returns the batch_options IndiferentHash.
  def batch_options(job, options)
    IndiferentHash.setup(options)

    batch_options = IndiferentHash.setup({})

    keys = [
      :queue,
      :account,
      :partition,
      :exclusive,
      :highmem,
      :time,
      :nodes,
      :task_cpus,
      :mem,
      :mem_per_cpu,
      :gres,
      :lua_modules,
      :conda,
      :constraints,
      :licenses,
      :batch_dir,
      :batch_name,
      :contain,
      :sync,
      :contain_and_sync,
      :copy_image,
      :launcher,
      :development,
      :env_cmd,
      :env,
      :manifest,
      :user_group,
      :wipe_container,
      :workdir,
      :purge_deps,
      :singularity,
      :singularity_img,
      :singularity_mounts,
      :singularity_opt_dir,
      :singularity_ruby_inline
    ]

    # Move batch-related keys from the task options into batch_options
    keys.each do |key|
      next if options[key].nil?
      batch_options[key] = IndiferentHash.process_options options, key
    end

    batch_dir = batch_options[:batch_dir]

    batch_name = File.basename(batch_dir)
    inputs_dir = File.join(batch_dir, 'inputs_dir')

    keys_from_config = [
      :queue,
      :highmem,
      :exclusive,
      :launcher,
      :development,
      :env_cmd,
      :env,
      :user_group,
      :singularity_img,
      :singularity_mounts,
      :singularity_opt_dir,
      :singularity_ruby_inline,
      :singularity
    ]

    # NOTE(review): this loop looks inverted — it only consults
    # Scout::Config for keys that are *already* present (add_defaults will
    # then not override them) and passes the scalar config value straight
    # to add_defaults.  Presumably the intent is to fill *missing* keys
    # from configuration; confirm against Scout::Config semantics before
    # changing, so the original behavior is kept here.
    keys_from_config.each do |key|
      next unless batch_options.include? key
      default_value = Scout::Config.get(key, "batch_#{key}", "batch")
      next if default_value.nil?
      IndiferentHash.add_defaults batch_options, default_value
    end

    user = batch_options[:user] ||= ENV['USER'] || `whoami`.strip
    group = batch_options[:group] ||= File.basename(File.dirname(ENV['HOME']))
    batch_options[:scratch_group_dir] = File.join('/gpfs/scratch/', group)
    batch_options[:projects_group_dir] = File.join('/gpfs/projects/', group)

    batch_options[:singularity] = true if batch_options[:singularity_img]

    if batch_options[:contain_and_sync]
      if batch_options[:contain].nil?
        contain_base = Scout::Config.get(:contain_base_dir, :batch_contain, :batch, :default => "/scratch/tmp/scout-[USER]")
        contain_base = contain_base.sub('[USER]', user)
        random_file = TmpFile.random_name
        batch_options[:contain] = File.join(contain_base, random_file)
      end

      batch_options[:sync] ||= "~/.scout/var/jobs"
      batch_options[:wipe_container] ||= 'post'
    end

    # NOTE(review): :hardened is not among the copied keys above, so
    # batch_options[:hardened] is always nil at this point — verify
    # whether it should be forwarded from options.
    if batch_options[:contain] && ! batch_options[:hardened]
      options[:workdir_all] = batch_options[:contain]
    end

    IndiferentHash.add_defaults batch_options,
      :batch_name => batch_name,
      :inputs_dir => inputs_dir,
      :nodes => 1,
      :step_path => job.path,
      :task_cpus => 1,
      :time => '2min',
      :env => {'JDK_JAVA_OPTIONS' => "-Xms1g -Xmx${MAX_MEMORY}m"},
      :singularity_img => ENV["SINGULARITY_IMG"] || "~/scout.singularity.img",
      :singularity_ruby_inline => ENV["SINGULARITY_RUBY_INLINE"] || "~/.singularity_ruby_inline",
      :singularity_opt_dir => ENV["SINGULARITY_OPT_DIR"] || "~/singularity_opt",
      :workdir => Dir.pwd

    exec_cmd = exec_cmd(job, batch_options)
    scout_cmd = scout_job_exec_cmd(job, options)

    IndiferentHash.add_defaults batch_options,
      :exec_cmd => exec_cmd,
      :scout_cmd => scout_cmd

    batch_dir = batch_options[:batch_dir]

    # Standard files written inside batch_dir during the job's life cycle
    IndiferentHash.add_defaults batch_options,
      :fout => File.join(batch_dir, 'std.out'),
      :ferr => File.join(batch_dir, 'std.err'),
      :fjob => File.join(batch_dir, 'job.id'),
      :fdep => File.join(batch_dir, 'dependencies.list'),
      :fcfdep => File.join(batch_dir, 'canfail_dependencies.list'),
      :fexit => File.join(batch_dir, 'exit.status'),
      :fsync => File.join(batch_dir, 'sync.log'),
      :fsexit => File.join(batch_dir, 'sync.status'),
      :fenv => File.join(batch_dir, 'env.vars'),
      :fcmd => File.join(batch_dir, 'command.batch')

    batch_options
  end

  # Comment block embedded at the top of the batch script describing the
  # job; empty fields are dropped.
  def meta_data(options)
    meta =<<-EOF
#MANIFEST: #{(options[:manifest] || []) * ", "}
#DEPENDENCIES: #{(options[:dependencies] || []) * ", "}
#EXEC_CMD: #{options[:exec_cmd]}
#CMD: #{options[:scout_cmd]}
#STEP_PATH: #{options[:step_path]}
    EOF

    meta = meta.split("\n").reject{|line| line =~ /: $/} * "\n"
    meta
  end

  # Shell snippet loading the given Lua/environment modules.  `modules`
  # may be an Array or a comma-separated String.
  def load_modules(modules = [])
    modules = modules.split(/,\s*/) if String === modules

    str = ""
    modules.each do |mod|
      str << "module load #{ mod }" << "\n"
    end if modules

    str
  end

  # Shell snippet activating a conda environment; empty when no env given.
  def load_conda(env = nil)
    return "" if env.nil? || env.empty?

    <<-EOF
if ! type conda | grep function &> /dev/null; then
  if [ ! -z $CONDA_EXE ]; then
    source "$(dirname $(dirname $CONDA_EXE))/etc/profile.d/conda.sh" &> /dev/null
  fi
fi
conda activate #{ env }
    EOF
  end

  # Shell variables available to the rest of the script (MAX_MEMORY in MB,
  # read from /proc/meminfo).
  def batch_system_variables
    <<-EOF
let MAX_MEMORY="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / 1024"
    EOF
  end

  # Shell section that prepares the execution environment: helper
  # functions (erase/sync of the contain dir), ENV exports, module/conda
  # loading, and the hardened singularity container layout.
  def prepare_environment(options = {})
    modules = options[:lua_modules]
    conda = options[:conda]

    prepare_environment = ""

    functions = ""

    if contain = options[:contain]
      contain = File.expand_path(contain)
      # Redirect both stdout and stderr into the sync log; the redirection
      # order matters ('2>1' would create a file literally named "1")
      functions +=<<-EOF
function batch_erase_contain_dir(){
  rm -Rfv '#{contain}' >> '#{options[:fsync]}' 2>&1
}
      EOF

      prepare_environment +=<<-EOF
if ls -A '#{contain}' &> /dev/null ; then
  empty_contain_dir="false"
else
  empty_contain_dir="true"
fi
      EOF

      # Bash call: bare name, not 'name()' which is definition syntax
      prepare_environment +=<<-EOF if options[:wipe_container] == 'force'
batch_erase_contain_dir
      EOF
    end

    if sync = options[:sync]
      source = if options[:singularity]
                 File.join(options[:contain], '.scout/var/jobs')
               elsif options[:contain]
                 File.join(options[:contain], 'var/jobs')
               else
                 '~/.scout/var/jobs/'
               end

      source = File.expand_path(source)
      sync = File.expand_path(sync)
      functions +=<<-EOF
function batch_sync_contain_dir(){
  mkdir -p "$(dirname '#{sync}')"
  rsync -avztAXHP --copy-unsafe-links "#{source}/" "#{sync}/" >> '#{options[:fsync]}' 2>&1
  sync_es="$?"
  echo $sync_es > '#{options[:fsexit]}'
  find '#{sync}' -type l -ls | awk '$13 ~ /^#{sync.gsub('/','\/')}/ { sub("#{source}", "#{sync}", $13); print $11, $13 }' | while read A B; do rm $A; ln -s $B $A; done
}
      EOF
    end

    if options[:env]
      prepare_environment +=<<-EOF
# Set ENV variables
#{options[:env].collect{|n,v| "export #{n}=\"#{v}\"" } * "\n"}
      EOF
    end

    if options[:singularity]

      group, user, user_group, scratch_group_dir, projects_group_dir = options.values_at :group, :user, :user_group, :scratch_group_dir, :projects_group_dir

      singularity_img, singularity_opt_dir, singularity_ruby_inline = options.values_at :singularity_img, :singularity_opt_dir, :singularity_ruby_inline

      prepare_environment +=<<-EOF
# Load singularity modules
command -v singularity &> /dev/null || module load singularity
mkdir -p "#{File.expand_path singularity_opt_dir}"
      EOF

      if contain && options[:hardened]

        prepare_environment +=<<-EOF
# Prepare container for singularity
mkdir -p "#{contain}"/.scout/etc/

for dir in .ruby_inline git home; do
  mkdir -p "#{contain}"/$dir
done

for tmpd in persist_locks produce_locks R_sockets sensiblewrite sensiblewrite_locks step_info_locks tsv_open_locks; do
  mkdir -p "#{contain}/.scout/tmp/$tmpd"
done

# Copy environment
cp ~/.scout/etc/environment #{contain}/.scout/etc/

# Set search_paths
echo "singularity: /singularity_opt/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" > #{contain}/.scout/etc/search_paths
echo "scout_user: /home/scout/.scout/{TOPLEVEL}/{SUBPATH}" >> #{contain}/.scout/etc/search_paths
echo "outside_home: #{contain}/home/{TOPLEVEL}/{SUBPATH}" >> #{contain}/.scout/etc/search_paths
echo "group_projects: #{projects_group_dir}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> #{contain}/.scout/etc/search_paths
echo "group_scratch: #{scratch_group_dir}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> #{contain}/.scout/etc/search_paths
echo "user_projects: #{projects_group_dir}/#{user}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> #{contain}/.scout/etc/search_paths
echo "user_scratch: #{scratch_group_dir}/#{user}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> #{contain}/.scout/etc/search_paths
        EOF
      end
    end

    [batch_system_variables, load_modules(modules), load_conda(conda), functions, prepare_environment].reject{|s| s.empty? } * "\n"
  end

  # Shell section that runs the scout job, captures its step path and exit
  # status, and records the batch job id/system/cpus into the step info.
  def execute(options)
    exec_cmd, job_cmd, task_cpus = options.values_at :exec_cmd, :scout_cmd, :task_cpus

    script=<<-EOF
step_path=$(
#{exec_cmd} #{job_cmd} --printpath
)
exit_status=$?

if [ $exit_status -eq 0 ]; then
  [[ -z $BATCH_JOB_ID ]] || #{exec_cmd} workflow write_info --recursive --force=false --check_pid "$step_path" batch_job $BATCH_JOB_ID
  [[ -z $BATCH_SYSTEM ]] || #{exec_cmd} workflow write_info --recursive --force=false --check_pid "$step_path" batch_system $BATCH_SYSTEM
  #{exec_cmd} workflow write_info --recursive --force=false --check_pid "$step_path" batch_cpus #{task_cpus}
fi
    EOF

    script
  end

  # Shell section syncing the contain dir back after a successful run.
  def sync_environment(options = {})
    sync_environment = ""

    if options[:sync]
      sync_environment +=<<-EOF
if [ $exit_status == '0' ]; then
  batch_sync_contain_dir
else
  sync_es=$exit_status
fi
      EOF
    end

    sync_environment
  end

  # Shell section that purges dependencies and wipes the contain dir
  # according to the :purge_deps and :wipe_container options.
  def cleanup_environment(options = {})
    cleanup_environment = ""

    cleanup_environment +=<<-EOF if options[:purge_deps]
if [ $exit_status == '0' ]; then
  #{options[:exec_cmd]} workflow forget_deps --purge --recursive_purge "$step_path" >> '#{options[:fsync]}' 2>&1
fi
    EOF

    if options[:sync]
      if options[:wipe_container] == 'force'
        cleanup_environment +=<<-EOF
batch_erase_contain_dir
        EOF
      elsif options[:wipe_container] == 'post' || options[:wipe_container] == 'both'
        cleanup_environment +=<<-EOF
if [ $sync_es == '0' -a $empty_contain_dir == 'true' ]; then
  batch_erase_contain_dir
fi
        EOF
      end
    end
    cleanup_environment
  end

  # Shell section that records the exit status and terminates the script,
  # preferring the sync exit status when syncing failed.
  def coda(options)
    coda =<<-EOF
echo $exit_status > '#{options[:fexit]}'
    EOF

    if options[:sync]
      coda +=<<-EOF
if [ $sync_es == '0' ]; then
  exit $exit_status
else
  exit $sync_es
fi
      EOF
    else
      coda +=<<-EOF
exit $exit_status
      EOF
    end

    coda
  end

  # Assemble the complete batch script for `job` from the individual
  # sections.  Returns the script as a String.
  def job_template(job, options = {})
    batch_options = batch_options job, options

    header = self.header(batch_options)

    meta_data = self.meta_data(batch_options)

    prepare_environment = self.prepare_environment(batch_options)

    execute = self.execute(batch_options)

    sync_environment = self.sync_environment(batch_options)

    cleanup_environment = self.cleanup_environment(batch_options)

    coda = self.coda(batch_options)

    # Interpolate the locals computed above; the original re-invoked
    # prepare_environment(batch_options) here, doing the work twice
    <<-EOF
#{header}

# #{Log.color :green, "0. Meta-data"}
#{meta_data}

# #{Log.color :green, "1. Prepare environment"}
#{prepare_environment}
env > #{batch_options[:fenv]}

# #{Log.color :green, "2. Execute"}
#{execute}

# #{Log.color :green, "3. Sync and cleanup environment"}
#{sync_environment}
#{cleanup_environment}

# #{Log.color :green, "4. Exit"}
#{coda}
    EOF
  end

  # Write the batch script and dependency lists into batch_dir, optionally
  # wiping state files from a previous run first.  Dependencies prefixed
  # 'canfail:' go into canfail_dependencies.list.  Returns the script path.
  def prepare_submision(template, batch_dir, clean_batch_job = false, batch_dependencies = [])
    Open.mkdir batch_dir
    fcmd = File.join(batch_dir, 'command.batch')
    fdep = File.join(batch_dir, 'dependencies.list')
    fcfdep = File.join(batch_dir, 'canfail_dependencies.list')

    Open.write(fcmd, template)

    %w(std.out std.err job.id job.status dependencies.list canfail_dependencies.list exit.status sync.log inputs_dir).each do |filename|
      path = File.join(batch_dir, filename)
      Open.rm_rf path if File.exist? path
    end if clean_batch_job

    batch_dependencies = [] if batch_dependencies.nil?

    canfail_dependencies = batch_dependencies.select{|dep| dep =~ /^canfail:(\d+)/ }.collect{|dep| dep.partition(":").last}
    dependencies = batch_dependencies.reject{|dep| dep =~ /^canfail:(\d+)/ }

    Open.write(fdep, dependencies * "\n") if dependencies.any?
    Open.write(fcfdep, canfail_dependencies * "\n") if canfail_dependencies.any?

    fcmd
  end

  # Locate the batch_dir whose job.id file records the given scheduler id;
  # nil when no such directory exists.
  def batch_dir_for_id(batch_base_dir, id)
    job_id_file = Dir.glob(File.join(batch_base_dir, '*/job.id')).select{|f| Open.read(f).strip == id.to_s }.first
    job_id_file ? File.dirname(job_id_file) : nil
  end

  # Submit `job` through the batch system (unless it is already queued or
  # running), optionally tailing its output until completion.  Returns
  # [batch_job_id, batch_dir] or nil.
  def run_job(job, options = {})
    system = self.to_s.split("::").last

    batch_base_dir, clean_batch_job, remove_batch_dir, procpath, tail, batch_dependencies, dry_run, orchestration_rules_file = IndiferentHash.process_options options,
      :batch_base_dir, :clean_batch_job, :remove_batch_dir, :batch_procpath, :tail, :batch_dependencies, :dry_run, :orchestration_rules,
      :batch_base_dir => SchedulerJob.batch_base_dir

    if (batch_job = job.info[:batch_job]) && job_queued(batch_job)
      Log.info "Job #{job.short_path} already queued in #{batch_job}"
      return batch_job, batch_dir_for_id(batch_base_dir, batch_job)
    end

    if job.running?
      Log.info "Job #{job.short_path} already running in #{job.info[:pid]}"

      if job.info[:batch_job]
        return job.info[:batch_job], batch_dir_for_id(batch_base_dir, batch_job)
      else
        return
      end
    end

    workflow = job.workflow
    task_name = job.task_name

    options = IndiferentHash.setup(Workflow::Orchestrator.job_rules(Batch::Orchestration.orchestration_rules(orchestration_rules_file), job, true)).merge(options) if orchestration_rules_file

    # Workflows of recursive dependencies must be loadable on the node
    workflows_to_load = job.rec_dependencies.select{|d| Step === d}.collect{|d| d.workflow }.compact.collect(&:to_s) - [workflow.to_s]

    TmpFile.with_file(nil, remove_batch_dir, :tmpdir => batch_base_dir, :prefix => "#{system}_scout_job-#{workflow.to_s}-#{task_name}-") do |batch_dir|
      IndiferentHash.add_defaults options,
        :batch_dir => batch_dir,
        :inputs_dir => File.join(batch_dir, "inputs_dir"),
        :workflows => workflows_to_load.any? ? workflows_to_load.uniq * "," : nil

      options[:procpath_performance] ||= File.join(batch_dir, "procpath##{procpath.gsub(',', '#')}") if procpath

      template = self.job_template(job, options.dup)

      prepare_submision(template, options[:batch_dir], clean_batch_job, batch_dependencies)

      batch_job = run_template(batch_dir, dry_run)

      hold_dependencies(job, batch_job) unless dry_run

      return [batch_job, batch_dir] unless tail

      t_monitor = Thread.new do
        self.follow_job(batch_dir, :STDERR)
      end
      self.wait_for_job(batch_dir)
      t_monitor.raise Aborted
      return unless Open.read(File.join(batch_dir, 'exit.status')).strip == '0'
      path = Open.read(File.join(batch_dir, 'std.out')).strip
      if Open.exists?(path) && job.path != path
        Log.info "Path of BATCH job #{path} is different from original job #{job.path}. Establishing link."
        Open.ln path, job.path
        Open.ln path + '.info', job.path + '.info' if Open.exists?(path + '.info')
        Open.ln path + '.files', job.path + '.files' if Open.exists?(path + '.files')
      end

      [batch_job, batch_dir]
    end
  end

  # Record the batch job id/system on `job` and, recursively, on every
  # waiting dependency that is not already queued.
  def hold_dependencies(job, batch_job)
    job.init_info
    job.set_info :batch_job, batch_job
    job.set_info :batch_system, self.system
    job.dependencies.each do |dep|
      next unless dep.waiting?
      next if (dep_batch_job = dep.info[:batch_job]) && job_queued(dep_batch_job)

      hold_dependencies(dep, batch_job)
    end
  end

  # Report the state of a submitted job on STDERR and, when `tail` is
  # truthy, stream its std.out/std.err until the job leaves the queue.
  # Pass tail = :STDERR to stream only stderr.
  def follow_job(batch_dir, tail = true)
    fjob = File.join(batch_dir, 'job.id')
    fout = File.join(batch_dir, 'std.out')
    ferr = File.join(batch_dir, 'std.err')
    fexit = File.join(batch_dir, 'exit.status')
    fstatus = File.join(batch_dir, 'job.status')

    job = Open.read(fjob).strip if Open.exists?(fjob)

    if job && ! File.exist?(fexit)
      begin
        status_txt = job_status(job)
        STDERR.puts Log.color(:magenta, "Status [#{job.to_i}]:")
        STDERR.puts status_txt
        lines = status_txt.split("\n").length
      rescue
        # job_status failing means the scheduler no longer knows the job
        if ! File.exist?(fexit)
          STDERR.puts Log.color(:magenta, "Job #{job.to_i} not done and not running. STDERR:")
          STDERR.puts Open.read(ferr)
        end
        return
      end
    end

    if File.exist?(fexit)
      exit_status = Open.read(fexit)
      if exit_status.to_i == 0
        STDERR.puts Log.color(:magenta, "Job #{job} done with exit_status 0. STDOUT:")
        STDERR.puts Open.read(fout)
      else
        STDERR.puts Log.color(:magenta, "Job #{job.to_i} done with exit_status #{exit_status}. STDERR:")
        STDERR.puts Open.read(ferr)
      end
      return
    end

    if tail
      Log.severity = 10
      while ! File.exist? fout
        if job
          STDERR.puts
          Log.clear_line(STDERR)
          STDERR.write Log.color(:magenta, "Waiting for Output")
          3.times do
            STDERR.write Log.color(:magenta, ".")
            sleep 1
          end
          status_txt = job_status(job)
          # Erase the previously printed status block before reprinting
          (lines + 1).times do
            Log.clear_line(STDERR)
          end
          Log.clear_line(STDERR)
          STDERR.puts Log.color(:magenta, "Status [#{job.to_i}]:")
          STDERR.puts status_txt
          lines = status_txt.split("\n").length
        end
      end
      STDERR.puts
      Log.clear_line(STDERR)
      STDERR.puts Log.color(:magenta, "Output:")
      begin
        status_txt = job_status(job)
        Open.write(fstatus, status_txt) unless status_txt.nil? || status_txt.empty?
        out = CMD.cmd("tail -f '#{fout}'", :pipe => true) if File.exist?(fout) && tail != :STDERR
        err = CMD.cmd("tail -f '#{ferr}'", :pipe => true) if File.exist?(ferr)

        terr = Misc.consume_stream(err, true, STDERR) if err
        tout = Misc.consume_stream(out, true, STDOUT) if out

        sleep 3 while job_queued(job)
      rescue Aborted
      ensure
        begin
          terr.exit if terr
          tout.exit if tout
          err.close if err
          err.join if err
        rescue Exception
        end

        begin
          out.close if out
          out.join if out
        rescue Exception
        end
      end
    end
  end

  # True when the scheduler's status listing still mentions this job id.
  def job_queued(job)
    job_status(job).split(/[\s\.]+/).include?(job.to_s)
  end

  # All job ids (5+ digit tokens) currently reported by the scheduler.
  def jobs
    job_status.split("\n").collect{|l| l.scan(/\d{5,}/).first}.compact.flatten.uniq
  end

  # Block until the job writes its exit.status file, polling every `time`
  # seconds.
  def wait_for_job(batch_dir, time = 1)
    fexit = File.join(batch_dir, 'exit.status')

    while ! Open.exists?(fexit)
      sleep time
    end
  end

  extend self
end