rbbt-util 5.30.13 → 5.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc.rb +3 -0
- data/lib/rbbt/hpc/batch.rb +623 -0
- data/lib/rbbt/hpc/lsf.rb +119 -0
- data/lib/rbbt/hpc/orchestrate.rb +12 -11
- data/lib/rbbt/hpc/slurm.rb +62 -567
- data/lib/rbbt/resource/path.rb +3 -1
- data/lib/rbbt/tsv/accessor.rb +5 -2
- data/lib/rbbt/tsv/dumper.rb +1 -0
- data/lib/rbbt/tsv/parallel/traverse.rb +1 -1
- data/lib/rbbt/tsv/stream.rb +5 -6
- data/lib/rbbt/util/log.rb +22 -1
- data/lib/rbbt/util/misc/development.rb +2 -2
- data/lib/rbbt/util/misc/options.rb +5 -0
- data/lib/rbbt/workflow/step/accessor.rb +1 -1
- data/lib/rbbt/workflow/usage.rb +13 -13
- data/share/config.ru +3 -3
- data/share/rbbt_commands/{slurm → hpc}/clean +91 -18
- data/share/rbbt_commands/{slurm → hpc}/list +100 -30
- data/share/rbbt_commands/hpc/orchestrate +81 -0
- data/share/rbbt_commands/hpc/tail +81 -0
- data/share/rbbt_commands/hpc/task +80 -0
- data/test/rbbt/hpc/test_batch.rb +65 -0
- data/test/rbbt/hpc/test_slurm.rb +30 -0
- data/test/rbbt/util/misc/test_development.rb +11 -0
- data/test/test_helper.rb +3 -1
- metadata +16 -7
- data/share/rbbt_commands/slurm/orchestrate +0 -48
- data/share/rbbt_commands/slurm/task +0 -46
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71eb12f214343e48d8b2fdbcf5886e382dd3c60c8faeb89625cfb9aa56a5ad08
|
4
|
+
data.tar.gz: cf8f167c63aecfd2f389650ea9c0baeac59c769c9258bdedc973bc58d8a845e4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a8a23f6a6ab179cb2d4a23ad4556e8f5d5cb1aee600e51b9205a6545c62e364353504adf94cafe8b0e9878881d67c14a27b95724587e1ec109157ad749bdf43d
|
7
|
+
data.tar.gz: 2d44d126bf3c7963a2846ebac4a749f1c124b82d94b925a1f1c6d2d4ff371ace5b29d86ee80f581c9aab7368b8d4a403687520dd62289c4da964e00263e65065
|
data/lib/rbbt/hpc/batch.rb
CHANGED
@@ -0,0 +1,623 @@
|
|
1
|
+
module HPC
|
2
|
+
class SBATCH < Exception;
|
3
|
+
attr_accessor :directory
|
4
|
+
def initialize(directory)
|
5
|
+
@directory = directory
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
module TemplateGeneration
|
10
|
+
def exec_cmd(job, options = {})
|
11
|
+
env_cmd = Misc.process_options options, :env_cmd
|
12
|
+
development = Misc.process_options options, :development
|
13
|
+
|
14
|
+
job_cmd = self.rbbt_job_exec_cmd(job, options)
|
15
|
+
|
16
|
+
if options[:singularity]
|
17
|
+
|
18
|
+
group, user, user_group, scratch_group_dir, projects_group_dir = options.values_at :group, :user, :user_group, :scratch_group_dir, :projects_group_dir
|
19
|
+
|
20
|
+
singularity_img, singularity_opt_dir, singularity_ruby_inline = options.values_at :singularity_img, :singularity_opt_dir, :singularity_ruby_inline
|
21
|
+
|
22
|
+
singularity_cmd = %(singularity exec -e -B #{singularity_opt_dir}:/singularity_opt/ -B /apps/)
|
23
|
+
|
24
|
+
if contain = options[:contain]
|
25
|
+
contain = File.expand_path(contain)
|
26
|
+
singularity_cmd << %( -C -H "#{contain}" \
|
27
|
+
-B "#{options[:batch_dir]}" \
|
28
|
+
-B /scratch/tmp \
|
29
|
+
#{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
|
30
|
+
-B #{scratch_group_dir} \
|
31
|
+
-B #{projects_group_dir} \
|
32
|
+
-B "#{singularity_ruby_inline}":"#{contain}/.ruby_inline":rw \
|
33
|
+
-B ~/git:"#{contain}/git":ro \
|
34
|
+
#{Open.exists?('~/.rbbt/software/opt/')? '-B ~/.rbbt/software/opt/:"/opt/":ro' : '' } \
|
35
|
+
-B ~/.rbbt:"#{contain}/home/":ro)
|
36
|
+
singularity_cmd << " #{singularity_img} "
|
37
|
+
end
|
38
|
+
env_cmd ||= ""
|
39
|
+
env_cmd << " TMPDIR='#{contain}/.rbbt/tmp' "
|
40
|
+
end
|
41
|
+
|
42
|
+
if env_cmd
|
43
|
+
exec_cmd = %(env #{env_cmd} rbbt)
|
44
|
+
else
|
45
|
+
exec_cmd = %(rbbt)
|
46
|
+
end
|
47
|
+
|
48
|
+
exec_cmd << "--dev '#{development}'" if development
|
49
|
+
|
50
|
+
exec_cmd = singularity_cmd + exec_cmd if singularity_cmd
|
51
|
+
|
52
|
+
exec_cmd
|
53
|
+
end
|
54
|
+
|
55
|
+
def rbbt_job_exec_cmd(job, options)
|
56
|
+
|
57
|
+
jobname = job.clean_name
|
58
|
+
workflow = job.workflow
|
59
|
+
task = job.task_name
|
60
|
+
|
61
|
+
Misc.add_defaults options, :jobname => jobname
|
62
|
+
|
63
|
+
task = Symbol === job.overriden ? job.overriden : job.task_name
|
64
|
+
|
65
|
+
if job.overriden
|
66
|
+
override_deps = job.rec_dependencies.
|
67
|
+
select{|dep| Symbol === dep.overriden }.
|
68
|
+
collect do |dep|
|
69
|
+
|
70
|
+
name = [dep.workflow.to_s, dep.task_name] * "#"
|
71
|
+
[name, dep.path] * "="
|
72
|
+
end * ","
|
73
|
+
|
74
|
+
options[:override_deps] = override_deps
|
75
|
+
end
|
76
|
+
|
77
|
+
# Save inputs into inputs_dir
|
78
|
+
inputs_dir = Misc.process_options options, :inputs_dir
|
79
|
+
saved = Step.save_job_inputs(job, inputs_dir) if inputs_dir
|
80
|
+
options[:load_inputs] = inputs_dir if saved && saved.any?
|
81
|
+
|
82
|
+
saved.each do |input|
|
83
|
+
options.delete input
|
84
|
+
end if saved
|
85
|
+
|
86
|
+
cmds = CMD.process_cmd_options options.merge(:add_option_dashes => true)
|
87
|
+
|
88
|
+
<<-EOF.strip
|
89
|
+
workflow task #{workflow} #{task} #{cmds}
|
90
|
+
EOF
|
91
|
+
end
|
92
|
+
|
93
|
+
def header(options)
|
94
|
+
header =<<-EOF
|
95
|
+
#!/bin/bash
|
96
|
+
EOF
|
97
|
+
|
98
|
+
header
|
99
|
+
end
|
100
|
+
|
101
|
+
def batch_options(job, options)
|
102
|
+
IndiferentHash.setup(options)
|
103
|
+
|
104
|
+
batch_options = IndiferentHash.setup({})
|
105
|
+
|
106
|
+
keys = [
|
107
|
+
:batch_dir,
|
108
|
+
:batch_modules,
|
109
|
+
:batch_name,
|
110
|
+
:contain,
|
111
|
+
:contain_and_sync,
|
112
|
+
:copy_image,
|
113
|
+
:drbbt,
|
114
|
+
:env_cmd,
|
115
|
+
:exclusive,
|
116
|
+
:highmem,
|
117
|
+
:manifest,
|
118
|
+
:nodes,
|
119
|
+
:queue,
|
120
|
+
:singularity,
|
121
|
+
:sync,
|
122
|
+
:task_cpus,
|
123
|
+
:time,
|
124
|
+
:user_group,
|
125
|
+
:wipe_container,
|
126
|
+
:workdir,
|
127
|
+
]
|
128
|
+
|
129
|
+
keys.each do |key|
|
130
|
+
next if options[key].nil?
|
131
|
+
batch_options[key] = Misc.process_options options, key
|
132
|
+
end
|
133
|
+
|
134
|
+
batch_dir = batch_options[:batch_dir]
|
135
|
+
|
136
|
+
batch_name = File.basename(batch_dir)
|
137
|
+
inputs_dir = File.join(batch_dir, 'inputs_dir')
|
138
|
+
|
139
|
+
keys_from_config = [
|
140
|
+
:queue,
|
141
|
+
:highmem,
|
142
|
+
:exclusive,
|
143
|
+
:env_cmd,
|
144
|
+
:user_group,
|
145
|
+
:singularity_img,
|
146
|
+
:singularity_opt_dir,
|
147
|
+
:singularity_ruby_inline,
|
148
|
+
:singularity
|
149
|
+
]
|
150
|
+
|
151
|
+
keys_from_config.each do |key|
|
152
|
+
next unless batch_options.include? key
|
153
|
+
default_value = Rbbt::Config.get(key, "batch_#{key}", "batch")
|
154
|
+
next if default_value.nil?
|
155
|
+
Misc.add_defaults batch_options, default_value
|
156
|
+
end
|
157
|
+
|
158
|
+
user = batch_options[:user] ||= ENV['USER'] || `whoami`.strip
|
159
|
+
group = batch_options[:group] ||= File.basename(File.dirname(ENV['HOME']))
|
160
|
+
batch_options[:scratch_group_dir] = File.join('/gpfs/scratch/', group)
|
161
|
+
batch_options[:projects_group_dir] = File.join('/gpfs/projects/', group)
|
162
|
+
|
163
|
+
if batch_options[:contain_and_sync]
|
164
|
+
if batch_options[:contain].nil?
|
165
|
+
contain_base = Rbbt::Config.get(:contain_base_dir, :batch_contain, :batch, :default => "/scratch/tmp/rbbt-[USER]")
|
166
|
+
contain_base = contain_base.sub('[USER]', user)
|
167
|
+
random_file = TmpFile.random_name
|
168
|
+
batch_options[:contain] = File.join(contain_base, random_file)
|
169
|
+
end
|
170
|
+
|
171
|
+
batch_options[:sync] ||= "~/.rbbt/var/jobs"
|
172
|
+
batch_options[:wipe_container] ||= 'post'
|
173
|
+
end
|
174
|
+
|
175
|
+
if batch_options[:contain] && ! batch_options[:singularity]
|
176
|
+
options[:workdir_all] = batch_options[:contain]
|
177
|
+
end
|
178
|
+
|
179
|
+
Misc.add_defaults batch_options,
|
180
|
+
:batch_name => batch_name,
|
181
|
+
:inputs_dir => inputs_dir,
|
182
|
+
:queue => 'debug',
|
183
|
+
:nodes => 1,
|
184
|
+
:step_path => job.path,
|
185
|
+
:task_cpus => 1,
|
186
|
+
:time => '2min',
|
187
|
+
:env_cmd => '_JAVA_OPTIONS="-Xms1g -Xmx${MAX_MEMORY}m"',
|
188
|
+
:singularity_img => ENV["SINGULARITY_IMG"] || "~/rbbt.singularity.img",
|
189
|
+
:singularity_ruby_inline => ENV["SINGULARITY_RUBY_INLINE"] || "~/.singularity_ruby_inline",
|
190
|
+
:singularity_opt_dir => ENV["SINGULARITY_OPT_DIR"] || "~/singularity_opt",
|
191
|
+
:workdir => Dir.pwd
|
192
|
+
|
193
|
+
exec_cmd = exec_cmd(job, batch_options)
|
194
|
+
rbbt_cmd = rbbt_job_exec_cmd(job, options)
|
195
|
+
|
196
|
+
Misc.add_defaults batch_options,
|
197
|
+
:exec_cmd => exec_cmd,
|
198
|
+
:rbbt_cmd => rbbt_cmd
|
199
|
+
|
200
|
+
batch_dir = batch_options[:batch_dir]
|
201
|
+
|
202
|
+
Misc.add_defaults batch_options,
|
203
|
+
:fout => File.join(batch_dir, 'std.out'),
|
204
|
+
:ferr => File.join(batch_dir, 'std.err'),
|
205
|
+
:fjob => File.join(batch_dir, 'job.id'),
|
206
|
+
:fdep => File.join(batch_dir, 'dependencies.list'),
|
207
|
+
:fcfdep => File.join(batch_dir, 'canfail_dependencies.list'),
|
208
|
+
:fexit => File.join(batch_dir, 'exit.status'),
|
209
|
+
:fsync => File.join(batch_dir, 'sync.log'),
|
210
|
+
:fsexit => File.join(batch_dir, 'sync.status'),
|
211
|
+
:fcmd => File.join(batch_dir, 'command.batch')
|
212
|
+
|
213
|
+
batch_options
|
214
|
+
end
|
215
|
+
|
216
|
+
def meta_data(options)
|
217
|
+
meta =<<-EOF
|
218
|
+
#MANIFEST: #{(options[:manifest] || []) * ", "}
|
219
|
+
#DEPENDENCIES: #{(options[:dependencies] || []) * ", "}
|
220
|
+
#EXEC_CMD: #{options[:exec_cmd]}
|
221
|
+
#CMD: #{options[:rbbt_cmd]}
|
222
|
+
#STEP_PATH: #{options[:step_path]}
|
223
|
+
EOF
|
224
|
+
|
225
|
+
meta = meta.split("\n").reject{|line| line =~ /: $/} * "\n"
|
226
|
+
meta
|
227
|
+
end
|
228
|
+
|
229
|
+
def load_modules(modules = [])
|
230
|
+
modules = modules.split(/,\s*/) if String === modules
|
231
|
+
|
232
|
+
str = ""
|
233
|
+
modules.each do |mod|
|
234
|
+
str << "module load #{ mod }" << "\n"
|
235
|
+
end if modules
|
236
|
+
|
237
|
+
str
|
238
|
+
end
|
239
|
+
|
240
|
+
def batch_system_variables
|
241
|
+
<<-EOF
|
242
|
+
let MAX_MEMORY="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / 1024"
|
243
|
+
EOF
|
244
|
+
end
|
245
|
+
|
246
|
+
def prepare_environment(options = {})
|
247
|
+
modules = options[:batch_modules]
|
248
|
+
|
249
|
+
prepare_environment = ""
|
250
|
+
|
251
|
+
functions = ""
|
252
|
+
|
253
|
+
if contain = options[:contain]
|
254
|
+
contain = File.expand_path(contain)
|
255
|
+
functions +=<<-EOF
|
256
|
+
function batch_erase_contain_dir(){
|
257
|
+
rm -Rfv '#{contain}' 2>1 >> '#{options[:fsync]}'
|
258
|
+
}
|
259
|
+
EOF
|
260
|
+
|
261
|
+
prepare_environment +=<<-EOF
|
262
|
+
if ls -A '#{contain}' &> /dev/null ; then
|
263
|
+
empty_contain_dir="false"
|
264
|
+
else
|
265
|
+
empty_contain_dir="true"
|
266
|
+
fi
|
267
|
+
EOF
|
268
|
+
|
269
|
+
prepare_environment +=<<-EOF if options[:wipe_container] == 'force'
|
270
|
+
batch_erase_contain_dir()
|
271
|
+
EOF
|
272
|
+
end
|
273
|
+
|
274
|
+
if sync = options[:sync]
|
275
|
+
source = if options[:singularity]
|
276
|
+
File.join(options[:contain], '.rbbt/var/jobs')
|
277
|
+
elsif options[:contain]
|
278
|
+
File.join(options[:contain], 'var/jobs')
|
279
|
+
else
|
280
|
+
'~/.rbbt/var/jobs/'
|
281
|
+
end
|
282
|
+
|
283
|
+
source = File.expand_path(source)
|
284
|
+
sync = File.expand_path(sync)
|
285
|
+
functions +=<<-EOF
|
286
|
+
function batch_sync_contain_dir(){
|
287
|
+
mkdir -p "$(dirname '#{sync}')"
|
288
|
+
rsync -avztAXHP --copy-unsafe-links "#{source}/" "#{sync}/" 2>1 >> '#{options[:fsync]}'
|
289
|
+
sync_es="$?"
|
290
|
+
echo $sync_es > '#{options[:fsexit]}'
|
291
|
+
find '#{sync}' -type l -ls | awk '$13 ~ /^#{sync.gsub('/','\/')}/ { sub("#{source}", "#{sync}", $13); print $11, $13 }' | while read A B; do rm $A; ln -s $B $A; done
|
292
|
+
}
|
293
|
+
EOF
|
294
|
+
end
|
295
|
+
|
296
|
+
if options[:singularity]
|
297
|
+
|
298
|
+
group, user, user_group, scratch_group_dir, projects_group_dir = options.values_at :group, :user, :user_group, :scratch_group_dir, :projects_group_dir
|
299
|
+
|
300
|
+
singularity_img, singularity_opt_dir, singularity_ruby_inline = options.values_at :singularity_img, :singularity_opt_dir, :singularity_ruby_inline
|
301
|
+
|
302
|
+
prepare_environment +=<<-EOF
|
303
|
+
# Load singularity modules
|
304
|
+
module load intel/2018.1
|
305
|
+
module load singularity
|
306
|
+
mkdir -p "#{singularity_opt_dir}"
|
307
|
+
EOF
|
308
|
+
|
309
|
+
if contain
|
310
|
+
|
311
|
+
prepare_environment +=<<-EOF
|
312
|
+
# Prepare container for singularity
|
313
|
+
mkdir -p "#{contain}/.rbbt/etc/"
|
314
|
+
|
315
|
+
for dir in .ruby_inline git home; do
|
316
|
+
mkdir -p "#{contain}/$dir"
|
317
|
+
done
|
318
|
+
|
319
|
+
for tmpd in persist_locks produce_locks R_sockets sensiblewrite sensiblewrite_locks step_info_locks tsv_open_locks; do
|
320
|
+
mkdir -p "#{contain}/.rbbt/tmp/$tmpd"
|
321
|
+
done
|
322
|
+
|
323
|
+
# Copy environment
|
324
|
+
cp ~/.rbbt/etc/environment #{contain}/.rbbt/etc/
|
325
|
+
|
326
|
+
# Set search_paths
|
327
|
+
echo "singularity: /singularity_opt/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" > #{contain}/.rbbt/etc/search_paths
|
328
|
+
echo "rbbt_user: /home/rbbt/.rbbt/{TOPLEVEL}/{SUBPATH}" >> #{contain}/.rbbt/etc/search_paths
|
329
|
+
echo "outside_home: #{contain}/home/{TOPLEVEL}/{SUBPATH}" >> #{contain}/.rbbt/etc/search_paths
|
330
|
+
echo "group_projects: #{projects_group_dir}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> #{contain}/.rbbt/etc/search_paths
|
331
|
+
echo "group_scratch: #{scratch_group_dir}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> #{contain}/.rbbt/etc/search_paths
|
332
|
+
echo "user_projects: #{projects_group_dir}/#{user}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> #{contain}/.rbbt/etc/search_paths
|
333
|
+
echo "user_scratch: #{scratch_group_dir}/#{user}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> #{contain}/.rbbt/etc/search_paths
|
334
|
+
EOF
|
335
|
+
end
|
336
|
+
end
|
337
|
+
|
338
|
+
batch_system_variables + load_modules(modules) + "\n" + functions + "\n" + prepare_environment
|
339
|
+
end
|
340
|
+
|
341
|
+
def execute(options)
|
342
|
+
exec_cmd, job_cmd = options.values_at :exec_cmd, :rbbt_cmd
|
343
|
+
|
344
|
+
<<-EOF
|
345
|
+
step_path=$(
|
346
|
+
#{exec_cmd} #{job_cmd} --printpath
|
347
|
+
)
|
348
|
+
exit_status=$?
|
349
|
+
|
350
|
+
[[ -z $BATCH_JOB_ID ]] || #{exec_cmd} workflow write_info --recursive --force=false --check_pid "$step_path" batch_job $BATCH_JOB_ID
|
351
|
+
[[ -z $BATCH_SYSTEM ]] || #{exec_cmd} workflow write_info --recursive --force=false --check_pid "$step_path" batch_system $BATCH_SYSTEM
|
352
|
+
EOF
|
353
|
+
end
|
354
|
+
|
355
|
+
def sync_environment(options = {})
|
356
|
+
sync_environment = ""
|
357
|
+
|
358
|
+
if options[:sync]
|
359
|
+
sync_environment +=<<-EOF
|
360
|
+
if [ $exit_status == '0' ]; then
|
361
|
+
batch_sync_contain_dir
|
362
|
+
else
|
363
|
+
sync_es=$exit_status
|
364
|
+
fi
|
365
|
+
EOF
|
366
|
+
end
|
367
|
+
|
368
|
+
sync_environment
|
369
|
+
end
|
370
|
+
|
371
|
+
def cleanup_environment(options = {})
|
372
|
+
cleanup_environment = ""
|
373
|
+
if options[:sync]
|
374
|
+
if options[:wipe_container] == 'force'
|
375
|
+
cleanup_environment +=<<-EOF
|
376
|
+
batch_erase_contain_dir
|
377
|
+
EOF
|
378
|
+
elsif options[:wipe_container] == 'post' || options[:wipe_container] == 'both'
|
379
|
+
cleanup_environment +=<<-EOF
|
380
|
+
if [ $sync_es == '0' -a $empty_contain_dir == 'true' ]; then
|
381
|
+
batch_erase_contain_dir
|
382
|
+
fi
|
383
|
+
EOF
|
384
|
+
end
|
385
|
+
end
|
386
|
+
cleanup_environment
|
387
|
+
end
|
388
|
+
|
389
|
+
def coda(options)
|
390
|
+
coda =<<-EOF
|
391
|
+
echo $exit_status > '#{options[:fexit]}'
|
392
|
+
EOF
|
393
|
+
|
394
|
+
if options[:sync]
|
395
|
+
coda +=<<-EOF
|
396
|
+
if [ $sync_es == '0' ]; then
|
397
|
+
exit $exit_status
|
398
|
+
else
|
399
|
+
exit $sync_es
|
400
|
+
fi
|
401
|
+
EOF
|
402
|
+
else
|
403
|
+
coda +=<<-EOF
|
404
|
+
exit $exit_status
|
405
|
+
EOF
|
406
|
+
end
|
407
|
+
|
408
|
+
coda
|
409
|
+
end
|
410
|
+
|
411
|
+
def job_template(job, options = {})
|
412
|
+
batch_options = batch_options job, options
|
413
|
+
|
414
|
+
header = self.header(batch_options)
|
415
|
+
|
416
|
+
meta_data = self.meta_data(batch_options)
|
417
|
+
|
418
|
+
prepare_environment = self.prepare_environment(batch_options)
|
419
|
+
|
420
|
+
execute = self.execute(batch_options)
|
421
|
+
|
422
|
+
sync_environment = self.sync_environment(batch_options)
|
423
|
+
|
424
|
+
cleanup_environment = self.cleanup_environment(batch_options)
|
425
|
+
|
426
|
+
coda = self.coda(batch_options)
|
427
|
+
|
428
|
+
<<-EOF
|
429
|
+
#{header}
|
430
|
+
|
431
|
+
# #{Log.color :green, "0. Meta-data"}
|
432
|
+
#{meta_data}
|
433
|
+
|
434
|
+
# #{Log.color :green, "1. Prepare environment"}
|
435
|
+
#{prepare_environment}
|
436
|
+
|
437
|
+
# #{Log.color :green, "2. Execute"}
|
438
|
+
#{execute}
|
439
|
+
|
440
|
+
# #{Log.color :green, "3. Sync and cleanup environment"}
|
441
|
+
#{sync_environment}
|
442
|
+
#{cleanup_environment}
|
443
|
+
|
444
|
+
# #{Log.color :green, "4. Exit"}
|
445
|
+
#{coda}
|
446
|
+
EOF
|
447
|
+
end
|
448
|
+
|
449
|
+
def prepare_submision(template, batch_dir, clean_batch_job = false, batch_dependencies = [])
|
450
|
+
Open.mkdir batch_dir
|
451
|
+
fcmd = File.join(batch_dir, 'command.batch')
|
452
|
+
fdep = File.join(batch_dir, 'dependencies.list')
|
453
|
+
fcfdep = File.join(batch_dir, 'canfail_dependencies.list')
|
454
|
+
|
455
|
+
Open.write(fcmd, template)
|
456
|
+
|
457
|
+
%w(std.out std.err job.id job.status dependencies.list canfail_dependencies.list exit.status sync.log inputs_dir).each do |filename|
|
458
|
+
path = File.join(batch_dir, filename)
|
459
|
+
Open.rm_rf path if File.exists? path
|
460
|
+
end if clean_batch_job
|
461
|
+
|
462
|
+
batch_dependencies = [] if batch_dependencies.nil?
|
463
|
+
|
464
|
+
canfail_dependencies = batch_dependencies.select{|dep| dep =~ /^canfail:(\d+)/ }.collect{|dep| dep.partition(":").last}
|
465
|
+
dependencies = batch_dependencies.reject{|dep| dep =~ /^canfail:(\d+)/ }
|
466
|
+
|
467
|
+
Open.write(fdep, dependencies * "\n") if dependencies.any?
|
468
|
+
Open.write(fcfdep, canfail_dependencies * "\n") if canfail_dependencies.any?
|
469
|
+
|
470
|
+
fcmd
|
471
|
+
end
|
472
|
+
|
473
|
+
|
474
|
+
def run_job(job, options = {})
|
475
|
+
system = self.to_s.split("::").last
|
476
|
+
|
477
|
+
batch_base_dir, clean_batch_job, remove_batch_dir, procpath, tail, batch_dependencies, dry_run = Misc.process_options options,
|
478
|
+
:batch_base_dir, :clean_batch_job, :remove_batch_dir, :batch_procpath, :tail, :batch_dependencies, :dry_run,
|
479
|
+
:batch_base_dir => File.expand_path(File.join('~/rbbt-batch'))
|
480
|
+
|
481
|
+
workflow = job.workflow
|
482
|
+
task_name = job.task_name
|
483
|
+
|
484
|
+
TmpFile.with_file(nil, remove_batch_dir, :tmpdir => batch_base_dir, :prefix => "#{system}_rbbt_job-#{workflow.to_s}-#{task_name}-") do |batch_dir|
|
485
|
+
Misc.add_defaults options,
|
486
|
+
:batch_dir => batch_dir,
|
487
|
+
:inputs_dir => File.join(batch_dir, "inputs_dir")
|
488
|
+
|
489
|
+
options[:procpath_performance] ||= File.join(batch_dir, "procpath##{procpath.gsub(',', '#')}") if procpath
|
490
|
+
|
491
|
+
template = self.job_template(job, options.dup)
|
492
|
+
|
493
|
+
fcmd = prepare_submision(template, options[:batch_dir], clean_batch_job, batch_dependencies)
|
494
|
+
|
495
|
+
batch_job = run_template(batch_dir, dry_run)
|
496
|
+
|
497
|
+
return batch_job unless tail
|
498
|
+
|
499
|
+
t_monitor = Thread.new do
|
500
|
+
self.follow_job(batch_dir, :STDERR)
|
501
|
+
end
|
502
|
+
self.wait_for_job(batch_dir)
|
503
|
+
t_monitor.raise Aborted
|
504
|
+
return unless Open.read(File.join(batch_dir, 'exit.status')).strip == '0'
|
505
|
+
path = Open.read(File.join(batch_dir, 'std.out')).strip
|
506
|
+
if Open.exists?(path) && job.path != path
|
507
|
+
Log.info "Path of BATCH job #{path} is different from original job #{job.path}. Stablishing link."
|
508
|
+
Open.ln path, job.path
|
509
|
+
Open.ln path + '.info', job.path + '.info' if Open.exists?(path + '.info')
|
510
|
+
Open.ln path + '.files', job.path + '.files' if Open.exists?(path + '.files')
|
511
|
+
end
|
512
|
+
batch_job
|
513
|
+
|
514
|
+
end
|
515
|
+
end
|
516
|
+
|
517
|
+
def follow_job(batch_dir, tail = true)
|
518
|
+
fjob = File.join(batch_dir, 'job.id')
|
519
|
+
fout = File.join(batch_dir, 'std.out')
|
520
|
+
ferr = File.join(batch_dir, 'std.err')
|
521
|
+
fexit = File.join(batch_dir, 'exit.status')
|
522
|
+
fstatus = File.join(batch_dir, 'job.status')
|
523
|
+
|
524
|
+
job = Open.read(fjob).strip if Open.exists?(fjob)
|
525
|
+
|
526
|
+
if job && ! File.exists?(fexit)
|
527
|
+
begin
|
528
|
+
status_txt = job_status(job)
|
529
|
+
STDERR.puts Log.color(:magenta, "Status [#{job.to_i}]:")
|
530
|
+
STDERR.puts status_txt
|
531
|
+
lines = status_txt.split("\n").length
|
532
|
+
rescue
|
533
|
+
if ! File.exists?(fexit)
|
534
|
+
STDERR.puts Log.color(:magenta, "Job #{job.to_i} not done and not running. STDERR:")
|
535
|
+
STDERR.puts Open.read(ferr)
|
536
|
+
end
|
537
|
+
return
|
538
|
+
end
|
539
|
+
end
|
540
|
+
|
541
|
+
if File.exists?(fexit)
|
542
|
+
exit_status = Open.read(fexit)
|
543
|
+
if exit_status.to_i == 0
|
544
|
+
STDERR.puts Log.color(:magenta, "Job #{job} done with exit_status 0. STDOUT:")
|
545
|
+
STDERR.puts Open.read(fout)
|
546
|
+
else
|
547
|
+
STDERR.puts Log.color(:magenta, "Job #{job.to_i} done with exit_status #{exit_status}. STDERR:")
|
548
|
+
STDERR.puts Open.read(ferr)
|
549
|
+
end
|
550
|
+
return
|
551
|
+
end
|
552
|
+
|
553
|
+
if tail
|
554
|
+
Log.severity = 10
|
555
|
+
while ! File.exists? fout
|
556
|
+
if job
|
557
|
+
STDERR.puts
|
558
|
+
Log.clear_line(STDERR)
|
559
|
+
STDERR.write Log.color(:magenta, "Waiting for Output")
|
560
|
+
3.times do
|
561
|
+
STDERR.write Log.color(:magenta, ".")
|
562
|
+
sleep 1
|
563
|
+
end
|
564
|
+
status_txt = job_status(job)
|
565
|
+
lines.times do
|
566
|
+
Log.clear_line(STDERR)
|
567
|
+
end
|
568
|
+
Log.clear_line(STDERR)
|
569
|
+
STDERR.puts Log.color(:magenta, "Status [#{job.to_i}]:")
|
570
|
+
STDERR.puts status_txt
|
571
|
+
lines = status_txt.split("\n").length
|
572
|
+
end
|
573
|
+
end
|
574
|
+
STDERR.puts
|
575
|
+
Log.clear_line(STDERR)
|
576
|
+
STDERR.puts Log.color(:magenta, "Output:")
|
577
|
+
begin
|
578
|
+
status_txt = job_status(job)
|
579
|
+
Open.write(fstatus, status_txt) unless status_txt.nil? || status_txt.empty?
|
580
|
+
out = CMD.cmd("tail -f '#{fout}'", :pipe => true) if File.exists?(fout) and not tail == :STDERR
|
581
|
+
err = CMD.cmd("tail -f '#{ferr}'", :pipe => true) if File.exists?(ferr)
|
582
|
+
|
583
|
+
terr = Misc.consume_stream(err, true, STDERR) if err
|
584
|
+
tout = Misc.consume_stream(out, true, STDOUT) if out
|
585
|
+
|
586
|
+
sleep 3 while job_status(job).include? job.to_s
|
587
|
+
rescue Aborted
|
588
|
+
ensure
|
589
|
+
begin
|
590
|
+
terr.exit if terr
|
591
|
+
tout.exit if tout
|
592
|
+
err.close if err
|
593
|
+
err.join if err
|
594
|
+
rescue Exception
|
595
|
+
end
|
596
|
+
|
597
|
+
begin
|
598
|
+
out.close if out
|
599
|
+
out.join if out
|
600
|
+
rescue Exception
|
601
|
+
end
|
602
|
+
end
|
603
|
+
end
|
604
|
+
end
|
605
|
+
|
606
|
+
def wait_for_job(batch_dir, time = 1)
|
607
|
+
fexit = File.join(batch_dir, 'exit.status')
|
608
|
+
fjob = File.join(batch_dir, 'job.id')
|
609
|
+
job = Open.read(fjob) if Open.exists?(fjob)
|
610
|
+
|
611
|
+
while ! Open.exists?(fexit)
|
612
|
+
sleep time
|
613
|
+
end
|
614
|
+
end
|
615
|
+
|
616
|
+
end
|
617
|
+
|
618
|
+
module BATCH
|
619
|
+
extend HPC::TemplateGeneration
|
620
|
+
end
|
621
|
+
|
622
|
+
end
|
623
|
+
|