rbbt-util 5.26.25 → 5.26.26
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc.rb +98 -53
- data/lib/rbbt/workflow/step.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e656caf6f822514da2a5b9bf3b951ea4c5f6178cd8fd6da6059b4a940d7835b
|
4
|
+
data.tar.gz: e27dfec559b548efcd10dd9fa676168c330b085b50cdc7628b19c1308ee175fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e55b8f3c14ac1e581a655deb456a5c373733efe59b9d901134992066f70e4110acd1b9d6752487cf5e8cf83fae0aae7283c7c9a5a6e437f32d97400a498ec7e4
|
7
|
+
data.tar.gz: 6b84830391a26b793a531452b76c96e57fc9b00dc937087eec1269ed6f312dfa7aa0ef07aa48930600cc57c8fe1eab63a48a393ff576abe499ad5563f157a582
|
data/lib/rbbt/hpc.rb
CHANGED
@@ -22,6 +22,12 @@ module Marenostrum
|
|
22
22
|
copy_image = options.delete :copy_image
|
23
23
|
exclusive = options.delete :exclusive
|
24
24
|
highmem = options.delete :highmem
|
25
|
+
|
26
|
+
queue = options.delete(:queue) || 'bsc_ls'
|
27
|
+
task_cpus = options.delete(:task_cpus) || 1
|
28
|
+
nodes = options.delete(:nodes) || 1
|
29
|
+
time = options.delete(:time) || "0:00:10"
|
30
|
+
|
25
31
|
inputs_dir = options.delete :inputs_dir
|
26
32
|
config_keys = options.delete :config_keys
|
27
33
|
|
@@ -36,30 +42,40 @@ module Marenostrum
|
|
36
42
|
|
37
43
|
|
38
44
|
name = options[:name] ||= Misc.obj2digest({:options => options.collect{|k,v| [k,v]}.sort_by{|k,v| k.to_s }, :args => args})
|
39
|
-
|
45
|
+
options.delete(:name)
|
46
|
+
slurm_basedir = options[:slurm_basedir] ||= File.expand_path(File.join('~/rbbt-slurm', name)) if slurm_basedir.nil?
|
47
|
+
options.delete(:slurm_basedir)
|
40
48
|
|
41
49
|
rbbt_cmd = args.reject{|e| e == '--' }.collect{|e| e.include?(" ")? '"' + e + '"' : e } * " "
|
42
50
|
|
51
|
+
rbbt_cmd += " " << options.collect do |o,v|
|
52
|
+
o = o.to_s
|
53
|
+
case v
|
54
|
+
when TrueClass
|
55
|
+
'--' << o
|
56
|
+
when FalseClass
|
57
|
+
'--' << o << "=false"
|
58
|
+
else
|
59
|
+
['--' << o, "'#{v}'"] * " "
|
60
|
+
end
|
61
|
+
end * " "
|
62
|
+
|
43
63
|
rbbt_cmd << " --config_keys='#{config_keys}'" if config_keys and not config_keys.empty?
|
44
64
|
|
45
|
-
queue = options[:queue] || 'bsc_ls'
|
46
|
-
task_cpus = options[:task_cpus] || 1
|
47
|
-
nodes = options[:nodes] || 1
|
48
|
-
time = options[:time] || "0:00:10"
|
49
65
|
|
50
66
|
time = Misc.format_seconds Misc.timespan(time) unless time.include? ":"
|
51
67
|
|
52
68
|
|
53
69
|
#{{{ PREPARE LOCAL LOGFILES
|
54
70
|
|
55
|
-
Open.mkdir
|
71
|
+
Open.mkdir slurm_basedir
|
56
72
|
|
57
|
-
fout = File.join(
|
58
|
-
ferr = File.join(
|
59
|
-
fjob = File.join(
|
60
|
-
fexit = File.join(
|
61
|
-
fsync = File.join(
|
62
|
-
fcmd = File.join(
|
73
|
+
fout = File.join(slurm_basedir, 'std.out')
|
74
|
+
ferr = File.join(slurm_basedir, 'std.err')
|
75
|
+
fjob = File.join(slurm_basedir, 'job.id')
|
76
|
+
fexit = File.join(slurm_basedir, 'exit.status')
|
77
|
+
fsync = File.join(slurm_basedir, 'sync.log')
|
78
|
+
fcmd = File.join(slurm_basedir, 'command.slurm')
|
63
79
|
|
64
80
|
#{{{ GENERATE TEMPLATE
|
65
81
|
|
@@ -115,13 +131,17 @@ mkdir -p "$SINGULARITY_RUBY_INLINE"
|
|
115
131
|
|
116
132
|
if contain
|
117
133
|
user = ENV['USER'] || `whoami`.strip
|
134
|
+
group = File.basename(File.dirname(ENV['HOME']))
|
135
|
+
scratch_group_dir = File.join('/gpfs/scratch/', group)
|
136
|
+
projects_group_dir = File.join('/gpfs/projects/', group)
|
137
|
+
|
118
138
|
env +=<<-EOF
|
119
139
|
|
120
140
|
# Prepare container dir
|
121
141
|
CONTAINER_DIR="#{contain}"
|
122
142
|
mkdir -p $CONTAINER_DIR/.rbbt/etc/
|
123
143
|
|
124
|
-
for dir in .ruby_inline git
|
144
|
+
for dir in .ruby_inline git home; do
|
125
145
|
mkdir -p $CONTAINER_DIR/$dir
|
126
146
|
done
|
127
147
|
|
@@ -140,19 +160,24 @@ echo "group_scratch: $CONTAINER_DIR/scratch/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $C
|
|
140
160
|
echo "user_projects: $CONTAINER_DIR/projects/#{user}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
|
141
161
|
echo "user_scratch: $CONTAINER_DIR/scratch/#{user}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
|
142
162
|
echo "/scratch/tmp/rbbt/projects/rbbt/workflows/" > $CONTAINER_DIR/.rbbt/etc/workflow_dir
|
163
|
+
|
164
|
+
[[ -f "$CONTAINER_DIR/projects" ]] || ln -s '#{projects_group_dir}' "$CONTAINER_DIR/projects"
|
165
|
+
[[ -f "$CONTAINER_DIR/scratch" ]] || ln -s '#{scratch_group_dir}' "$CONTAINER_DIR/scratch"
|
143
166
|
EOF
|
144
167
|
|
145
168
|
if inputs_dir
|
146
169
|
env +=<<-EOF
|
147
170
|
|
148
171
|
# Copy inputs
|
149
|
-
cp -R '#{inputs_dir}' $CONTAINER_DIR/inputs
|
172
|
+
[[ -d '#{inputs_dir}' ]] && cp -R '#{inputs_dir}' $CONTAINER_DIR/inputs
|
150
173
|
EOF
|
151
174
|
rbbt_cmd = rbbt_cmd.sub(inputs_dir, "#{contain}/inputs")
|
152
175
|
end
|
153
176
|
|
154
177
|
if copy_image
|
155
178
|
env +=<<EOF
|
179
|
+
|
180
|
+
# Copy image
|
156
181
|
rsync -avz "$SINGULARITY_IMG" "$CONTAINER_DIR/rbbt.singularity.img"
|
157
182
|
SINGULARITY_IMG="$CONTAINER_DIR/rbbt.singularity.img"
|
158
183
|
EOF
|
@@ -160,8 +185,11 @@ EOF
|
|
160
185
|
|
161
186
|
if wipe_container == "pre" || wipe_container == "both"
|
162
187
|
env +=<<-EOF
|
188
|
+
|
189
|
+
# Clean container pre
|
163
190
|
singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -Rfv .rbbt/var/jobs &>> #{fsync}
|
164
191
|
singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean -f &>> #{fsync}
|
192
|
+
singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -Rfv tmp/ &>> #{fsync}
|
165
193
|
EOF
|
166
194
|
end
|
167
195
|
end
|
@@ -171,10 +199,16 @@ EOF
|
|
171
199
|
|
172
200
|
if singularity
|
173
201
|
if contain
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
202
|
+
exec_cmd = %(singularity exec -e -C -H "$CONTAINER_DIR" \
|
203
|
+
-B /apps/ \
|
204
|
+
-B /scratch/tmp \
|
205
|
+
-B "$SINGULARITY_RUBY_INLINE":"$CONTAINER_DIR/.ruby_inline":rw \
|
206
|
+
-B ~/git:"$CONTAINER_DIR/git":ro \
|
207
|
+
-B ~/.rbbt/software/opt/:"/opt/":ro \
|
208
|
+
-B ~/.rbbt:"$CONTAINER_DIR/home/":ro \
|
209
|
+
-B #{scratch_group_dir} \
|
210
|
+
-B #{projects_group_dir} \
|
211
|
+
"$SINGULARITY_IMG" env TMPDIR="$CONTAINER_DIR/.rbbt/tmp" env _JAVA_OPTIONS="-Xms1g -Xmx${MAX_MEMORY}m" rbbt)
|
178
212
|
else
|
179
213
|
exec_cmd = %(singularity exec -e -B /apps/ -B "$SINGULARITY_RUBY_INLINE":"$HOME/.ruby_inline":rw "$SINGULARITY_IMG" env _JAVA_OPTIONS="-Xms1g -Xmx${MAX_MEMORY}m" rbbt)
|
180
214
|
end
|
@@ -213,7 +247,15 @@ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean
|
|
213
247
|
EOF
|
214
248
|
end
|
215
249
|
|
216
|
-
|
250
|
+
if sync.include?("=>")
|
251
|
+
source, _sep, sync = sync.partition("=>")
|
252
|
+
source = source.strip
|
253
|
+
sync = sync.strip
|
254
|
+
source = File.join(File.expand_path(contain), source)
|
255
|
+
else
|
256
|
+
source = File.join(File.expand_path(contain), '.rbbt/var/jobs')
|
257
|
+
end
|
258
|
+
|
217
259
|
target = File.expand_path(sync)
|
218
260
|
coda +=<<-EOF
|
219
261
|
rsync -avt "#{source}/" "#{target}/" &>> #{fsync}
|
@@ -227,6 +269,7 @@ singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean
|
|
227
269
|
singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -v /dev/shm/sem.*.{in,out,process} /dev/shm/sem.Session-PID.*.sem 2> /dev/null >> #{fsync}
|
228
270
|
if [ $sync_es == '0' ]; then
|
229
271
|
singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -Rfv .rbbt/var/jobs &>> #{fsync}
|
272
|
+
singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -Rfv tmp/ &>> #{fsync}
|
230
273
|
else
|
231
274
|
echo "WARNING: Results could not sync correctly. Job directory not purged"
|
232
275
|
fi
|
@@ -248,17 +291,17 @@ EOF
|
|
248
291
|
|
249
292
|
def self.issue_template(template, options = {})
|
250
293
|
|
251
|
-
|
252
|
-
Open.mkdir
|
294
|
+
slurm_basedir = options[:slurm_basedir]
|
295
|
+
Open.mkdir slurm_basedir
|
253
296
|
|
254
297
|
dry_run = options.delete :dry_run
|
255
298
|
|
256
|
-
fout = File.join(
|
257
|
-
ferr = File.join(
|
258
|
-
fjob = File.join(
|
259
|
-
fexit = File.join(
|
260
|
-
fsync = File.join(
|
261
|
-
fcmd = File.join(
|
299
|
+
fout = File.join(slurm_basedir, 'std.out')
|
300
|
+
ferr = File.join(slurm_basedir, 'std.err')
|
301
|
+
fjob = File.join(slurm_basedir, 'job.id')
|
302
|
+
fexit = File.join(slurm_basedir, 'exit.status')
|
303
|
+
fsync = File.join(slurm_basedir, 'sync.log')
|
304
|
+
fcmd = File.join(slurm_basedir, 'command.slurm')
|
262
305
|
|
263
306
|
job = nil
|
264
307
|
if options[:clean_job]
|
@@ -279,9 +322,9 @@ EOF
|
|
279
322
|
if File.exists?(fout)
|
280
323
|
return
|
281
324
|
elsif dry_run
|
282
|
-
STDERR.puts Log.color(:magenta, "To execute run: ") + Log.color(:blue, "sbatch '#{
|
283
|
-
STDERR.puts Log.color(:magenta, "To monitor progress run (needs local rbbt): ") + Log.color(:blue, "rbbt mn --tail -w '#{
|
284
|
-
raise Marenostrum::SBATCH,
|
325
|
+
STDERR.puts Log.color(:magenta, "To execute run: ") + Log.color(:blue, "sbatch '#{slurm_basedir}/command.slurm'")
|
326
|
+
STDERR.puts Log.color(:magenta, "To monitor progress run (needs local rbbt): ") + Log.color(:blue, "rbbt mn --tail -w '#{slurm_basedir}'")
|
327
|
+
raise Marenostrum::SBATCH, slurm_basedir
|
285
328
|
else
|
286
329
|
Open.rm fsync
|
287
330
|
Open.rm fexit
|
@@ -293,11 +336,11 @@ EOF
|
|
293
336
|
end
|
294
337
|
end
|
295
338
|
|
296
|
-
def self.follow_job(
|
297
|
-
fjob = File.join(
|
298
|
-
fout = File.join(
|
299
|
-
ferr = File.join(
|
300
|
-
fstatus = File.join(
|
339
|
+
def self.follow_job(slurm_basedir, tail = true)
|
340
|
+
fjob = File.join(slurm_basedir, 'job.id')
|
341
|
+
fout = File.join(slurm_basedir, 'std.out')
|
342
|
+
ferr = File.join(slurm_basedir, 'std.err')
|
343
|
+
fstatus = File.join(slurm_basedir, 'job.status')
|
301
344
|
|
302
345
|
job = Open.read(fjob).strip if Open.exists?(fjob)
|
303
346
|
|
@@ -309,6 +352,7 @@ EOF
|
|
309
352
|
end
|
310
353
|
|
311
354
|
if tail
|
355
|
+
Log.severity = 10
|
312
356
|
while ! File.exists? fout
|
313
357
|
if job
|
314
358
|
STDERR.puts
|
@@ -356,9 +400,9 @@ EOF
|
|
356
400
|
end
|
357
401
|
end
|
358
402
|
|
359
|
-
def self.wait_for_job(
|
360
|
-
fexit = File.join(
|
361
|
-
fjob = File.join(
|
403
|
+
def self.wait_for_job(slurm_basedir, time = 1)
|
404
|
+
fexit = File.join(slurm_basedir, 'exit.status')
|
405
|
+
fjob = File.join(slurm_basedir, 'job.id')
|
362
406
|
job = Open.read(fjob) if Open.exists?(fjob)
|
363
407
|
|
364
408
|
|
@@ -368,33 +412,34 @@ EOF
|
|
368
412
|
end
|
369
413
|
|
370
414
|
def self.run_job(job, options = {})
|
415
|
+
options = IndiferentHash.setup(options.dup)
|
416
|
+
|
417
|
+
dry_run = options.delete :dry_run
|
418
|
+
|
371
419
|
workflow = job.workflow
|
372
420
|
task = job.task_name
|
373
|
-
|
374
|
-
|
421
|
+
|
422
|
+
keep_slurm_basedir = options.delete :keep_SLURM_slurm_basedir
|
375
423
|
slurm_basedir = options.delete :SLURM_basedir
|
376
|
-
slurm_basedir = "~/rbbt-
|
377
|
-
TmpFile.with_file(nil, !
|
378
|
-
|
424
|
+
slurm_basedir = "~/rbbt-slurm" if slurm_basedir.nil?
|
425
|
+
TmpFile.with_file(nil, !keep_slurm_basedir, :tmpdir => slurm_basedir, :prefix => "SLURM_rbbt_job-") do |tmp_directory|
|
426
|
+
options[:slurm_basedir] ||= File.join(tmp_directory, 'workdir')
|
427
|
+
slurm_basedir = options[:slurm_basedir]
|
379
428
|
inputs_dir = File.join(tmp_directory, 'inputs_dir')
|
380
429
|
Step.save_job_inputs(job, inputs_dir)
|
381
430
|
options[:inputs_dir] = inputs_dir
|
382
|
-
cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '
|
431
|
+
cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '--load_inputs', inputs_dir, '--log', (options[:log] || Log.severity).to_s]
|
383
432
|
|
384
|
-
%w(workflows requires remote_workflow_tasks override_deps).each do |key|
|
385
|
-
next unless options[key]
|
386
|
-
cmd += ["--#{key.to_s}", options[key]]
|
387
|
-
end
|
388
433
|
|
389
434
|
template = self.template(cmd, options)
|
390
|
-
self.issue_template(template, options)
|
435
|
+
self.issue_template(template, options.merge(:slurm_basedir => slurm_basedir, :dry_run => dry_run))
|
391
436
|
t_monitor = Thread.new do
|
392
|
-
self.follow_job(
|
437
|
+
self.follow_job(slurm_basedir, :STDERR)
|
393
438
|
end
|
394
|
-
self.wait_for_job(
|
439
|
+
self.wait_for_job(slurm_basedir)
|
395
440
|
t_monitor.raise Aborted
|
396
|
-
return unless Open.read(File.join(
|
397
|
-
path = Open.read(File.join(
|
441
|
+
return unless Open.read(File.join(slurm_basedir, 'exit.status')).strip == '0'
|
442
|
+
path = Open.read(File.join(slurm_basedir, 'std.out')).strip
|
398
443
|
if Open.exists?(path) && job.path != path
|
399
444
|
Log.info "Path of SLURM job #{path} is different from original job #{job.path}. Stablishing link."
|
400
445
|
Open.ln path, job.path
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -139,7 +139,7 @@ class Step
|
|
139
139
|
|
140
140
|
dep.inputs.zip(dep.inputs.fields).each do |v,f|
|
141
141
|
if i.include?(f) && i[f] != v
|
142
|
-
Log.debug "Conflict in #{ f }: #{[i[f]
|
142
|
+
Log.debug "Conflict in #{ f }: #{[Misc.fingerprint(i[f]), Misc.fingerprint(v)] * " <-> "}"
|
143
143
|
i[f] = nil
|
144
144
|
else
|
145
145
|
i[f] = v
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.26.25
|
4
|
+
version: 5.26.26
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-07-
|
11
|
+
date: 2019-07-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|