rbbt-util 5.28.9 → 5.29.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/entity.rb +1 -1
  3. data/lib/rbbt/fix_width_table.rb +5 -4
  4. data/lib/rbbt/hpc.rb +1 -549
  5. data/lib/rbbt/hpc/orchestrate.rb +23 -0
  6. data/lib/rbbt/hpc/slurm.rb +570 -0
  7. data/lib/rbbt/persist.rb +9 -4
  8. data/lib/rbbt/persist/tsv/adapter.rb +0 -1
  9. data/lib/rbbt/persist/tsv/fix_width_table.rb +5 -3
  10. data/lib/rbbt/resource.rb +12 -6
  11. data/lib/rbbt/resource/path.rb +1 -1
  12. data/lib/rbbt/tsv/attach.rb +7 -4
  13. data/lib/rbbt/tsv/dumper.rb +6 -2
  14. data/lib/rbbt/tsv/parallel.rb +0 -3
  15. data/lib/rbbt/util/R.rb +2 -2
  16. data/lib/rbbt/util/cmd.rb +10 -0
  17. data/lib/rbbt/util/misc/bgzf.rb +1 -1
  18. data/lib/rbbt/util/misc/indiferent_hash.rb +8 -0
  19. data/lib/rbbt/util/misc/inspect.rb +11 -7
  20. data/lib/rbbt/util/named_array.rb +1 -1
  21. data/lib/rbbt/util/open.rb +17 -16
  22. data/lib/rbbt/workflow.rb +2 -1
  23. data/lib/rbbt/workflow/accessor.rb +3 -2
  24. data/lib/rbbt/workflow/definition.rb +3 -1
  25. data/lib/rbbt/workflow/examples.rb +2 -2
  26. data/lib/rbbt/workflow/integration/ansible.rb +53 -0
  27. data/lib/rbbt/workflow/integration/ansible/workflow.rb +60 -0
  28. data/lib/rbbt/workflow/step.rb +16 -5
  29. data/lib/rbbt/workflow/step/accessor.rb +36 -24
  30. data/lib/rbbt/workflow/step/dependencies.rb +8 -2
  31. data/lib/rbbt/workflow/step/run.rb +22 -19
  32. data/lib/rbbt/workflow/util/archive.rb +2 -0
  33. data/lib/rbbt/workflow/util/orchestrator.rb +30 -12
  34. data/lib/rbbt/workflow/util/provenance.rb +7 -3
  35. data/share/rbbt_commands/ansible +55 -0
  36. data/share/rbbt_commands/purge_job +0 -1
  37. data/share/rbbt_commands/slurm/list +141 -0
  38. data/share/rbbt_commands/slurm/orchestrate +47 -0
  39. data/share/rbbt_commands/{workflow/slurm → slurm/task} +10 -3
  40. data/share/rbbt_commands/system/status +22 -22
  41. data/share/rbbt_commands/workflow/forget_deps +9 -0
  42. data/share/rbbt_commands/workflow/info +12 -9
  43. data/share/rbbt_commands/workflow/prov +2 -1
  44. data/test/rbbt/association/test_index.rb +6 -6
  45. data/test/rbbt/knowledge_base/test_query.rb +3 -3
  46. data/test/rbbt/knowledge_base/test_registry.rb +1 -1
  47. data/test/rbbt/persist/tsv/test_cdb.rb +0 -7
  48. data/test/rbbt/persist/tsv/test_kyotocabinet.rb +2 -8
  49. data/test/rbbt/persist/tsv/test_leveldb.rb +0 -6
  50. data/test/rbbt/persist/tsv/test_lmdb.rb +0 -6
  51. data/test/rbbt/persist/tsv/test_tokyocabinet.rb +15 -14
  52. data/test/rbbt/test_entity.rb +0 -1
  53. data/test/rbbt/test_knowledge_base.rb +3 -4
  54. data/test/rbbt/test_persist.rb +10 -6
  55. data/test/rbbt/test_workflow.rb +49 -16
  56. data/test/rbbt/tsv/test_accessor.rb +11 -0
  57. data/test/rbbt/tsv/test_attach.rb +86 -8
  58. data/test/rbbt/tsv/test_index.rb +6 -7
  59. data/test/rbbt/tsv/test_manipulate.rb +2 -3
  60. data/test/rbbt/util/R/test_model.rb +2 -1
  61. data/test/rbbt/util/R/test_plot.rb +0 -2
  62. data/test/rbbt/util/concurrency/test_processes.rb +1 -1
  63. data/test/rbbt/util/misc/test_bgzf.rb +11 -7
  64. data/test/rbbt/util/misc/test_lock.rb +0 -1
  65. data/test/rbbt/util/misc/test_multipart_payload.rb +1 -1
  66. data/test/rbbt/util/misc/test_pipes.rb +0 -5
  67. data/test/rbbt/util/test_R.rb +1 -0
  68. data/test/rbbt/util/test_log.rb +4 -6
  69. data/test/rbbt/util/test_misc.rb +0 -2
  70. data/test/rbbt/util/test_open.rb +0 -1
  71. data/test/rbbt/util/test_python.rb +17 -1
  72. data/test/rbbt/workflow/test_remote_workflow.rb +1 -1
  73. data/test/rbbt/workflow/test_step.rb +8 -3
  74. data/test/rbbt/workflow/util/test_orchestrator.rb +50 -0
  75. metadata +10 -5
  76. data/test/rbbt/workflow/remote/test_client.rb +0 -56
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2fa2f71df9aef8810599ab3660c847c084dea75ccb2485d294cfa1962e341162
4
- data.tar.gz: 7eb004ddf8bcf30b00325dc5251d9ed73a307de433c18c0847eb4d70a4bea1e2
3
+ metadata.gz: 169a7a3ce3ebf35313ca6f64d91e98bdb2bb7df79b0367f6888bf0978c67fb01
4
+ data.tar.gz: 654b5db6fea8f9ab2df8e8b00aaf945bdc9904965df0489ed72feba136ac6fd6
5
5
  SHA512:
6
- metadata.gz: 6ba23322f9ac768f1782c4e26605c50171b6d35d1d3c2acc0ef195e26ca39af851a232e1581ca2ec220899b7780f35f8d0d36e1f61f0b0e6500c883999c3760b
7
- data.tar.gz: afa0952f228a951ea78506ad0776fd96dbfbd2b91bac73310ee6807e4a7d0a74c7ca8f582880c5abd28bbf709c8be3d9dff1797056be756962091b2e97031f30
6
+ metadata.gz: dd168909a2df9e5931f74ca9b91ee64e5ec6b003561d75d2843736321deb1e62ac5807133822be461d370b80ecc8b64885c9e20486548d949d20c90fe72cbbab
7
+ data.tar.gz: 377c2c070eb32856de9fb09107cc3ab72de64936590db2bb58ca2c4eaf057e954c258f1345d4b542bb39b2aba756f0c37bfdae999b9aacf01267a1bf327d13fa
data/lib/rbbt/entity.rb CHANGED
@@ -38,7 +38,7 @@ module Entity
38
38
  if value.to_s == k.to_s
39
39
  found = k
40
40
  break
41
- elsif value =~ /\(#{Regexp.quote k}\)/
41
+ elsif value.to_s =~ /\(#{Regexp.quote k}\)/
42
42
  found = k
43
43
  break
44
44
  end
data/lib/rbbt/fix_width_table.rb CHANGED
@@ -67,7 +67,7 @@ class FixWidthTable
67
67
 
68
68
  def format(pos, value)
69
69
  padding = value_size - value.length
70
- if range
70
+ if @range
71
71
  (pos + [padding, value + ("\0" * padding)]).pack("llll#{mask}")
72
72
  else
73
73
  [pos, padding, value + ("\0" * padding)].pack("ll#{mask}")
@@ -105,7 +105,7 @@ class FixWidthTable
105
105
 
106
106
  def idx_value(index)
107
107
  return nil if index < 0 or index >= size
108
- @file.seek((range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
108
+ @file.seek((@range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
109
109
  padding = @file.read(4).unpack("l").first+1
110
110
  txt = @file.read(value_size)
111
111
  str = txt.unpack(mask).first
@@ -277,7 +277,8 @@ class FixWidthTable
277
277
 
278
278
  def [](pos)
279
279
  return [] if size == 0
280
- if range
280
+ self.read
281
+ if @range
281
282
  get_range(pos)
282
283
  else
283
284
  get_point(pos)
@@ -286,7 +287,7 @@ class FixWidthTable
286
287
 
287
288
  def overlaps(pos, value = false)
288
289
  return [] if size == 0
289
- idxs = if range
290
+ idxs = if @range
290
291
  get_range(pos, true)
291
292
  else
292
293
  get_point(pos, true)
data/lib/rbbt/hpc.rb CHANGED
@@ -1,551 +1,3 @@
1
1
  require 'rbbt-util'
2
2
  require 'rbbt/util/cmd'
3
-
4
- module Marenostrum
5
- SERVER='mn1'
6
- class SBATCH < Exception;
7
- attr_accessor :directory
8
- def initialize(directory)
9
- @directory = directory
10
- end
11
- end
12
-
13
- module SLURM
14
-
15
- def self.template(args, options = {})
16
-
17
- development = options.delete :drbbt
18
- singularity = options.delete :singularity
19
- contain = options.delete :contain
20
- sync = options.delete :sync
21
- user_group = options.delete :user_group
22
- contain_and_sync = options.delete :contain_and_sync
23
- wipe_container = options.delete :wipe_container
24
- copy_image = options.delete :copy_image
25
- exclusive = options.delete :exclusive
26
- highmem = options.delete :highmem
27
-
28
- queue = options.delete(:queue) || 'bsc_ls'
29
- task_cpus = options.delete(:task_cpus) || 1
30
- nodes = options.delete(:nodes) || 1
31
- time = options.delete(:time) || "0:00:10"
32
-
33
- inputs_dir = options.delete :inputs_dir
34
- config_keys = options.delete :config_keys
35
-
36
- user = ENV['USER'] || `whoami`.strip
37
- group = File.basename(File.dirname(ENV['HOME']))
38
-
39
- if contain_and_sync
40
- contain = "/scratch/tmp/rbbt-#{user}" if contain.nil?
41
- sync = "~/.rbbt/var/jobs" if sync.nil?
42
- wipe_container = "post" if wipe_container.nil?
43
- end
44
-
45
- contain = nil if contain == "" || contain == "none"
46
- sync = nil if sync == "" || sync == "none"
47
-
48
- contain = File.expand_path(contain) if contain
49
-
50
- name = options[:name] ||= Misc.obj2digest({:options => options.collect{|k,v| [k,v]}.sort_by{|k,v| k.to_s }, :args => args})
51
- options.delete(:name)
52
- slurm_basedir = options[:slurm_basedir] ||= File.expand_path(File.join('~/rbbt-slurm', name)) if slurm_basedir.nil?
53
- options.delete(:slurm_basedir)
54
-
55
- rbbt_cmd = args.reject{|e| e == '--' }.collect{|e| e.include?(" ")? '"' + e + '"' : e } * " "
56
-
57
- rbbt_cmd += " " << options.collect do |o,v|
58
- o = o.to_s
59
- case v
60
- when TrueClass
61
- '--' << o
62
- when FalseClass
63
- '--' << o << "=false"
64
- else
65
- ['--' << o, "'#{v}'"] * " "
66
- end
67
- end * " "
68
-
69
- rbbt_cmd << " --config_keys='#{config_keys}'" if config_keys and not config_keys.empty?
70
-
71
-
72
- time = Misc.format_seconds Misc.timespan(time) unless time.include? ":"
73
-
74
-
75
- #{{{ PREPARE LOCAL LOGFILES
76
-
77
- Open.mkdir slurm_basedir
78
-
79
- fout = File.join(slurm_basedir, 'std.out')
80
- ferr = File.join(slurm_basedir, 'std.err')
81
- fjob = File.join(slurm_basedir, 'job.id')
82
- fexit = File.join(slurm_basedir, 'exit.status')
83
- fsync = File.join(slurm_basedir, 'sync.log')
84
- fcmd = File.join(slurm_basedir, 'command.slurm')
85
-
86
- #{{{ GENERATE TEMPLATE
87
-
88
- # HEADER
89
- header =<<-EOF
90
- #!/bin/bash
91
- #SBATCH --qos="#{queue}"
92
- #SBATCH --job-name="#{name}"
93
- #SBATCH --workdir="#{Dir.pwd}"
94
- #SBATCH --output="#{fout}"
95
- #SBATCH --error="#{ferr}"
96
- #SBATCH --cpus-per-task="#{task_cpus}"
97
- #SBATCH --time="#{time}"
98
- #SBATCH --nodes="#{nodes}"
99
- EOF
100
-
101
- if highmem
102
- header +=<<-EOF
103
- #SBATCH --constraint=highmem
104
- EOF
105
- end
106
-
107
- if exclusive
108
- header +=<<-EOF
109
- #SBATCH --exclusive
110
- EOF
111
- end
112
-
113
- header +=<<-EOF
114
- #CMD: #{rbbt_cmd}
115
- EOF
116
-
117
- # ENV
118
- env = ""
119
- env +=<<-EOF
120
- # Prepare env
121
- [[ -f ~/config/load.sh ]] && source ~/config/load.sh
122
- module load java
123
-
124
- # Calculate max available memory
125
- let "MAX_MEMORY=$SLURM_MEM_PER_CPU * $SLURM_CPUS_ON_NODE"
126
- EOF
127
-
128
-
129
- # RUN
130
- run = ""
131
- exec_cmd = %(env _JAVA_OPTIONS="-Xms1g -Xmx${MAX_MEMORY}m")
132
-
133
-
134
- if singularity
135
- #{{{ SINGULARITY
136
-
137
- singularity_exec = %(singularity exec -e -B $SINGULARITY_OPT_DIR:/singularity_opt/ -B /apps/)
138
-
139
- env +=<<-EOF
140
- module load intel/2018.1
141
- module load singularity
142
- PROJECTS_ROOT="/gpfs/projects/bsc26/"
143
- SINGULARITY_IMG="$PROJECTS_ROOT/rbbt.singularity.img"
144
- SINGULARITY_OPT_DIR="$PROJECTS_ROOT/singularity_opt/"
145
- SINGULARITY_RUBY_INLINE="$HOME/.singularity_ruby_inline"
146
- mkdir -p "$SINGULARITY_RUBY_INLINE"
147
- EOF
148
-
149
- prep = ""
150
-
151
- if contain
152
- scratch_group_dir = File.join('/gpfs/scratch/', group)
153
- projects_group_dir = File.join('/gpfs/projects/', group)
154
-
155
- prep +=<<-EOF
156
-
157
- # Prepare container dir
158
- CONTAINER_DIR="#{contain}"
159
- mkdir -p $CONTAINER_DIR/.rbbt/etc/
160
-
161
- for dir in .ruby_inline git home; do
162
- mkdir -p $CONTAINER_DIR/$dir
163
- done
164
-
165
- for tmpd in persist_locks produce_locks R_sockets sensiblewrite sensiblewrite_locks step_info_locks tsv_open_locks; do
166
- mkdir -p $CONTAINER_DIR/.rbbt/tmp/$tmpd
167
- done
168
-
169
- # Copy environment
170
- cp ~/.rbbt/etc/environment $CONTAINER_DIR/.rbbt/etc/
171
-
172
- # Set search_paths
173
- echo "singularity: /singularity_opt/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" > $CONTAINER_DIR/.rbbt/etc/search_paths
174
- echo "rbbt_user: /home/rbbt/.rbbt/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
175
- echo "home: $CONTAINER_DIR/home/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
176
- echo "group_projects: #{projects_group_dir}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
177
- echo "group_scratch: #{scratch_group_dir}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
178
- echo "user_projects: #{projects_group_dir}/#{user}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
179
- echo "user_scratch: #{scratch_group_dir}/#{user}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
180
- EOF
181
-
182
- if user_group && group != user_group
183
- prep +=<<-EOF
184
-
185
- # Add user_group search_path
186
- echo "#{user_group}: /gpfs/projects/#{user_group}/{PKGDIR}/{TOPLEVEL}/{SUBPATH}" >> $CONTAINER_DIR/.rbbt/etc/search_paths
187
- EOF
188
- end
189
-
190
- if inputs_dir
191
- prep +=<<-EOF
192
-
193
- # Copy inputs
194
- [[ -d '#{inputs_dir}' ]] && cp -R '#{inputs_dir}' $CONTAINER_DIR/inputs
195
- EOF
196
- rbbt_cmd = rbbt_cmd.sub(inputs_dir, "#{contain}/inputs")
197
- end
198
-
199
- if copy_image
200
- prep +=<<EOF
201
-
202
- # Copy image
203
- rsync -avz "$SINGULARITY_IMG" "$CONTAINER_DIR/rbbt.singularity.img" 1>&2
204
- SINGULARITY_IMG="$CONTAINER_DIR/rbbt.singularity.img"
205
- EOF
206
- end
207
-
208
- if wipe_container == "pre" || wipe_container == "both"
209
- if singularity
210
- prep +=<<-EOF
211
-
212
- # Clean container pre
213
- singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -Rfv .rbbt/var/jobs &>> #{fsync}
214
- singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean -f &>> #{fsync}
215
- singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -Rfv tmp/ &>> #{fsync}
216
- EOF
217
- end
218
- end
219
- end
220
-
221
- if contain
222
- singularity_exec << %( -C -H "$CONTAINER_DIR" \
223
- -B /scratch/tmp \
224
- #{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
225
- -B #{scratch_group_dir} \
226
- -B #{projects_group_dir} \
227
- -B "$SINGULARITY_RUBY_INLINE":"$CONTAINER_DIR/.ruby_inline":rw \
228
- -B ~/git:"$CONTAINER_DIR/git":ro \
229
- #{Open.exists?('~/.rbbt/software/opt/')? '-B ~/.rbbt/software/opt/:"/opt/":ro' : '' } \
230
- -B ~/.rbbt:"$CONTAINER_DIR/home/":ro \
231
- "$SINGULARITY_IMG")
232
- exec_cmd << ' TMPDIR="$CONTAINER_DIR/.rbbt/tmp" '
233
- else
234
- singularity_exec += %( -B "$SINGULARITY_RUBY_INLINE":"$HOME/.ruby_inline":rw "$SINGULARITY_IMG" )
235
- end
236
-
237
- if development
238
- exec_cmd += " rbbt --dev='#{development}'"
239
- else
240
- exec_cmd += ' rbbt'
241
- end
242
-
243
- exec_cmd = singularity_exec + " " + exec_cmd
244
- else
245
- if development
246
- exec_cmd << " " << %(~/git/rbbt-util/bin/rbbt --dev=#{development})
247
- else
248
- exec_cmd << " " << 'rbbt'
249
- end
250
-
251
- if contain
252
- rbbt_cmd << " " << %(--workdir_all='#{contain}')
253
- end
254
- end
255
-
256
-
257
- cmd =<<-EOF
258
- #{exec_cmd} \\
259
- #{rbbt_cmd}
260
- EOF
261
-
262
- run +=<<-EOF
263
-
264
- # Run command
265
- #{cmd}
266
-
267
- # Save exit status
268
- exit_status=$?
269
-
270
- EOF
271
-
272
- # CODA
273
- coda = ""
274
- if sync
275
- if singularity
276
- coda +=<<-EOF
277
- singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean all -q &>> #{fsync}
278
- EOF
279
- else
280
- coda +=<<-EOF
281
- rbbt system clean all -q &>> #{fsync}
282
- EOF
283
- end
284
-
285
- if sync.include?("=>")
286
- source, _sep, sync = sync.partition("=>")
287
- source = source.strip
288
- sync = sync.strip
289
- source = File.join(File.expand_path(contain), source)
290
- else
291
- source = File.join(File.expand_path(contain), '.rbbt/var/jobs')
292
- end
293
-
294
- target = File.expand_path(sync)
295
- coda +=<<-EOF
296
-
297
- # Sync data to target location
298
- mkdir -p "$(dirname '#{target}')"
299
- rsync -avztAXHP --copy-unsafe-links "#{source}/" "#{target}/" &>> #{fsync}
300
- sync_es="$?"
301
- find '#{target}' -type l -ls | awk '$13 ~ /^#{target.gsub('/','\/')}/ { sub("#{source}", "#{target}", $13); print $11, $13 }' | while read A B; do rm $A; ln -s $B $A; done
302
- EOF
303
-
304
- if contain && (wipe_container == "post" || wipe_container == "both")
305
- prep =<<-EOF + prep
306
- if ls -A '#{contain}' &> /dev/null ; then
307
- echo "ERROR: Container directory not empty, refusing to wipe. #{contain}" &>> #{fsync}
308
- fi
309
- EOF
310
- if singularity
311
- coda +=<<-EOF
312
- singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -v /dev/shm/sem.*.{in,out,process} /dev/shm/sem.Session-PID.*.sem 2> /dev/null >> #{fsync}
313
-
314
-
315
- # Clean container directory
316
- #if [ $exit_status == '0' -a $sync_es == '0' ]; then
317
- singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rbbt system clean -f &>> #{fsync}
318
- singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -Rfv .rbbt/var/jobs &>> #{fsync}
319
- singularity exec -e -C -H "$CONTAINER_DIR" "$SINGULARITY_IMG" rm -Rfv tmp/ &>> #{fsync}
320
- #else
321
- # echo "ERROR: Process failed or results could not sync correctly. Contain directory not purged" &>> #{fsync}
322
- #fi
323
- EOF
324
- else
325
- coda +=<<-EOF
326
- #{exec_cmd} system clean
327
- if [ $exit_status == '0' -a $sync_es == '0' ]; then
328
- rm -Rfv #{contain} &>> #{fsync}
329
- else
330
- echo "ERROR: Process failed or results could not sync correctly. Contain directory not purged" &>> #{fsync}
331
- fi
332
- unset sync_es
333
- EOF
334
-
335
- end
336
- end
337
- end
338
- coda +=<<-EOF
339
-
340
- # Write exit status to file
341
- echo $exit_status > #{fexit}
342
- EOF
343
- if sync
344
- coda +=<<-EOF
345
- if [ "$sync_es" == '0' ]; then
346
- unset sync_es
347
- exit $exit_status
348
- else
349
- exit $sync_es
350
- fi
351
- EOF
352
- else
353
- coda +=<<-EOF
354
- exit $exit_status
355
- EOF
356
- end
357
-
358
- template = [header, env, prep, run, coda] * "\n"
359
-
360
- template
361
- end
362
-
363
- def self.issue_template(template, options = {})
364
-
365
- slurm_basedir = options[:slurm_basedir]
366
- Open.mkdir slurm_basedir
367
-
368
- dry_run = options.delete :dry_run
369
-
370
- fout = File.join(slurm_basedir, 'std.out')
371
- ferr = File.join(slurm_basedir, 'std.err')
372
- fjob = File.join(slurm_basedir, 'job.id')
373
- fexit = File.join(slurm_basedir, 'exit.status')
374
- fsync = File.join(slurm_basedir, 'sync.log')
375
- fcmd = File.join(slurm_basedir, 'command.slurm')
376
-
377
- job = nil
378
- if options[:clean_job]
379
- [fcmd, fjob, fout, ferr, fsync, fexit].each do |file|
380
- Open.rm file if Open.exists? file
381
- end
382
- end
383
-
384
- return if Open.exists?(fexit)
385
-
386
- STDERR.puts Log.color(:magenta, "Issuing SLURM file: #{fcmd}")
387
- STDERR.puts template
388
-
389
- Open.write(fcmd, template) unless File.exists? fcmd
390
- if File.exists?(fjob)
391
- job = Open.read(fjob).to_i
392
- else
393
- if File.exists?(fout)
394
- return
395
- elsif dry_run
396
- STDERR.puts Log.color(:magenta, "To execute run: ") + Log.color(:blue, "sbatch '#{slurm_basedir}/command.slurm'")
397
- STDERR.puts Log.color(:magenta, "To monitor progress run (needs local rbbt): ") + Log.color(:blue, "rbbt mn --tail -w '#{slurm_basedir}'")
398
- raise Marenostrum::SBATCH, slurm_basedir
399
- else
400
- Open.rm fsync
401
- Open.rm fexit
402
- Open.rm fout
403
- Open.rm ferr
404
- job = CMD.cmd("sbatch '#{fcmd}'").read.scan(/\d+/).first.to_i
405
- Open.write(fjob, job.to_s)
406
- end
407
- end
408
- end
409
-
410
- def self.follow_job(slurm_basedir, tail = true)
411
- fjob = File.join(slurm_basedir, 'job.id')
412
- fout = File.join(slurm_basedir, 'std.out')
413
- ferr = File.join(slurm_basedir, 'std.err')
414
- fstatus = File.join(slurm_basedir, 'job.status')
415
-
416
- job = Open.read(fjob).strip if Open.exists?(fjob)
417
-
418
- if job
419
- status_txt = CMD.cmd("squeue --job #{job}").read
420
- STDERR.puts Log.color(:magenta, "Status [#{job.to_i}]:")
421
- STDERR.puts status_txt
422
- lines = status_txt.split("\n").length
423
- end
424
-
425
- if tail
426
- Log.severity = 10
427
- while ! File.exists? fout
428
- if job
429
- STDERR.puts
430
- Log.clear_line(STDERR)
431
- STDERR.write Log.color(:magenta, "Waiting for Output")
432
- 3.times do
433
- STDERR.write Log.color(:magenta, ".")
434
- sleep 1
435
- end
436
- status_txt = CMD.cmd("squeue --job #{job}").read
437
- lines.times do
438
- Log.clear_line(STDERR)
439
- end
440
- Log.clear_line(STDERR)
441
- STDERR.puts Log.color(:magenta, "Status [#{job.to_i}]:")
442
- STDERR.puts status_txt
443
- lines = status_txt.split("\n").length
444
- end
445
- end
446
- STDERR.puts
447
- Log.clear_line(STDERR)
448
- STDERR.puts Log.color(:magenta, "Output:")
449
- begin
450
- CMD.cmd("squeue --job #{job} > #{fstatus}")
451
- out = CMD.cmd("tail -f '#{fout}'", :pipe => true) if File.exists?(fout) and not tail == :STDERR
452
- err = CMD.cmd("tail -f '#{ferr}'", :pipe => true) if File.exists?(ferr)
453
-
454
- terr = Misc.consume_stream(err, true, STDERR) if err
455
- tout = Misc.consume_stream(out, true, STDOUT) if out
456
-
457
- sleep 3 while CMD.cmd("squeue --job #{job}").read.include? job.to_s
458
- rescue Aborted
459
- ensure
460
- begin
461
- terr.exit if terr
462
- tout.exit if tout
463
- err.close if err
464
- err.join if err
465
- rescue Exception
466
- end
467
-
468
- begin
469
- out.close if out
470
- out.join if out
471
- rescue Exception
472
- end
473
- end
474
- end
475
- end
476
-
477
- def self.wait_for_job(slurm_basedir, time = 1)
478
- fexit = File.join(slurm_basedir, 'exit.status')
479
- fjob = File.join(slurm_basedir, 'job.id')
480
- job = Open.read(fjob) if Open.exists?(fjob)
481
-
482
-
483
- while ! Open.exists?(fexit)
484
- sleep time
485
- end
486
- end
487
-
488
- def self.run_job(job, options = {})
489
- options = IndiferentHash.setup(options.dup)
490
-
491
- dry_run = options.delete :dry_run
492
- tail = options.delete :tail
493
-
494
- workflow = job.workflow
495
- task = job.task_name
496
-
497
- keep_slurm_basedir = options.delete :keep_SLURM_slurm_basedir
498
- slurm_basedir = options.delete :SLURM_basedir
499
- slurm_basedir = "~/rbbt-slurm" if slurm_basedir.nil?
500
- TmpFile.with_file(nil, !keep_slurm_basedir, :tmpdir => slurm_basedir, :prefix => "SLURM_rbbt_job-") do |tmp_directory|
501
- options[:slurm_basedir] ||= tmp_directory
502
- slurm_basedir = options[:slurm_basedir]
503
- inputs_dir = File.join(tmp_directory, 'inputs_dir')
504
- saved = Step.save_job_inputs(job, inputs_dir, options)
505
- if saved
506
- options[:inputs_dir] = inputs_dir
507
- cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '--load_inputs', inputs_dir, '--log', (options[:log] || Log.severity).to_s]
508
- else
509
- cmd = ['workflow', 'task', workflow.to_s, task.to_s, '-pf', '--log', (options[:log] || Log.severity).to_s]
510
- end
511
-
512
-
513
- template = self.template(cmd, options)
514
- self.issue_template(template, options.merge(:slurm_basedir => slurm_basedir, :dry_run => dry_run))
515
-
516
- return unless tail
517
-
518
- t_monitor = Thread.new do
519
- self.follow_job(slurm_basedir, :STDERR)
520
- end
521
- self.wait_for_job(slurm_basedir)
522
- t_monitor.raise Aborted
523
- return unless Open.read(File.join(slurm_basedir, 'exit.status')).strip == '0'
524
- path = Open.read(File.join(slurm_basedir, 'std.out')).strip
525
- if Open.exists?(path) && job.path != path
526
- Log.info "Path of SLURM job #{path} is different from original job #{job.path}. Stablishing link."
527
- Open.ln path, job.path
528
- Open.ln path + '.info', job.path + '.info' if Open.exists?(path + '.info')
529
- Open.ln path + '.files', job.path + '.files' if Open.exists?(path + '.files')
530
- end
531
- end
532
- end
533
- end
534
-
535
- def self.relay(job, options={})
536
- options = Misc.add_defaults options, :target => 'mn1', :search_path => 'user'
537
- done_deps = job.dependencies.select do |dep|
538
- dep.done?
539
- end
540
-
541
- error_deps = job.dependencies.select do |dep|
542
- dep.error? && ! dep.recoverable_error?
543
- end
544
-
545
- (done_deps + error_deps).each do |dep|
546
- Step.migrate(dep.path, options[:search_path], options)
547
- end
548
-
549
- end
550
- end
551
-
3
+ require 'rbbt/hpc/slurm'