workflow_manager 0.5.2 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/wfm_hello +2 -1
- data/bin/wfm_monitoring +2 -1
- data/lib/workflow_manager/cluster.rb +340 -0
- data/lib/workflow_manager/server.rb +3 -0
- data/lib/workflow_manager/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: ede215b5a867ebbafc9af7ead5a9afa04ed1530eac7d0d1bffb2ee42b09cbded
+  data.tar.gz: 7f68d53462f3c22ec889edaec171a6d5685063090b28264dfd93a224ca2d6f95
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: dcc2bac302faf22e7170143d7377bb87144b3e6ccf049ba8d768318fd2f3a7ccdb213643c55fadbddfcdedd92b103e572dd75310d94c27fa917f55fd1c9d2c87
+  data.tar.gz: 18789821274de2c8961f15e6a49194c1d4e2c2f09eab095e9f7e14f575388507ff83a0dff5da480bcb79ec276956eea85739ee4d3f8cfd12c6c44139cdc3b465
data/bin/wfm_hello
CHANGED
@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
 # encoding: utf-8
 # 20121112 masa workflow manager client
-Version = '
+Version = '20200522-134606'
 
 require 'drb/drb'
 
@@ -28,3 +28,4 @@ if wfmrc
 end
 workflow_manager = DRbObject.new_with_uri(uri)
 puts workflow_manager.hello
+puts workflow_manager.cluster_node_list
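Note: wfm_hello is a thin DRb client, so the one-line addition above is all it takes to surface the server's new node listing. A minimal sketch of the round trip, with a placeholder URI (the real script resolves the URI from a .wfmrc file, as the second hunk's context shows):

require 'drb/drb'

uri = 'druby://localhost:12345'          # placeholder; normally read from .wfmrc
workflow_manager = DRbObject.new_with_uri(uri)
puts workflow_manager.hello              # existing liveness check
puts workflow_manager.cluster_node_list  # new in 0.5.x: available cluster nodes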
data/bin/wfm_monitoring
CHANGED
@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
 # encoding: utf-8
 # 20121112 masa workflow manager client
-Version = '
+Version = '20200722-161135'
 
 require 'drb/drb'
 require 'workflow_manager/optparse_ex'
@@ -16,6 +16,7 @@ opt = OptionParser.new do |o|
   o.on(:nodes, '-n nodes', '--nodes', 'Comma separated list of nodes to submit to for g-sub')
   o.on(:ram, '-r RAM', '--RAM', 'Amount of RAM to request in Gigs for g-sub')
   o.on(:scratch, '-s scratch', '--scratch', 'Amount of scratch space to request in Gigs for g-sub')
+  o.on(:queue, '-q queue', '--queue', 'Queue name')
   o.parse!(ARGV)
 end
 unless script_file = ARGV[0] and script_file =~ /\.sh/
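The new -q/--queue option travels alongside the existing resource flags in the option string handed to the server's cluster backend. A hypothetical example (queue name and values invented):

# wfm_monitoring -r 30 -s 100 -q employee my_script.sh
option = "-r 30 -s 100 -q employee"
# On a Slurm-backed cluster, FGCZDebian10Cluster#parse (added in
# cluster.rb below) rewrites this as: "--mem=30G --tmp=100G -p employee"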
data/lib/workflow_manager/cluster.rb
CHANGED
@@ -38,6 +38,8 @@ module WorkflowManager
     end
     def default_node
     end
+    def node_list
+    end
   end
 
   class LocalComputer < Cluster
@@ -238,6 +240,63 @@ module WorkflowManager
         'fgcz-h-010: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-010',
       }
     end
+    def node_list
+      node2scr = {}
+      command = "qhost -F scratch"
+      keep = nil
+      IO.popen(command) do |out|
+        while line=out.gets
+          hostname, arch, ncpu, loading, memtot, memuse, *others = line.split
+          if hostname =~ /fgcz/
+            keep = hostname
+          elsif scratch_ = line.chomp.split.last and
+            scratch = scratch_.split('=').last
+            node2scr[keep] = scratch.to_i
+            keep = nil
+          end
+        end
+      end
+
+      list = {}
+      keep = nil
+      command = 'qhost -q'
+      IO.popen(command) do |out|
+        while line=out.gets
+          # HOSTNAME ARCH NCPU LOAD MEMTOT MEMUSE SWAPTO SWAPUS
+          hostname, arch, ncpu, loading, memtot, memuse, *others = line.split
+          if hostname =~ /fgcz/
+            #puts [hostname, ncpu, loading, memtot, memuse].join("\t")
+            mem = memtot.gsub(/G/, '').to_i
+            keep = [hostname, ncpu, "#{mem}G"]
+          elsif hostname == "GT" and keep and cores = line.chomp.split.last and cores !~ /[du]/
+            hostname = keep.shift
+            keep[0] = cores
+            if scr = node2scr[hostname] and scr >= 1000
+              scr = "%.1f" % (scr.to_f / 1000)
+              scr << "T"
+            else
+              scr = scr.to_s + "G"
+            end
+            keep << scr
+            list[hostname] = keep
+            keep = nil
+          end
+        end
+      end
+
+      # reformat
+      nodes = {}
+      list.each do |hostname, specs|
+        # 20190823 masa tentatively off use f47
+        unless hostname =~ /fgcz-c-047/
+          cores, ram, scr = specs
+          key = "#{hostname}: cores #{cores}, ram #{ram}, scr #{scr}"
+          value = hostname
+          nodes[key] = value
+        end
+      end
+      nodes
+    end
   end
 
   class FGCZCourseCluster < FGCZCluster
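The node_list added above makes two qhost passes: `qhost -F scratch` collects per-node scratch space, then `qhost -q` supplies cores and RAM, and the final loop reformats everything into the same "description => hostname" hash shape that the hardcoded cluster_nodes methods return. A small sketch of its scratch-size formatting rule, using a hypothetical helper name:

# Mirrors the branch inside node_list: 1000 GB or more is rendered
# in terabytes with one decimal place, anything smaller in gigabytes.
def format_scratch(scr_gb)
  if scr_gb and scr_gb >= 1000
    "%.1f" % (scr_gb.to_f / 1000) + "T"
  else
    "#{scr_gb}G"
  end
end

format_scratch(400)   # => "400G"
format_scratch(1200)  # => "1.2T"

# Resulting hash shape (entries invented for illustration):
# { "fgcz-h-110: cores 8, ram 30G, scr 1.2T" => "fgcz-h-110" }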
@@ -290,4 +349,285 @@ module WorkflowManager
       }
     end
   end
+
+  class FGCZDevian10Cluster < Cluster
+    def submit_job(script_file, script_content, option='')
+      if script_name = File.basename(script_file) and script_name =~ /\.sh/
+        script_name = script_name.split(/\.sh/).first + ".sh"
+        new_job_script = generate_new_job_script(script_name, script_content)
+        new_job_script_base = File.basename(new_job_script)
+        log_file = File.join(@log_dir, new_job_script_base + "_o.log")
+        err_file = File.join(@log_dir, new_job_script_base + "_e.log")
+        command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
+        #command = "sbatch -o #{log_file} -e #{err_file} #{new_job_script}"
+        job_id = `#{command}`
+        #job_id = job_id.match(/Your job (\d+) \(/)[1]
+        job_id = job_id.chomp.split.last
+        [job_id, log_file, command]
+      else
+        err_msg = "FGCZDevian10Cluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
+        warn err_msg
+        raise err_msg
+      end
+    end
+    def job_running?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
+          # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state == 'R'
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def job_ends?(log_file)
+      log_flag = false
+      IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
+        while line=io.gets
+          if line =~ /__SCRIPT END__/
+            log_flag = true
+            break
+          end
+        end
+      end
+      log_flag
+    end
+    def job_pending?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state =~ /PD/
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def copy_commands(org_dir, dest_parent_dir, now=nil)
+      commands = if now == "force"
+        target_file = File.join(dest_parent_dir, File.basename(org_dir))
+        ["g-req copynow -f #{org_dir} #{dest_parent_dir}"]
+      elsif now
+        ["g-req copynow #{org_dir} #{dest_parent_dir}"]
+      else
+        ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
+      end
+    end
+    def kill_command(job_id)
+      command = "scancel #{job_id}"
+    end
+    def delete_command(target)
+      command = "g-req remove #{target}"
+    end
+    def cluster_nodes
+      nodes = {
+        'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
+        'fgcz-h-901: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-901',
+      }
+    end
+  end
+
+  class FGCZDebian10CourseCluster < Cluster
+    def submit_job(script_file, script_content, option='')
+      if script_name = File.basename(script_file) and script_name =~ /\.sh/
+        script_name = script_name.split(/\.sh/).first + ".sh"
+        new_job_script = generate_new_job_script(script_name, script_content)
+        new_job_script_base = File.basename(new_job_script)
+        log_file = File.join(@log_dir, new_job_script_base + "_o.log")
+        err_file = File.join(@log_dir, new_job_script_base + "_e.log")
+        command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
+        job_id = `#{command}`
+        job_id = job_id.chomp.split.last
+        [job_id, log_file, command]
+      else
+        err_msg = "FGCZDebian10CourseCluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
+        warn err_msg
+        raise err_msg
+      end
+    end
+    def job_running?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
+          # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state == 'R'
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def job_ends?(log_file)
+      log_flag = false
+      IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
+        while line=io.gets
+          if line =~ /__SCRIPT END__/
+            log_flag = true
+            break
+          end
+        end
+      end
+      log_flag
+    end
+    def job_pending?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state =~ /PD/
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def copy_commands(org_dir, dest_parent_dir, now=nil)
+      commands = ["cp -r #{org_dir} #{dest_parent_dir}"]
+    end
+    def kill_command(job_id)
+      command = "scancel #{job_id}"
+    end
+    def delete_command(target)
+      command = "rm -rf #{target}"
+    end
+    def cluster_nodes
+      nodes = {
+        'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
+        'fgcz-h-901: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-901',
+        'fgcz-h-902: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-902',
+        'fgcz-h-903: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-903',
+        'fgcz-h-904: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-904',
+        'fgcz-h-905: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-905',
+        'fgcz-h-906: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-906',
+        'fgcz-h-907: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-907',
+        'fgcz-h-908: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-908',
+        'fgcz-h-909: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-909',
+        'fgcz-h-910: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-910',
+        'fgcz-h-911: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-911',
+        'fgcz-h-912: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-912',
+        'fgcz-h-913: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-913',
+        'fgcz-h-914: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-914',
+        'fgcz-h-915: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-915',
+        'fgcz-h-916: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-916',
+        'fgcz-h-917: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-917',
+      }
+    end
+  end
+
+  class FGCZDebian10Cluster < Cluster
+    def parse(options)
+      options = options.split
+      ram = if i = options.index("-r")
+        options[i+1]
+      end
+      cores = if i = options.index("-c")
+        options[i+1]
+      end
+      scratch = if i = options.index("-s")
+        options[i+1]
+      end
+      queue = if i = options.index("-q")
+        options[i+1]
+      end
+      new_options = []
+      new_options << "--mem=#{ram}G" if ram
+      new_options << "-n #{cores}" if cores
+      new_options << "--tmp=#{scratch}G" if scratch
+      new_options << "-p #{queue}" if queue
+      new_options.join(" ")
+    end
+    def submit_job(script_file, script_content, option='')
+      if script_name = File.basename(script_file) and script_name =~ /\.sh/
+        script_name = script_name.split(/\.sh/).first + ".sh"
+        new_job_script = generate_new_job_script(script_name, script_content)
+        new_job_script_base = File.basename(new_job_script)
+        log_file = File.join(@log_dir, new_job_script_base + "_o.log")
+        err_file = File.join(@log_dir, new_job_script_base + "_e.log")
+        #command = "g-sub -o #{log_file} -e #{err_file} -q user #{option} #{new_job_script}"
+        sbatch_options = parse(option)
+        command = "sbatch -o #{log_file} -e #{err_file} #{sbatch_options} #{new_job_script}"
+        puts command
+        job_id = `#{command}`
+        job_id = job_id.chomp.split.last
+        [job_id, log_file, command]
+      else
+        err_msg = "FGCZDebian10Cluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
+        warn err_msg
+        raise err_msg
+      end
+    end
+    def job_running?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
+          # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state == 'R'
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def job_ends?(log_file)
+      log_flag = false
+      IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
+        while line=io.gets
+          if line =~ /__SCRIPT END__/
+            log_flag = true
+            break
+          end
+        end
+      end
+      log_flag
+    end
+    def job_pending?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state =~ /PD/
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def copy_commands(org_dir, dest_parent_dir, now=nil)
+      commands = if now == "force"
+        target_file = File.join(dest_parent_dir, File.basename(org_dir))
+        ["g-req copynow -f #{org_dir} #{dest_parent_dir}"]
+      elsif now
+        ["g-req copynow #{org_dir} #{dest_parent_dir}"]
+      else
+        ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
+      end
+    end
+    def kill_command(job_id)
+      command = "scancel #{job_id}"
+    end
+    def delete_command(target)
+      command = "g-req remove #{target}"
+    end
+    def cluster_nodes
+      nodes = {
+        'fgcz-h-110: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-110',
+        'fgcz-h-111: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-111',
+      }
+    end
+  end
 end
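The pivotal change in FGCZDebian10Cluster is that submit_job now calls sbatch directly instead of g-sub, with parse translating the g-sub style flags (-r RAM, -c cores, -s scratch, -q queue) into their Slurm equivalents. A condensed, runnable copy of that mapping for illustration:

# Mirrors FGCZDebian10Cluster#parse from the diff above.
def parse(options)
  options = options.split
  ram     = options[options.index("-r") + 1] if options.index("-r")
  cores   = options[options.index("-c") + 1] if options.index("-c")
  scratch = options[options.index("-s") + 1] if options.index("-s")
  queue   = options[options.index("-q") + 1] if options.index("-q")
  new_options = []
  new_options << "--mem=#{ram}G"     if ram
  new_options << "-n #{cores}"       if cores
  new_options << "--tmp=#{scratch}G" if scratch
  new_options << "-p #{queue}"       if queue
  new_options.join(" ")
end

puts parse("-r 30 -c 8 -s 100 -q employee")
# => --mem=30G -n 8 --tmp=100G -p employee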
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: workflow_manager
 version: !ruby/object:Gem::Version
-  version: 0.5.2
+  version: 0.5.7
 platform: ruby
 authors:
 - Functional Genomics Center Zurich
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-
+date: 2020-07-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -104,8 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-
-rubygems_version: 2.6.14
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Workflow Manager manages job submissions using dRuby.