workflow_manager 0.5.2 → 0.5.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: '08c87b93bcb1fc9aadd48bea307f1543787acf79'
4
- data.tar.gz: 89fd672a4e1ec7929c6e7dbf19597bed82afc110
2
+ SHA256:
3
+ metadata.gz: ede215b5a867ebbafc9af7ead5a9afa04ed1530eac7d0d1bffb2ee42b09cbded
4
+ data.tar.gz: 7f68d53462f3c22ec889edaec171a6d5685063090b28264dfd93a224ca2d6f95
5
5
  SHA512:
6
- metadata.gz: 87f5d37dd1394ef6bbb894e163a629c12eb590a270edab39c16b892b1437cbace16245132713057dcc45a3879bfa3cc6512615e6177b9ebb55facc75d893d769
7
- data.tar.gz: '095e8de179697c6c07278e99ace552ab8f21ee025408a0617fbe4148d71fcaf12cf09b884ec648d9054c30eef8e191c7a2b6395a73a0c7c965d8603ac20eb458'
6
+ metadata.gz: dcc2bac302faf22e7170143d7377bb87144b3e6ccf049ba8d768318fd2f3a7ccdb213643c55fadbddfcdedd92b103e572dd75310d94c27fa917f55fd1c9d2c87
7
+ data.tar.gz: 18789821274de2c8961f15e6a49194c1d4e2c2f09eab095e9f7e14f575388507ff83a0dff5da480bcb79ec276956eea85739ee4d3f8cfd12c6c44139cdc3b465
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
  # 20121112 masa workflow manager client
4
- Version = '20130517-111334'
4
+ Version = '20200522-134606'
5
5
 
6
6
  require 'drb/drb'
7
7
 
@@ -28,3 +28,4 @@ if wfmrc
28
28
  end
29
29
  workflow_manager = DRbObject.new_with_uri(uri)
30
30
  puts workflow_manager.hello
31
+ puts workflow_manager.cluster_node_list
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
  # 20121112 masa workflow manager client
4
- Version = '20160317-153614'
4
+ Version = '20200722-161135'
5
5
 
6
6
  require 'drb/drb'
7
7
  require 'workflow_manager/optparse_ex'
@@ -16,6 +16,7 @@ opt = OptionParser.new do |o|
16
16
  o.on(:nodes, '-n nodes', '--nodes', 'Comma separated list of nodes to submit to for g-sub')
17
17
  o.on(:ram, '-r RAM', '--RAM', 'Amount of RAM to request in Gigs for g-sub')
18
18
  o.on(:scratch, '-s scratch', '--scratch', 'Amount of scratch space to request in Gigs for g-sub')
19
+ o.on(:queue, '-q queue', '--queue', 'Queue name')
19
20
  o.parse!(ARGV)
20
21
  end
21
22
  unless script_file = ARGV[0] and script_file =~ /\.sh/
@@ -38,6 +38,8 @@ module WorkflowManager
38
38
  end
39
39
  def default_node
40
40
  end
41
+ def node_list
42
+ end
41
43
  end
42
44
 
43
45
  class LocalComputer < Cluster
@@ -238,6 +240,63 @@ module WorkflowManager
238
240
  'fgcz-h-010: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-010',
239
241
  }
240
242
  end
243
+ def node_list
244
+ node2scr = {}
245
+ command = "qhost -F scratch"
246
+ keep = nil
247
+ IO.popen(command) do |out|
248
+ while line=out.gets
249
+ hostname, arch, ncpu, loading, memtot, memuse, *others = line.split
250
+ if hostname =~ /fgcz/
251
+ keep = hostname
252
+ elsif scratch_ = line.chomp.split.last and
253
+ scratch = scratch_.split('=').last
254
+ node2scr[keep] = scratch.to_i
255
+ keep = nil
256
+ end
257
+ end
258
+ end
259
+
260
+ list = {}
261
+ keep = nil
262
+ command = 'qhost -q'
263
+ IO.popen(command) do |out|
264
+ while line=out.gets
265
+ # HOSTNAME ARCH NCPU LOAD MEMTOT MEMUSE SWAPTO SWAPUS
266
+ hostname, arch, ncpu, loading, memtot, memuse, *others = line.split
267
+ if hostname =~ /fgcz/
268
+ #puts [hostname, ncpu, loading, memtot, memuse].join("\t")
269
+ mem = memtot.gsub(/G/, '').to_i
270
+ keep = [hostname, ncpu, "#{mem}G"]
271
+ elsif hostname == "GT" and keep and cores = line.chomp.split.last and cores !~ /[du]/
272
+ hostname = keep.shift
273
+ keep[0] = cores
274
+ if scr = node2scr[hostname] and scr >= 1000
275
+ scr = "%.1f" % (scr.to_f / 1000)
276
+ scr << "T"
277
+ else
278
+ scr = scr.to_s + "G"
279
+ end
280
+ keep << scr
281
+ list[hostname] = keep
282
+ keep = nil
283
+ end
284
+ end
285
+ end
286
+
287
+ # reformat
288
+ nodes = {}
289
+ list.each do |hostname, specs|
290
+ # 20190823 masa tentatively off use f47
291
+ unless hostname =~ /fgcz-c-047/
292
+ cores, ram, scr = specs
293
+ key = "#{hostname}: cores #{cores}, ram #{ram}, scr #{scr}"
294
+ value = hostname
295
+ nodes[key] = value
296
+ end
297
+ end
298
+ nodes
299
+ end
241
300
  end
242
301
 
243
302
  class FGCZCourseCluster < FGCZCluster
@@ -290,4 +349,285 @@ module WorkflowManager
290
349
  }
291
350
  end
292
351
  end
352
+
353
+ class FGCZDevian10Cluster < Cluster
354
+ def submit_job(script_file, script_content, option='')
355
+ if script_name = File.basename(script_file) and script_name =~ /\.sh/
356
+ script_name = script_name.split(/\.sh/).first + ".sh"
357
+ new_job_script = generate_new_job_script(script_name, script_content)
358
+ new_job_script_base = File.basename(new_job_script)
359
+ log_file = File.join(@log_dir, new_job_script_base + "_o.log")
360
+ err_file = File.join(@log_dir, new_job_script_base + "_e.log")
361
+ command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
362
+ #command = "sbatch -o #{log_file} -e #{err_file} #{new_job_script}"
363
+ job_id = `#{command}`
364
+ #job_id = job_id.match(/Your job (\d+) \(/)[1]
365
+ job_id = job_id.chomp.split.last
366
+ [job_id, log_file, command]
367
+ else
368
+ err_msg = "FGCZDevian10Cluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
369
+ warn err_msg
370
+ raise err_msg
371
+ end
372
+ end
373
+ def job_running?(job_id)
374
+ qstat_flag = false
375
+ IO.popen('squeue') do |io|
376
+ while line=io.gets
377
+ # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
378
+ # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
379
+ jobid, partition, name, user, state, *others = line.chomp.split
380
+ if jobid.strip == job_id and state == 'R'
381
+ qstat_flag = true
382
+ break
383
+ end
384
+ end
385
+ end
386
+ qstat_flag
387
+ end
388
+ def job_ends?(log_file)
389
+ log_flag = false
390
+ IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
391
+ while line=io.gets
392
+ if line =~ /__SCRIPT END__/
393
+ log_flag = true
394
+ break
395
+ end
396
+ end
397
+ end
398
+ log_flag
399
+ end
400
+ def job_pending?(job_id)
401
+ qstat_flag = false
402
+ IO.popen('squeue') do |io|
403
+ while line=io.gets
404
+ jobid, partition, name, user, state, *others = line.chomp.split
405
+ if jobid.strip == job_id and state =~ /PD/
406
+ qstat_flag = true
407
+ break
408
+ end
409
+ end
410
+ end
411
+ qstat_flag
412
+ end
413
+ def copy_commands(org_dir, dest_parent_dir, now=nil)
414
+ commands = if now == "force"
415
+ target_file = File.join(dest_parent_dir, File.basename(org_dir))
416
+ ["g-req copynow -f #{org_dir} #{dest_parent_dir}"]
417
+ elsif now
418
+ ["g-req copynow #{org_dir} #{dest_parent_dir}"]
419
+ else
420
+ ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
421
+ end
422
+ end
423
+ def kill_command(job_id)
424
+ command = "scancel #{job_id}"
425
+ end
426
+ def delete_command(target)
427
+ command = "g-req remove #{target}"
428
+ end
429
+ def cluster_nodes
430
+ nodes = {
431
+ 'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
432
+ 'fgcz-h-901: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-901',
433
+ }
434
+ end
435
+ end
436
+
437
+ class FGCZDebian10CourseCluster < Cluster
438
+ def submit_job(script_file, script_content, option='')
439
+ if script_name = File.basename(script_file) and script_name =~ /\.sh/
440
+ script_name = script_name.split(/\.sh/).first + ".sh"
441
+ new_job_script = generate_new_job_script(script_name, script_content)
442
+ new_job_script_base = File.basename(new_job_script)
443
+ log_file = File.join(@log_dir, new_job_script_base + "_o.log")
444
+ err_file = File.join(@log_dir, new_job_script_base + "_e.log")
445
+ command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
446
+ job_id = `#{command}`
447
+ job_id = job_id.chomp.split.last
448
+ [job_id, log_file, command]
449
+ else
450
+ err_msg = "FGCZDebian10CourseCluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
451
+ warn err_msg
452
+ raise err_msg
453
+ end
454
+ end
455
+ def job_running?(job_id)
456
+ qstat_flag = false
457
+ IO.popen('squeue') do |io|
458
+ while line=io.gets
459
+ # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
460
+ # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
461
+ jobid, partition, name, user, state, *others = line.chomp.split
462
+ if jobid.strip == job_id and state == 'R'
463
+ qstat_flag = true
464
+ break
465
+ end
466
+ end
467
+ end
468
+ qstat_flag
469
+ end
470
+ def job_ends?(log_file)
471
+ log_flag = false
472
+ IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
473
+ while line=io.gets
474
+ if line =~ /__SCRIPT END__/
475
+ log_flag = true
476
+ break
477
+ end
478
+ end
479
+ end
480
+ log_flag
481
+ end
482
+ def job_pending?(job_id)
483
+ qstat_flag = false
484
+ IO.popen('squeue') do |io|
485
+ while line=io.gets
486
+ jobid, partition, name, user, state, *others = line.chomp.split
487
+ if jobid.strip == job_id and state =~ /PD/
488
+ qstat_flag = true
489
+ break
490
+ end
491
+ end
492
+ end
493
+ qstat_flag
494
+ end
495
+ def copy_commands(org_dir, dest_parent_dir, now=nil)
496
+ commands = ["cp -r #{org_dir} #{dest_parent_dir}"]
497
+ end
498
+ def kill_command(job_id)
499
+ command = "scancel #{job_id}"
500
+ end
501
+ def delete_command(target)
502
+ command = "rm -rf #{target}"
503
+ end
504
+ def cluster_nodes
505
+ nodes = {
506
+ 'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
507
+ 'fgcz-h-901: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-901',
508
+ 'fgcz-h-902: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-902',
509
+ 'fgcz-h-903: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-903',
510
+ 'fgcz-h-904: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-904',
511
+ 'fgcz-h-905: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-905',
512
+ 'fgcz-h-906: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-906',
513
+ 'fgcz-h-907: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-907',
514
+ 'fgcz-h-908: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-908',
515
+ 'fgcz-h-909: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-909',
516
+ 'fgcz-h-910: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-910',
517
+ 'fgcz-h-911: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-911',
518
+ 'fgcz-h-912: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-912',
519
+ 'fgcz-h-913: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-913',
520
+ 'fgcz-h-914: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-914',
521
+ 'fgcz-h-915: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-915',
522
+ 'fgcz-h-916: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-916',
523
+ 'fgcz-h-917: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-917',
524
+ }
525
+ end
526
+ end
527
+
528
+ class FGCZDebian10Cluster < Cluster
529
+ def parse(options)
530
+ options = options.split
531
+ ram = if i = options.index("-r")
532
+ options[i+1]
533
+ end
534
+ cores = if i = options.index("-c")
535
+ options[i+1]
536
+ end
537
+ scratch = if i = options.index("-s")
538
+ options[i+1]
539
+ end
540
+ queue = if i = options.index("-q")
541
+ options[i+1]
542
+ end
543
+ new_options = []
544
+ new_options << "--mem=#{ram}G" if ram
545
+ new_options << "-n #{cores}" if cores
546
+ new_options << "--tmp=#{scratch}G" if scratch
547
+ new_options << "-p #{queue}" if queue
548
+ new_options.join(" ")
549
+ end
550
+ def submit_job(script_file, script_content, option='')
551
+ if script_name = File.basename(script_file) and script_name =~ /\.sh/
552
+ script_name = script_name.split(/\.sh/).first + ".sh"
553
+ new_job_script = generate_new_job_script(script_name, script_content)
554
+ new_job_script_base = File.basename(new_job_script)
555
+ log_file = File.join(@log_dir, new_job_script_base + "_o.log")
556
+ err_file = File.join(@log_dir, new_job_script_base + "_e.log")
557
+ #command = "g-sub -o #{log_file} -e #{err_file} -q user #{option} #{new_job_script}"
558
+ sbatch_options = parse(option)
559
+ command = "sbatch -o #{log_file} -e #{err_file} #{sbatch_options} #{new_job_script}"
560
+ puts command
561
+ job_id = `#{command}`
562
+ job_id = job_id.chomp.split.last
563
+ [job_id, log_file, command]
564
+ else
565
+ err_msg = "FGCZDebian10Cluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
566
+ warn err_msg
567
+ raise err_msg
568
+ end
569
+ end
570
+ def job_running?(job_id)
571
+ qstat_flag = false
572
+ IO.popen('squeue') do |io|
573
+ while line=io.gets
574
+ # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
575
+ # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
576
+ jobid, partition, name, user, state, *others = line.chomp.split
577
+ if jobid.strip == job_id and state == 'R'
578
+ qstat_flag = true
579
+ break
580
+ end
581
+ end
582
+ end
583
+ qstat_flag
584
+ end
585
+ def job_ends?(log_file)
586
+ log_flag = false
587
+ IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
588
+ while line=io.gets
589
+ if line =~ /__SCRIPT END__/
590
+ log_flag = true
591
+ break
592
+ end
593
+ end
594
+ end
595
+ log_flag
596
+ end
597
+ def job_pending?(job_id)
598
+ qstat_flag = false
599
+ IO.popen('squeue') do |io|
600
+ while line=io.gets
601
+ jobid, partition, name, user, state, *others = line.chomp.split
602
+ if jobid.strip == job_id and state =~ /PD/
603
+ qstat_flag = true
604
+ break
605
+ end
606
+ end
607
+ end
608
+ qstat_flag
609
+ end
610
+ def copy_commands(org_dir, dest_parent_dir, now=nil)
611
+ commands = if now == "force"
612
+ target_file = File.join(dest_parent_dir, File.basename(org_dir))
613
+ ["g-req copynow -f #{org_dir} #{dest_parent_dir}"]
614
+ elsif now
615
+ ["g-req copynow #{org_dir} #{dest_parent_dir}"]
616
+ else
617
+ ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
618
+ end
619
+ end
620
+ def kill_command(job_id)
621
+ command = "scancel #{job_id}"
622
+ end
623
+ def delete_command(target)
624
+ command = "g-req remove #{target}"
625
+ end
626
+ def cluster_nodes
627
+ nodes = {
628
+ 'fgcz-h-110: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-110',
629
+ 'fgcz-h-111: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-111',
630
+ }
631
+ end
632
+ end
293
633
  end
@@ -509,6 +509,9 @@ module WorkflowManager
509
509
  end
510
510
  end
511
511
  alias_method :check_status, :success_or_fail
512
+ def cluster_node_list
513
+ @cluster.node_list
514
+ end
512
515
  end
513
516
  end
514
517
 
@@ -1,3 +1,3 @@
1
1
  module WorkflowManager
2
- VERSION = "0.5.2"
2
+ VERSION = "0.5.7"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: workflow_manager
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Functional Genomics Center Zurich
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-26 00:00:00.000000000 Z
11
+ date: 2020-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -104,8 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
104
  - !ruby/object:Gem::Version
105
105
  version: '0'
106
106
  requirements: []
107
- rubyforge_project:
108
- rubygems_version: 2.6.14
107
+ rubygems_version: 3.0.3
109
108
  signing_key:
110
109
  specification_version: 4
111
110
  summary: Workflow Manager manages job submissions using dRuby.