workflow_manager 0.5.1 → 0.5.6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 11c00265553f6ee24f60116455ad308ce4f880ca
-  data.tar.gz: 7a76ec915498d09ec76e8504ffd9c53bf2b68954
+SHA256:
+  metadata.gz: 624dc0f37286ee8b893ceb0d73cce4ce83ab6078be6b1f06a96c316e145311f4
+  data.tar.gz: 805777d8e45242281c3e929b03a92d3fd9205c6c091f716ebb16b93bf15904e9
 SHA512:
-  metadata.gz: fd80735df1170477400d0501bd4c5407598f48560c6af06d3acadc82f93c95f64371b0b0829681710712476b3465063eda608bf89122f83457664676de819f9c
-  data.tar.gz: f4809de592cddf922cb6c7d3b0d85e3936503d1bf97c82e96107f1a7dcf5984f09b7abd72abe50f56c1a1db9234bcf7e454f44ec78875c2ad95d2ea5b3bbe7ed
+  metadata.gz: 8c89da95c15863910c397bc149b799b094ee631feb092a7e0ffea7648c2148e09677b1288bf41daa5aa798b8a315ad3e7d2a4686d2b7bb5ced4f57fe8eff2161
+  data.tar.gz: bea9060c0b8322b3b096ffdae4ac82d9f523bc9322b8bfa412f986aab5c56881523985698c974eb581560929eea60d34189724efae29c28e31127512f0b56478
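
The release also upgrades the recorded digests from SHA1 to SHA256 (the SHA512 entries were already present). One way to check a downloaded copy against the values above is the sketch below; it assumes the gem archive has already been unpacked, e.g. with `tar xf workflow_manager-0.5.6.gem`:

    require 'digest'

    # Sketch: verify the data.tar.gz digest recorded in checksums.yaml above.
    # The working directory containing the unpacked members is an assumption.
    puts Digest::SHA256.file("data.tar.gz").hexdigest
    # expected: 805777d8e45242281c3e929b03a92d3fd9205c6c091f716ebb16b93bf15904e9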
@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
 # encoding: utf-8
 # 20121112 masa workflow manager client
-Version = '20130517-111334'
+Version = '20200522-134606'
 
 require 'drb/drb'
 
@@ -28,3 +28,4 @@ if wfmrc
 end
 workflow_manager = DRbObject.new_with_uri(uri)
 puts workflow_manager.hello
+puts workflow_manager.cluster_node_list
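
The added line calls a new remote method over dRuby. A minimal sketch of the client call path, with an assumed URI (the real script resolves it from a .wfmrc file, as the `if wfmrc` context above shows):

    #!/usr/bin/env ruby
    # Minimal sketch of the client side; the URI here is an assumption.
    require 'drb/drb'

    uri = 'druby://localhost:12345'
    workflow_manager = DRbObject.new_with_uri(uri)
    puts workflow_manager.hello              # server greeting
    puts workflow_manager.cluster_node_list  # new in 0.5.6: the server forwards this to its cluster's node_list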
@@ -38,6 +38,8 @@ module WorkflowManager
     end
     def default_node
     end
+    def node_list
+    end
   end
 
   class LocalComputer < Cluster
@@ -238,6 +240,63 @@ module WorkflowManager
         'fgcz-h-010: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-010',
       }
     end
+    def node_list
+      node2scr = {}
+      command = "qhost -F scratch"
+      keep = nil
+      IO.popen(command) do |out|
+        while line=out.gets
+          hostname, arch, ncpu, loading, memtot, memuse, *others = line.split
+          if hostname =~ /fgcz/
+            keep = hostname
+          elsif scratch_ = line.chomp.split.last and
+                scratch = scratch_.split('=').last
+            node2scr[keep] = scratch.to_i
+            keep = nil
+          end
+        end
+      end
+
+      list = {}
+      keep = nil
+      command = 'qhost -q'
+      IO.popen(command) do |out|
+        while line=out.gets
+          # HOSTNAME ARCH NCPU LOAD MEMTOT MEMUSE SWAPTO SWAPUS
+          hostname, arch, ncpu, loading, memtot, memuse, *others = line.split
+          if hostname =~ /fgcz/
+            #puts [hostname, ncpu, loading, memtot, memuse].join("\t")
+            mem = memtot.gsub(/G/, '').to_i
+            keep = [hostname, ncpu, "#{mem}G"]
+          elsif hostname == "GT" and keep and cores = line.chomp.split.last and cores !~ /[du]/
+            hostname = keep.shift
+            keep[0] = cores
+            if scr = node2scr[hostname] and scr >= 1000
+              scr = "%.1f" % (scr.to_f / 1000)
+              scr << "T"
+            else
+              scr = scr.to_s + "G"
+            end
+            keep << scr
+            list[hostname] = keep
+            keep = nil
+          end
+        end
+      end
+
+      # reformat
+      nodes = {}
+      list.each do |hostname, specs|
+        # 20190823 masa tentatively off use f47
+        unless hostname =~ /fgcz-c-047/
+          cores, ram, scr = specs
+          key = "#{hostname}: cores #{cores}, ram #{ram}, scr #{scr}"
+          value = hostname
+          nodes[key] = value
+        end
+      end
+      nodes
+    end
   end
 
   class FGCZCourseCluster < FGCZCluster
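
`node_list` builds the same `description => hostname` mapping that the hard-coded `cluster_nodes` hashes return, but from live `qhost` output, and formats scratch sizes of 1000G or more as terabytes. A sketch of the resulting shape, with illustrative entries:

    # Illustrative shape of the Hash node_list returns
    # (these specs are made up, not real cluster nodes):
    nodes = {
      "fgcz-h-010: cores 8, ram 30G, scr 400G" => "fgcz-h-010",
      "fgcz-h-011: cores 8, ram 30G, scr 1.2T" => "fgcz-h-011",
    }
    nodes.each { |description, hostname| puts "#{hostname}\t#{description}" }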
@@ -290,4 +349,180 @@ module WorkflowManager
       }
     end
   end
+
+  class FGCZDevian10Cluster < Cluster
+    def submit_job(script_file, script_content, option='')
+      if script_name = File.basename(script_file) and script_name =~ /\.sh/
+        script_name = script_name.split(/\.sh/).first + ".sh"
+        new_job_script = generate_new_job_script(script_name, script_content)
+        new_job_script_base = File.basename(new_job_script)
+        log_file = File.join(@log_dir, new_job_script_base + "_o.log")
+        err_file = File.join(@log_dir, new_job_script_base + "_e.log")
+        command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
+        #command = "sbatch -o #{log_file} -e #{err_file} #{new_job_script}"
+        job_id = `#{command}`
+        #job_id = job_id.match(/Your job (\d+) \(/)[1]
+        job_id = job_id.chomp.split.last
+        [job_id, log_file, command]
+      else
+        err_msg = "FGCZDevian10Cluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
+        warn err_msg
+        raise err_msg
+      end
+    end
+    def job_running?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
+          # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state == 'R'
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def job_ends?(log_file)
+      log_flag = false
+      IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
+        while line=io.gets
+          if line =~ /__SCRIPT END__/
+            log_flag = true
+            break
+          end
+        end
+      end
+      log_flag
+    end
+    def job_pending?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state =~ /PD/
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def copy_commands(org_dir, dest_parent_dir, now=nil)
+      commands = if now == "force"
+                   target_file = File.join(dest_parent_dir, File.basename(org_dir))
+                   ["g-req copynow -f #{org_dir} #{dest_parent_dir}"]
+                 elsif now
+                   ["g-req copynow #{org_dir} #{dest_parent_dir}"]
+                 else
+                   ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
+                 end
+    end
+    def kill_command(job_id)
+      command = "scancel #{job_id}"
+    end
+    def delete_command(target)
+      command = "g-req remove #{target}"
+    end
+    def cluster_nodes
+      nodes = {
+        'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
+        'fgcz-h-901: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-901',
+      }
+    end
+  end
+
+  class FGCZDebian10CourseCluster < Cluster
+    def submit_job(script_file, script_content, option='')
+      if script_name = File.basename(script_file) and script_name =~ /\.sh/
+        script_name = script_name.split(/\.sh/).first + ".sh"
+        new_job_script = generate_new_job_script(script_name, script_content)
+        new_job_script_base = File.basename(new_job_script)
+        log_file = File.join(@log_dir, new_job_script_base + "_o.log")
+        err_file = File.join(@log_dir, new_job_script_base + "_e.log")
+        command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
+        job_id = `#{command}`
+        job_id = job_id.chomp.split.last
+        [job_id, log_file, command]
+      else
+        err_msg = "FGCZDebian10CourseCluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
+        warn err_msg
+        raise err_msg
+      end
+    end
+    def job_running?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
+          # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state == 'R'
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def job_ends?(log_file)
+      log_flag = false
+      IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
+        while line=io.gets
+          if line =~ /__SCRIPT END__/
+            log_flag = true
+            break
+          end
+        end
+      end
+      log_flag
+    end
+    def job_pending?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state =~ /PD/
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def copy_commands(org_dir, dest_parent_dir, now=nil)
+      commands = ["cp -r #{org_dir} #{dest_parent_dir}"]
+    end
+    def kill_command(job_id)
+      command = "scancel #{job_id}"
+    end
+    def delete_command(target)
+      command = "rm -rf #{target}"
+    end
+    def cluster_nodes
+      nodes = {
+        'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
+        'fgcz-h-901: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-901',
+        'fgcz-h-902: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-902',
+        'fgcz-h-903: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-903',
+        'fgcz-h-904: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-904',
+        'fgcz-h-905: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-905',
+        'fgcz-h-906: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-906',
+        'fgcz-h-907: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-907',
+        'fgcz-h-908: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-908',
+        'fgcz-h-909: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-909',
+        'fgcz-h-910: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-910',
+        'fgcz-h-911: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-911',
+        'fgcz-h-912: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-912',
+        'fgcz-h-913: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-913',
+        'fgcz-h-914: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-914',
+        'fgcz-h-915: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-915',
+        'fgcz-h-916: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-916',
+        'fgcz-h-917: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-917',
+      }
+    end
+  end
+
 end
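
Both new cluster classes poll Slurm's `squeue` with the same pattern in `job_running?` and `job_pending?`. A standalone sketch of that check (the job id is taken from the sample line in the comments above):

    # Sketch of the squeue polling shared by job_running? and job_pending?:
    # returns the Slurm state column ("R", "PD", ...) for a job id, or nil
    # if the job is no longer queued.
    def slurm_state(job_id)
      IO.popen('squeue') do |io|
        while line = io.gets
          jobid, _partition, _name, _user, state, *_rest = line.chomp.split
          return state if jobid.to_s.strip == job_id.to_s
        end
      end
      nil
    end

    puts slurm_state("206")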
@@ -100,6 +100,7 @@ module WorkflowManager
     end
   end
   class RedisDB
+    attr_accessor :port
    def run_redis_server(redis_conf)
      @pid = fork do
        exec("redis-server #{redis_conf}")
@@ -108,11 +109,13 @@ module WorkflowManager
        Process.waitpid @pid
      end
    end
-    def initialize(db_no=0, redis_conf=nil)
+    def initialize(db_no=0, redis_conf)
      if db_no==0
        run_redis_server(redis_conf)
      end
-      @db = Redis.new(db: db_no)
+      conf = Hash[*CSV.readlines(redis_conf, col_sep: " ").map{|a| [a.first, a[1,100].join(",")]}.flatten]
+      @port = (conf["port"]||6379).to_i
+      @db = Redis.new(port: @port, db: db_no)
    end
    def transaction
      #@db.multi do
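
The rewritten initializer reads the port out of the redis.conf it is handed, so `Redis.new` connects to the same port the forked `redis-server` was started on. A sketch of that lookup; the conf file name and its contents here are assumptions for illustration:

    require 'csv'

    # Build a {directive => value} Hash from a redis.conf, as the
    # initializer above does, then fall back to Redis's default port.
    File.write("example_redis.conf", "port 6380\ndir ./db\n")
    conf = Hash[*CSV.readlines("example_redis.conf", col_sep: " ").map{|a| [a.first, a[1,100].join(",")]}.flatten]
    port = (conf["port"] || 6379).to_i
    puts port  #=> 6380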
@@ -154,7 +157,7 @@ module WorkflowManager
      when "KyotoCabinet"
        KyotoDB.new(@db_logs)
      when "Redis"
-        RedisDB.new(1)
+        RedisDB.new(1, @redis_conf)
      end
 
      @system_log = File.join(@log_dir, "system.log")
@@ -163,6 +166,7 @@ module WorkflowManager
      puts("DB = #{DB_MODE}")
      if DB_MODE == "Redis"
        puts("Redis conf = #{config.redis_conf}")
+        puts("Redis port = #{@logs.port}")
      end
      puts("Cluster = #{@cluster.name}")
      log_puts("DB = #{DB_MODE}")
@@ -505,6 +509,9 @@ module WorkflowManager
      end
    end
    alias_method :check_status, :success_or_fail
+    def cluster_node_list
+      @cluster.node_list
+    end
  end
end
 
@@ -1,3 +1,3 @@
 module WorkflowManager
-  VERSION = "0.5.1"
+  VERSION = "0.5.6"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: workflow_manager
 version: !ruby/object:Gem::Version
-  version: 0.5.1
+  version: 0.5.6
 platform: ruby
 authors:
 - Functional Genomics Center Zurich
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-03-26 00:00:00.000000000 Z
+date: 2020-06-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -104,8 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.6.14
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Workflow Manager manages job submissions using dRuby.