workflow_manager 0.5.1 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 11c00265553f6ee24f60116455ad308ce4f880ca
-  data.tar.gz: 7a76ec915498d09ec76e8504ffd9c53bf2b68954
+SHA256:
+  metadata.gz: 624dc0f37286ee8b893ceb0d73cce4ce83ab6078be6b1f06a96c316e145311f4
+  data.tar.gz: 805777d8e45242281c3e929b03a92d3fd9205c6c091f716ebb16b93bf15904e9
 SHA512:
-  metadata.gz: fd80735df1170477400d0501bd4c5407598f48560c6af06d3acadc82f93c95f64371b0b0829681710712476b3465063eda608bf89122f83457664676de819f9c
-  data.tar.gz: f4809de592cddf922cb6c7d3b0d85e3936503d1bf97c82e96107f1a7dcf5984f09b7abd72abe50f56c1a1db9234bcf7e454f44ec78875c2ad95d2ea5b3bbe7ed
+  metadata.gz: 8c89da95c15863910c397bc149b799b094ee631feb092a7e0ffea7648c2148e09677b1288bf41daa5aa798b8a315ad3e7d2a4686d2b7bb5ced4f57fe8eff2161
+  data.tar.gz: bea9060c0b8322b3b096ffdae4ac82d9f523bc9322b8bfa412f986aab5c56881523985698c974eb581560929eea60d34189724efae29c28e31127512f0b56478
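
The per-file digests switch from SHA1 to SHA256 (the SHA512 entries change only because the archives themselves changed). As a minimal sketch, the SHA256 values above can be recomputed from a locally downloaded gem, which is a tar archive containing metadata.gz and data.tar.gz; the file name workflow_manager-0.5.6.gem is an assumption:

    # Sketch only: recompute the SHA256 digests listed in checksums.yaml.
    # Assumes the gem file sits in the current directory.
    require 'digest'
    require 'rubygems/package'

    File.open('workflow_manager-0.5.6.gem', 'rb') do |io|
      tar = Gem::Package::TarReader.new(io)
      tar.each do |entry|
        next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
        puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
      end
    end
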
@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
 # encoding: utf-8
 # 20121112 masa workflow manager client
-Version = '20130517-111334'
+Version = '20200522-134606'
 
 require 'drb/drb'
 
@@ -28,3 +28,4 @@ if wfmrc
 end
 workflow_manager = DRbObject.new_with_uri(uri)
 puts workflow_manager.hello
+puts workflow_manager.cluster_node_list
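
The client gains a second remote call: after greeting the server it now also prints the cluster's node list over the same DRb connection. A minimal sketch of that round trip, assuming the server listens on druby://localhost:12345 (the real script takes the URI from the wfmrc configuration read just above):

    # Sketch only: the DRb calls made by the client above.
    # The URI is an assumption; in the script it comes from the wfmrc file.
    require 'drb/drb'

    uri = 'druby://localhost:12345'
    workflow_manager = DRbObject.new_with_uri(uri)

    puts workflow_manager.hello              # server greeting string
    puts workflow_manager.cluster_node_list  # hash of node labels => hostnames
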
@@ -38,6 +38,8 @@ module WorkflowManager
     end
     def default_node
     end
+    def node_list
+    end
   end
 
   class LocalComputer < Cluster
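
The base Cluster class gains an empty node_list hook next to default_node, so every concrete cluster can advertise its nodes; subclasses that do not override it simply return nil. A minimal sketch of a hypothetical subclass overriding the hook (class and host names are illustrative):

    # Sketch only: overriding the new node_list hook in a custom cluster.
    class MyLocalCluster < WorkflowManager::Cluster
      def node_list
        # same label => hostname shape as cluster_nodes
        { 'node-01: cores 8, ram 30G, scr 500G' => 'node-01' }
      end
    end
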
@@ -238,6 +240,63 @@ module WorkflowManager
       'fgcz-h-010: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-010',
       }
     end
+    def node_list
+      node2scr = {}
+      command = "qhost -F scratch"
+      keep = nil
+      IO.popen(command) do |out|
+        while line=out.gets
+          hostname, arch, ncpu, loading, memtot, memuse, *others = line.split
+          if hostname =~ /fgcz/
+            keep = hostname
+          elsif scratch_ = line.chomp.split.last and
+                scratch = scratch_.split('=').last
+            node2scr[keep] = scratch.to_i
+            keep = nil
+          end
+        end
+      end
+
+      list = {}
+      keep = nil
+      command = 'qhost -q'
+      IO.popen(command) do |out|
+        while line=out.gets
+          # HOSTNAME ARCH NCPU LOAD MEMTOT MEMUSE SWAPTO SWAPUS
+          hostname, arch, ncpu, loading, memtot, memuse, *others = line.split
+          if hostname =~ /fgcz/
+            #puts [hostname, ncpu, loading, memtot, memuse].join("\t")
+            mem = memtot.gsub(/G/, '').to_i
+            keep = [hostname, ncpu, "#{mem}G"]
+          elsif hostname == "GT" and keep and cores = line.chomp.split.last and cores !~ /[du]/
+            hostname = keep.shift
+            keep[0] = cores
+            if scr = node2scr[hostname] and scr >= 1000
+              scr = "%.1f" % (scr.to_f / 1000)
+              scr << "T"
+            else
+              scr = scr.to_s + "G"
+            end
+            keep << scr
+            list[hostname] = keep
+            keep = nil
+          end
+        end
+      end
+
+      # reformat
+      nodes = {}
+      list.each do |hostname, specs|
+        # 20190823 masa tentatively off use f47
+        unless hostname =~ /fgcz-c-047/
+          cores, ram, scr = specs
+          key = "#{hostname}: cores #{cores}, ram #{ram}, scr #{scr}"
+          value = hostname
+          nodes[key] = value
+        end
+      end
+      nodes
+    end
   end
 
   class FGCZCourseCluster < FGCZCluster
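
FGCZCluster#node_list scrapes two Grid Engine commands: qhost -F scratch for per-host scratch space and qhost -q for cores and memory, keeping only fgcz-* hosts and (per the 20190823 comment) skipping fgcz-c-047, then returns a label => hostname hash in the same shape as cluster_nodes. The only non-obvious step is the scratch formatting, shown here as a stand-alone sketch (format_scratch is a hypothetical helper, not part of the gem):

    # Sketch only: the scratch-size formatting used in node_list above.
    # qhost reports scratch in GB; 1000 GB and up are printed in TB.
    def format_scratch(gb)
      if gb && gb >= 1000
        "%.1fT" % (gb.to_f / 1000)   # 1800 => "1.8T"
      else
        "#{gb}G"                     # 400  => "400G"
      end
    end

    format_scratch(400)    # => "400G"
    format_scratch(1800)   # => "1.8T"
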
@@ -290,4 +349,180 @@ module WorkflowManager
       }
     end
   end
+
+  class FGCZDevian10Cluster < Cluster
+    def submit_job(script_file, script_content, option='')
+      if script_name = File.basename(script_file) and script_name =~ /\.sh/
+        script_name = script_name.split(/\.sh/).first + ".sh"
+        new_job_script = generate_new_job_script(script_name, script_content)
+        new_job_script_base = File.basename(new_job_script)
+        log_file = File.join(@log_dir, new_job_script_base + "_o.log")
+        err_file = File.join(@log_dir, new_job_script_base + "_e.log")
+        command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
+        #command = "sbatch -o #{log_file} -e #{err_file} #{new_job_script}"
+        job_id = `#{command}`
+        #job_id = job_id.match(/Your job (\d+) \(/)[1]
+        job_id = job_id.chomp.split.last
+        [job_id, log_file, command]
+      else
+        err_msg = "FGCZDevian10Cluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
+        warn err_msg
+        raise err_msg
+      end
+    end
+    def job_running?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
+          # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state == 'R'
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def job_ends?(log_file)
+      log_flag = false
+      IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
+        while line=io.gets
+          if line =~ /__SCRIPT END__/
+            log_flag = true
+            break
+          end
+        end
+      end
+      log_flag
+    end
+    def job_pending?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state =~ /PD/
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def copy_commands(org_dir, dest_parent_dir, now=nil)
+      commands = if now == "force"
+                   target_file = File.join(dest_parent_dir, File.basename(org_dir))
+                   ["g-req copynow -f #{org_dir} #{dest_parent_dir}"]
+                 elsif now
+                   ["g-req copynow #{org_dir} #{dest_parent_dir}"]
+                 else
+                   ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
+                 end
+    end
+    def kill_command(job_id)
+      command = "scancel #{job_id}"
+    end
+    def delete_command(target)
+      command = "g-req remove #{target}"
+    end
+    def cluster_nodes
+      nodes = {
+        'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
+        'fgcz-h-901: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-901',
+      }
+    end
+  end
+
+  class FGCZDebian10CourseCluster < Cluster
+    def submit_job(script_file, script_content, option='')
+      if script_name = File.basename(script_file) and script_name =~ /\.sh/
+        script_name = script_name.split(/\.sh/).first + ".sh"
+        new_job_script = generate_new_job_script(script_name, script_content)
+        new_job_script_base = File.basename(new_job_script)
+        log_file = File.join(@log_dir, new_job_script_base + "_o.log")
+        err_file = File.join(@log_dir, new_job_script_base + "_e.log")
+        command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
+        job_id = `#{command}`
+        job_id = job_id.chomp.split.last
+        [job_id, log_file, command]
+      else
+        err_msg = "FGCZDebian10CourseCluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
+        warn err_msg
+        raise err_msg
+      end
+    end
+    def job_running?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
+          # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state == 'R'
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def job_ends?(log_file)
+      log_flag = false
+      IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
+        while line=io.gets
+          if line =~ /__SCRIPT END__/
+            log_flag = true
+            break
+          end
+        end
+      end
+      log_flag
+    end
+    def job_pending?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state =~ /PD/
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def copy_commands(org_dir, dest_parent_dir, now=nil)
+      commands = ["cp -r #{org_dir} #{dest_parent_dir}"]
+    end
+    def kill_command(job_id)
+      command = "scancel #{job_id}"
+    end
+    def delete_command(target)
+      command = "rm -rf #{target}"
+    end
+    def cluster_nodes
+      nodes = {
+        'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
+        'fgcz-h-901: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-901',
+        'fgcz-h-902: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-902',
+        'fgcz-h-903: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-903',
+        'fgcz-h-904: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-904',
+        'fgcz-h-905: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-905',
+        'fgcz-h-906: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-906',
+        'fgcz-h-907: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-907',
+        'fgcz-h-908: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-908',
+        'fgcz-h-909: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-909',
+        'fgcz-h-910: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-910',
+        'fgcz-h-911: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-911',
+        'fgcz-h-912: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-912',
+        'fgcz-h-913: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-913',
+        'fgcz-h-914: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-914',
+        'fgcz-h-915: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-915',
+        'fgcz-h-916: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-916',
+        'fgcz-h-917: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-917',
+      }
+    end
+  end
+
 end
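
Both new classes target Slurm rather than Grid Engine: submission still goes through the g-sub wrapper, but job state is read from squeue (state R = running, PD = pending), logs are tailed for the __SCRIPT END__ marker, and jobs are cancelled with scancel; FGCZDevian10Cluster stages data with g-req, while the course cluster uses plain cp/rm. A hedged sketch of how these methods fit together when driven by hand (the constructor arguments and script path are assumptions; in the gem the server wires this up from its configuration):

    # Sketch only: exercising one of the Slurm-backed clusters defined above.
    cluster = WorkflowManager::FGCZDebian10CourseCluster.new('course', '/srv/wfm/logs')

    job_id, log_file, command =
      cluster.submit_job('/srv/jobs/test.sh', "#!/bin/bash\necho __SCRIPT END__\n")

    sleep 5 while cluster.job_pending?(job_id) || cluster.job_running?(job_id)
    puts(cluster.job_ends?(log_file) ? 'job finished' : 'log not complete yet')
    puts cluster.kill_command(job_id)   # => "scancel <job_id>"
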
@@ -100,6 +100,7 @@ module WorkflowManager
     end
   end
   class RedisDB
+    attr_accessor :port
     def run_redis_server(redis_conf)
       @pid = fork do
         exec("redis-server #{redis_conf}")
@@ -108,11 +109,13 @@ module WorkflowManager
         Process.waitpid @pid
       end
     end
-    def initialize(db_no=0, redis_conf=nil)
+    def initialize(db_no=0, redis_conf)
      if db_no==0
        run_redis_server(redis_conf)
      end
-      @db = Redis.new(db: db_no)
+      conf = Hash[*CSV.readlines(redis_conf, col_sep: " ").map{|a| [a.first, a[1,100].join(",")]}.flatten]
+      @port = (conf["port"]||6379).to_i
+      @db = Redis.new(port: @port, db: db_no)
    end
    def transaction
      #@db.multi do
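
RedisDB#initialize now requires the redis_conf path, parses it as space-separated key/value lines, and hands any configured port to Redis.new (falling back to 6379), exposing it through the new attr_accessor :port. A minimal sketch of the same parsing against a throwaway config file (here csv is required directly; the gem is assumed to load it alongside redis elsewhere):

    # Sketch only: how the "port" setting is extracted from a redis conf file.
    require 'csv'
    require 'tempfile'

    conf_file = Tempfile.new('redis.conf')
    conf_file.write("port 6380\ndir /tmp/redis\n")
    conf_file.flush

    conf = Hash[*CSV.readlines(conf_file.path, col_sep: " ")
                    .map { |a| [a.first, a[1, 100].join(",")] }.flatten]
    port = (conf["port"] || 6379).to_i   # => 6380
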
@@ -154,7 +157,7 @@ module WorkflowManager
       when "KyotoCabinet"
         KyotoDB.new(@db_logs)
       when "Redis"
-        RedisDB.new(1)
+        RedisDB.new(1, @redis_conf)
       end
 
       @system_log = File.join(@log_dir, "system.log")
@@ -163,6 +166,7 @@ module WorkflowManager
       puts("DB = #{DB_MODE}")
       if DB_MODE == "Redis"
         puts("Redis conf = #{config.redis_conf}")
+        puts("Redis port = #{@logs.port}")
       end
       puts("Cluster = #{@cluster.name}")
       log_puts("DB = #{DB_MODE}")
@@ -505,6 +509,9 @@ module WorkflowManager
       end
     end
     alias_method :check_status, :success_or_fail
+    def cluster_node_list
+      @cluster.node_list
+    end
   end
 end
 
@@ -1,3 +1,3 @@
 module WorkflowManager
-  VERSION = "0.5.1"
+  VERSION = "0.5.6"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: workflow_manager
 version: !ruby/object:Gem::Version
-  version: 0.5.1
+  version: 0.5.6
 platform: ruby
 authors:
 - Functional Genomics Center Zurich
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-03-26 00:00:00.000000000 Z
+date: 2020-06-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -104,8 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.6.14
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Workflow Manager manages job submissions using dRuby.