workflow_manager 0.5.1 → 0.5.6
- checksums.yaml +5 -5
- data/bin/wfm_hello +2 -1
- data/lib/workflow_manager/cluster.rb +235 -0
- data/lib/workflow_manager/server.rb +10 -3
- data/lib/workflow_manager/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA256:
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: 624dc0f37286ee8b893ceb0d73cce4ce83ab6078be6b1f06a96c316e145311f4
+  data.tar.gz: 805777d8e45242281c3e929b03a92d3fd9205c6c091f716ebb16b93bf15904e9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8c89da95c15863910c397bc149b799b094ee631feb092a7e0ffea7648c2148e09677b1288bf41daa5aa798b8a315ad3e7d2a4686d2b7bb5ced4f57fe8eff2161
+  data.tar.gz: bea9060c0b8322b3b096ffdae4ac82d9f523bc9322b8bfa412f986aab5c56881523985698c974eb581560929eea60d34189724efae29c28e31127512f0b56478
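checksums.yaml records the SHA-256 and SHA-512 digests of the gem's two payloads, metadata.gz and data.tar.gz. A minimal sketch of recomputing one digest and comparing it against the recorded value; the local file paths are illustrative assumptions:

  # Recompute the SHA-256 of the data payload and compare it with the
  # value recorded in checksums.yaml (file paths are illustrative).
  require 'digest'
  require 'yaml'

  expected = YAML.load_file('checksums.yaml')['SHA256']['data.tar.gz']
  actual   = Digest::SHA256.file('data.tar.gz').hexdigest
  puts(actual == expected ? 'checksum OK' : 'checksum mismatch')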
data/bin/wfm_hello
CHANGED
@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
 # encoding: utf-8
 # 20121112 masa workflow manager client
-Version = '
+Version = '20200522-134606'
 
 require 'drb/drb'
 
@@ -28,3 +28,4 @@ if wfmrc
 end
 workflow_manager = DRbObject.new_with_uri(uri)
 puts workflow_manager.hello
+puts workflow_manager.cluster_node_list
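For context, wfm_hello is a plain dRuby client: it resolves the server URI (from a wfmrc file, per the hunk context above), builds a DRbObject proxy, and calls remote methods on it. A minimal sketch of the same round trip, with a hypothetical hard-coded URI standing in for the wfmrc lookup:

  #!/usr/bin/env ruby
  # Minimal dRuby round trip mirroring wfm_hello; the URI is a
  # hypothetical stand-in for the value normally read from wfmrc.
  require 'drb/drb'

  uri = 'druby://localhost:12345'
  workflow_manager = DRbObject.new_with_uri(uri)
  puts workflow_manager.hello              # existing remote method
  puts workflow_manager.cluster_node_list  # remote method added in 0.5.6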
data/lib/workflow_manager/cluster.rb
CHANGED
@@ -38,6 +38,8 @@ module WorkflowManager
     end
     def default_node
     end
+    def node_list
+    end
   end
 
   class LocalComputer < Cluster
@@ -238,6 +240,63 @@ module WorkflowManager
         'fgcz-h-010: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-010',
       }
     end
+    def node_list
+      node2scr = {}
+      command = "qhost -F scratch"
+      keep = nil
+      IO.popen(command) do |out|
+        while line=out.gets
+          hostname, arch, ncpu, loading, memtot, memuse, *others = line.split
+          if hostname =~ /fgcz/
+            keep = hostname
+          elsif scratch_ = line.chomp.split.last and
+                scratch = scratch_.split('=').last
+            node2scr[keep] = scratch.to_i
+            keep = nil
+          end
+        end
+      end
+
+      list = {}
+      keep = nil
+      command = 'qhost -q'
+      IO.popen(command) do |out|
+        while line=out.gets
+          # HOSTNAME ARCH NCPU LOAD MEMTOT MEMUSE SWAPTO SWAPUS
+          hostname, arch, ncpu, loading, memtot, memuse, *others = line.split
+          if hostname =~ /fgcz/
+            #puts [hostname, ncpu, loading, memtot, memuse].join("\t")
+            mem = memtot.gsub(/G/, '').to_i
+            keep = [hostname, ncpu, "#{mem}G"]
+          elsif hostname == "GT" and keep and cores = line.chomp.split.last and cores !~ /[du]/
+            hostname = keep.shift
+            keep[0] = cores
+            if scr = node2scr[hostname] and scr >= 1000
+              scr = "%.1f" % (scr.to_f / 1000)
+              scr << "T"
+            else
+              scr = scr.to_s + "G"
+            end
+            keep << scr
+            list[hostname] = keep
+            keep = nil
+          end
+        end
+      end
+
+      # reformat
+      nodes = {}
+      list.each do |hostname, specs|
+        # 20190823 masa tentatively off use f47
+        unless hostname =~ /fgcz-c-047/
+          cores, ram, scr = specs
+          key = "#{hostname}: cores #{cores}, ram #{ram}, scr #{scr}"
+          value = hostname
+          nodes[key] = value
+        end
+      end
+      nodes
+    end
   end
 
   class FGCZCourseCluster < FGCZCluster
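The new FGCZCluster#node_list scrapes two Grid Engine reports: qhost -F scratch maps each host to its scratch size, and qhost -q pairs each host row (NCPU, MEMTOT) with the queue row that follows it, whose last column carries the slot count. A self-contained sketch of the second pass against canned output; the host and queue values are illustrative, and the scratch lookup from the first pass is omitted:

  # Stand-alone run of the qhost -q parsing pass with canned output.
  sample = <<~QHOST
    HOSTNAME                ARCH     NCPU  LOAD  MEMTOT  MEMUSE  SWAPTO  SWAPUS
    fgcz-h-010              lx-amd64    8  0.05   30.0G    2.1G    2.0G    0.0G
       GT                   BIP   0/0/8
  QHOST

  list = {}
  keep = nil
  sample.each_line do |line|
    hostname, arch, ncpu, loading, memtot, *others = line.split
    if hostname =~ /fgcz/
      mem = memtot.gsub(/G/, '').to_i
      keep = [hostname, ncpu, "#{mem}G"]
    elsif hostname == "GT" and keep and cores = line.chomp.split.last and cores !~ /[du]/
      hostname = keep.shift
      keep[0] = cores              # replace NCPU with the queue's slot column
      list[hostname] = keep
      keep = nil
    end
  end
  p list  # => {"fgcz-h-010"=>["0/0/8", "30G"]}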
@@ -290,4 +349,180 @@ module WorkflowManager
       }
     end
   end
+
+  class FGCZDevian10Cluster < Cluster
+    def submit_job(script_file, script_content, option='')
+      if script_name = File.basename(script_file) and script_name =~ /\.sh/
+        script_name = script_name.split(/\.sh/).first + ".sh"
+        new_job_script = generate_new_job_script(script_name, script_content)
+        new_job_script_base = File.basename(new_job_script)
+        log_file = File.join(@log_dir, new_job_script_base + "_o.log")
+        err_file = File.join(@log_dir, new_job_script_base + "_e.log")
+        command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
+        #command = "sbatch -o #{log_file} -e #{err_file} #{new_job_script}"
+        job_id = `#{command}`
+        #job_id = job_id.match(/Your job (\d+) \(/)[1]
+        job_id = job_id.chomp.split.last
+        [job_id, log_file, command]
+      else
+        err_msg = "FGCZDevian10Cluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
+        warn err_msg
+        raise err_msg
+      end
+    end
+    def job_running?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
+          # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state == 'R'
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def job_ends?(log_file)
+      log_flag = false
+      IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
+        while line=io.gets
+          if line =~ /__SCRIPT END__/
+            log_flag = true
+            break
+          end
+        end
+      end
+      log_flag
+    end
+    def job_pending?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state =~ /PD/
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def copy_commands(org_dir, dest_parent_dir, now=nil)
+      commands = if now == "force"
+                   target_file = File.join(dest_parent_dir, File.basename(org_dir))
+                   ["g-req copynow -f #{org_dir} #{dest_parent_dir}"]
+                 elsif now
+                   ["g-req copynow #{org_dir} #{dest_parent_dir}"]
+                 else
+                   ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
+                 end
+    end
+    def kill_command(job_id)
+      command = "scancel #{job_id}"
+    end
+    def delete_command(target)
+      command = "g-req remove #{target}"
+    end
+    def cluster_nodes
+      nodes = {
+        'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
+        'fgcz-h-901: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-901',
+      }
+    end
+  end
+
+  class FGCZDebian10CourseCluster < Cluster
+    def submit_job(script_file, script_content, option='')
+      if script_name = File.basename(script_file) and script_name =~ /\.sh/
+        script_name = script_name.split(/\.sh/).first + ".sh"
+        new_job_script = generate_new_job_script(script_name, script_content)
+        new_job_script_base = File.basename(new_job_script)
+        log_file = File.join(@log_dir, new_job_script_base + "_o.log")
+        err_file = File.join(@log_dir, new_job_script_base + "_e.log")
+        command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
+        job_id = `#{command}`
+        job_id = job_id.chomp.split.last
+        [job_id, log_file, command]
+      else
+        err_msg = "FGCZDebian10CourseCluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
+        warn err_msg
+        raise err_msg
+      end
+    end
+    def job_running?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
+          # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state == 'R'
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def job_ends?(log_file)
+      log_flag = false
+      IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
+        while line=io.gets
+          if line =~ /__SCRIPT END__/
+            log_flag = true
+            break
+          end
+        end
+      end
+      log_flag
+    end
+    def job_pending?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state =~ /PD/
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def copy_commands(org_dir, dest_parent_dir, now=nil)
+      commands = ["cp -r #{org_dir} #{dest_parent_dir}"]
+    end
+    def kill_command(job_id)
+      command = "scancel #{job_id}"
+    end
+    def delete_command(target)
+      command = "rm -rf #{target}"
+    end
+    def cluster_nodes
+      nodes = {
+        'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
+        'fgcz-h-901: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-901',
+        'fgcz-h-902: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-902',
+        'fgcz-h-903: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-903',
+        'fgcz-h-904: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-904',
+        'fgcz-h-905: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-905',
+        'fgcz-h-906: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-906',
+        'fgcz-h-907: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-907',
+        'fgcz-h-908: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-908',
+        'fgcz-h-909: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-909',
+        'fgcz-h-910: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-910',
+        'fgcz-h-911: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-911',
+        'fgcz-h-912: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-912',
+        'fgcz-h-913: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-913',
+        'fgcz-h-914: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-914',
+        'fgcz-h-915: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-915',
+        'fgcz-h-916: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-916',
+        'fgcz-h-917: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-917',
+      }
+    end
+  end
+
 end
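Both new classes submit through the g-sub wrapper and poll Slurm's squeue, treating a job as running when its ST column is R and as pending when it matches PD. A self-contained sketch of that column parsing, with a hypothetical helper and canned output modeled on the comments in the diff (job data is illustrative):

  # Extract a job's ST column from squeue-style output.
  sample = <<~SQUEUE
    JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
      206  employee  test.sh  masaomi  R       0:03      1 fgcz-h-030
      207  employee  test.sh  masaomi PD       0:00      1 (Resources)
  SQUEUE

  def state_of(squeue_output, job_id)
    squeue_output.each_line do |line|
      jobid, partition, name, user, state, *others = line.chomp.split
      return state if jobid.strip == job_id
    end
    nil
  end

  p state_of(sample, '206')            # => "R"   (job_running? is true)
  p state_of(sample, '207') =~ /PD/    # => 0     (job_pending? is true)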
data/lib/workflow_manager/server.rb
CHANGED
@@ -100,6 +100,7 @@ module WorkflowManager
     end
   end
   class RedisDB
+    attr_accessor :port
     def run_redis_server(redis_conf)
       @pid = fork do
         exec("redis-server #{redis_conf}")
@@ -108,11 +109,13 @@ module WorkflowManager
         Process.waitpid @pid
       end
     end
-    def initialize(db_no=0, redis_conf
+    def initialize(db_no=0, redis_conf)
       if db_no==0
         run_redis_server(redis_conf)
       end
-
+      conf = Hash[*CSV.readlines(redis_conf, col_sep: " ").map{|a| [a.first, a[1,100].join(",")]}.flatten]
+      @port = (conf["port"]||6379).to_i
+      @db = Redis.new(port: @port, db: db_no)
     end
     def transaction
       #@db.multi do
@@ -154,7 +157,7 @@ module WorkflowManager
       when "KyotoCabinet"
         KyotoDB.new(@db_logs)
       when "Redis"
-        RedisDB.new(1)
+        RedisDB.new(1, @redis_conf)
       end
 
       @system_log = File.join(@log_dir, "system.log")
@@ -163,6 +166,7 @@ module WorkflowManager
       puts("DB = #{DB_MODE}")
       if DB_MODE == "Redis"
         puts("Redis conf = #{config.redis_conf}")
+        puts("Redis port = #{@logs.port}")
       end
       puts("Cluster = #{@cluster.name}")
       log_puts("DB = #{DB_MODE}")
@@ -505,6 +509,9 @@ module WorkflowManager
       end
     end
     alias_method :check_status, :success_or_fail
+    def cluster_node_list
+      @cluster.node_list
+    end
   end
 end
 
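The reworked RedisDB#initialize derives the Redis port by tokenizing the configuration file on spaces, falling back to 6379 when no port directive is present, and the server now reports that port at startup. A minimal reproduction of the parsing line against a throwaway conf file; the directives are illustrative:

  # Reproduce the conf-parsing line from RedisDB#initialize against a
  # temporary file standing in for the real redis conf.
  require 'csv'
  require 'tempfile'

  conf_file = Tempfile.new('redis_conf')
  conf_file.write("port 6380\ndir /tmp\n")
  conf_file.flush

  conf = Hash[*CSV.readlines(conf_file.path, col_sep: " ").map{|a| [a.first, a[1,100].join(",")]}.flatten]
  port = (conf["port"] || 6379).to_i
  p port  # => 6380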
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: workflow_manager
 version: !ruby/object:Gem::Version
-  version: 0.5.
+  version: 0.5.6
 platform: ruby
 authors:
 - Functional Genomics Center Zurich
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-
+date: 2020-06-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -104,8 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-
-rubygems_version: 2.6.14
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Workflow Manager manages job submissions using dRuby.