workflow_manager 0.5.1 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/wfm_hello +2 -1
- data/lib/workflow_manager/cluster.rb +235 -0
- data/lib/workflow_manager/server.rb +10 -3
- data/lib/workflow_manager/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 624dc0f37286ee8b893ceb0d73cce4ce83ab6078be6b1f06a96c316e145311f4
+  data.tar.gz: 805777d8e45242281c3e929b03a92d3fd9205c6c091f716ebb16b93bf15904e9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8c89da95c15863910c397bc149b799b094ee631feb092a7e0ffea7648c2148e09677b1288bf41daa5aa798b8a315ad3e7d2a4686d2b7bb5ced4f57fe8eff2161
+  data.tar.gz: bea9060c0b8322b3b096ffdae4ac82d9f523bc9322b8bfa412f986aab5c56881523985698c974eb581560929eea60d34189724efae29c28e31127512f0b56478
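The new digests can be re-checked locally. A minimal sketch, assuming the unpacked data.tar.gz sits in the current directory (the path is illustrative):

  # Sketch: recompute the SHA256 digest published above for data.tar.gz.
  require 'digest'

  digest   = Digest::SHA256.file('data.tar.gz').hexdigest
  expected = '805777d8e45242281c3e929b03a92d3fd9205c6c091f716ebb16b93bf15904e9'
  puts digest == expected ? 'checksum OK' : 'checksum mismatch'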
data/bin/wfm_hello
CHANGED
@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
 # encoding: utf-8
 # 20121112 masa workflow manager client
-Version = '
+Version = '20200522-134606'
 
 require 'drb/drb'
 
@@ -28,3 +28,4 @@ if wfmrc
 end
 workflow_manager = DRbObject.new_with_uri(uri)
 puts workflow_manager.hello
+puts workflow_manager.cluster_node_list
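The added call travels over DRb to the server's new cluster_node_list method (added in server.rb below). A minimal sketch of the round trip; the druby URI is an assumption standing in for the one wfm_hello resolves from the .wfmrc file, and the hash shown in the comment is illustrative, following the key format built by node_list in cluster.rb below:

  # Sketch only: URI and printed values are assumptions, not gem defaults.
  require 'drb/drb'

  uri = 'druby://localhost:12345'
  workflow_manager = DRbObject.new_with_uri(uri)
  puts workflow_manager.hello             # greeting string from the server
  p workflow_manager.cluster_node_list    # e.g. {"fgcz-h-900: cores 8, ram 30G, scr 500G" => "fgcz-h-900"}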
data/lib/workflow_manager/cluster.rb
CHANGED
@@ -38,6 +38,8 @@ module WorkflowManager
     end
     def default_node
     end
+    def node_list
+    end
   end
 
   class LocalComputer < Cluster
@@ -238,6 +240,63 @@ module WorkflowManager
       'fgcz-h-010: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-010',
       }
     end
+    def node_list
+      node2scr = {}
+      command = "qhost -F scratch"
+      keep = nil
+      IO.popen(command) do |out|
+        while line=out.gets
+          hostname, arch, ncpu, loading, memtot, memuse, *others = line.split
+          if hostname =~ /fgcz/
+            keep = hostname
+          elsif scratch_ = line.chomp.split.last and
+            scratch = scratch_.split('=').last
+            node2scr[keep] = scratch.to_i
+            keep = nil
+          end
+        end
+      end
+
+      list = {}
+      keep = nil
+      command = 'qhost -q'
+      IO.popen(command) do |out|
+        while line=out.gets
+          # HOSTNAME ARCH NCPU LOAD MEMTOT MEMUSE SWAPTO SWAPUS
+          hostname, arch, ncpu, loading, memtot, memuse, *others = line.split
+          if hostname =~ /fgcz/
+            #puts [hostname, ncpu, loading, memtot, memuse].join("\t")
+            mem = memtot.gsub(/G/, '').to_i
+            keep = [hostname, ncpu, "#{mem}G"]
+          elsif hostname == "GT" and keep and cores = line.chomp.split.last and cores !~ /[du]/
+            hostname = keep.shift
+            keep[0] = cores
+            if scr = node2scr[hostname] and scr >= 1000
+              scr = "%.1f" % (scr.to_f / 1000)
+              scr << "T"
+            else
+              scr = scr.to_s + "G"
+            end
+            keep << scr
+            list[hostname] = keep
+            keep = nil
+          end
+        end
+      end
+
+      # reformat
+      nodes = {}
+      list.each do |hostname, specs|
+        # 20190823 masa tentatively off use f47
+        unless hostname =~ /fgcz-c-047/
+          cores, ram, scr = specs
+          key = "#{hostname}: cores #{cores}, ram #{ram}, scr #{scr}"
+          value = hostname
+          nodes[key] = value
+        end
+      end
+      nodes
+    end
   end
 
   class FGCZCourseCluster < FGCZCluster
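One detail worth calling out in node_list above: scratch sizes from `qhost -F scratch` arrive as integer gigabytes, and anything from 1000 GB up is reported as fractional terabytes. A standalone sketch of that formatting rule (not part of the gem):

  # Sketch: the scratch-size formatting used by node_list above.
  def format_scratch(scr_gb)
    if scr_gb and scr_gb >= 1000
      ("%.1f" % (scr_gb.to_f / 1000)) + "T"
    else
      "#{scr_gb}G"
    end
  end

  puts format_scratch(400)   # => 400G
  puts format_scratch(1800)  # => 1.8T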
@@ -290,4 +349,180 @@ module WorkflowManager
       }
     end
   end
+
+  class FGCZDevian10Cluster < Cluster
+    def submit_job(script_file, script_content, option='')
+      if script_name = File.basename(script_file) and script_name =~ /\.sh/
+        script_name = script_name.split(/\.sh/).first + ".sh"
+        new_job_script = generate_new_job_script(script_name, script_content)
+        new_job_script_base = File.basename(new_job_script)
+        log_file = File.join(@log_dir, new_job_script_base + "_o.log")
+        err_file = File.join(@log_dir, new_job_script_base + "_e.log")
+        command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
+        #command = "sbatch -o #{log_file} -e #{err_file} #{new_job_script}"
+        job_id = `#{command}`
+        #job_id = job_id.match(/Your job (\d+) \(/)[1]
+        job_id = job_id.chomp.split.last
+        [job_id, log_file, command]
+      else
+        err_msg = "FGCZDevian10Cluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
+        warn err_msg
+        raise err_msg
+      end
+    end
+    def job_running?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
+          # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state == 'R'
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def job_ends?(log_file)
+      log_flag = false
+      IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
+        while line=io.gets
+          if line =~ /__SCRIPT END__/
+            log_flag = true
+            break
+          end
+        end
+      end
+      log_flag
+    end
+    def job_pending?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state =~ /PD/
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def copy_commands(org_dir, dest_parent_dir, now=nil)
+      commands = if now == "force"
+                   target_file = File.join(dest_parent_dir, File.basename(org_dir))
+                   ["g-req copynow -f #{org_dir} #{dest_parent_dir}"]
+                 elsif now
+                   ["g-req copynow #{org_dir} #{dest_parent_dir}"]
+                 else
+                   ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
+                 end
+    end
+    def kill_command(job_id)
+      command = "scancel #{job_id}"
+    end
+    def delete_command(target)
+      command = "g-req remove #{target}"
+    end
+    def cluster_nodes
+      nodes = {
+        'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
+        'fgcz-h-901: cpu 8,mem 30 GB,scr 400G' => 'fgcz-h-901',
+      }
+    end
+  end
+
+  class FGCZDebian10CourseCluster < Cluster
+    def submit_job(script_file, script_content, option='')
+      if script_name = File.basename(script_file) and script_name =~ /\.sh/
+        script_name = script_name.split(/\.sh/).first + ".sh"
+        new_job_script = generate_new_job_script(script_name, script_content)
+        new_job_script_base = File.basename(new_job_script)
+        log_file = File.join(@log_dir, new_job_script_base + "_o.log")
+        err_file = File.join(@log_dir, new_job_script_base + "_e.log")
+        command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
+        job_id = `#{command}`
+        job_id = job_id.chomp.split.last
+        [job_id, log_file, command]
+      else
+        err_msg = "FGCZDebian10CourseCluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
+        warn err_msg
+        raise err_msg
+      end
+    end
+    def job_running?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          # ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
+          # ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state == 'R'
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def job_ends?(log_file)
+      log_flag = false
+      IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
+        while line=io.gets
+          if line =~ /__SCRIPT END__/
+            log_flag = true
+            break
+          end
+        end
+      end
+      log_flag
+    end
+    def job_pending?(job_id)
+      qstat_flag = false
+      IO.popen('squeue') do |io|
+        while line=io.gets
+          jobid, partition, name, user, state, *others = line.chomp.split
+          if jobid.strip == job_id and state =~ /PD/
+            qstat_flag = true
+            break
+          end
+        end
+      end
+      qstat_flag
+    end
+    def copy_commands(org_dir, dest_parent_dir, now=nil)
+      commands = ["cp -r #{org_dir} #{dest_parent_dir}"]
+    end
+    def kill_command(job_id)
+      command = "scancel #{job_id}"
+    end
+    def delete_command(target)
+      command = "rm -rf #{target}"
+    end
+    def cluster_nodes
+      nodes = {
+        'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
+        'fgcz-h-901: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-901',
+        'fgcz-h-902: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-902',
+        'fgcz-h-903: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-903',
+        'fgcz-h-904: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-904',
+        'fgcz-h-905: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-905',
+        'fgcz-h-906: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-906',
+        'fgcz-h-907: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-907',
+        'fgcz-h-908: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-908',
+        'fgcz-h-909: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-909',
+        'fgcz-h-910: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-910',
+        'fgcz-h-911: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-911',
+        'fgcz-h-912: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-912',
+        'fgcz-h-913: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-913',
+        'fgcz-h-914: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-914',
+        'fgcz-h-915: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-915',
+        'fgcz-h-916: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-916',
+        'fgcz-h-917: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-917',
+      }
+    end
+  end
+
 end
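Both new classes decide job state by parsing squeue output, treating ST == 'R' as running and 'PD' as pending. A standalone sketch of that check against canned output (column layout taken from the comments in the diff above), instead of a live IO.popen('squeue'):

  # Sketch: the squeue parsing used by job_running?/job_pending? above.
  sample = <<~SQUEUE
    JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
      206  employee  test.sh  masaomi  R       0:03      1 fgcz-h-030
  SQUEUE

  def job_state(job_id, squeue_output)
    squeue_output.each_line do |line|
      jobid, _partition, _name, _user, state, * = line.chomp.split
      return state if jobid and jobid.strip == job_id
    end
    nil
  end

  puts job_state("206", sample)   # => R (running; PD would mean pending)
  p    job_state("999", sample)   # => nil (no longer queued)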
data/lib/workflow_manager/server.rb
CHANGED
@@ -100,6 +100,7 @@ module WorkflowManager
     end
   end
   class RedisDB
+    attr_accessor :port
    def run_redis_server(redis_conf)
      @pid = fork do
        exec("redis-server #{redis_conf}")

@@ -108,11 +109,13 @@ module WorkflowManager
        Process.waitpid @pid
      end
    end
-    def initialize(db_no=0, redis_conf
+    def initialize(db_no=0, redis_conf)
      if db_no==0
        run_redis_server(redis_conf)
      end
-
+      conf = Hash[*CSV.readlines(redis_conf, col_sep: " ").map{|a| [a.first, a[1,100].join(",")]}.flatten]
+      @port = (conf["port"]||6379).to_i
+      @db = Redis.new(port: @port, db: db_no)
    end
    def transaction
      #@db.multi do
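RedisDB#initialize now recovers the port from the same redis.conf it boots the server with, falling back to 6379. A minimal sketch of that parse using a throwaway conf file (the conf contents are assumed; CSV.readlines is the stdlib alias of CSV.read used in the diff above):

  # Sketch: extracting the port from a whitespace-delimited redis.conf.
  require 'csv'
  require 'tempfile'

  conf = Tempfile.new(['redis', '.conf'])
  conf.write("port 6380\ndbfilename dump.rdb\n")
  conf.flush

  rows  = CSV.readlines(conf.path, col_sep: " ")
  pairs = Hash[*rows.map { |a| [a.first, a[1, 100].join(",")] }.flatten]
  puts (pairs["port"] || 6379).to_i   # => 6380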
@@ -154,7 +157,7 @@ module WorkflowManager
      when "KyotoCabinet"
        KyotoDB.new(@db_logs)
      when "Redis"
-        RedisDB.new(1)
+        RedisDB.new(1, @redis_conf)
      end
 
      @system_log = File.join(@log_dir, "system.log")

@@ -163,6 +166,7 @@ module WorkflowManager
      puts("DB = #{DB_MODE}")
      if DB_MODE == "Redis"
        puts("Redis conf = #{config.redis_conf}")
+        puts("Redis port = #{@logs.port}")
      end
      puts("Cluster = #{@cluster.name}")
      log_puts("DB = #{DB_MODE}")

@@ -505,6 +509,9 @@ module WorkflowManager
      end
    end
    alias_method :check_status, :success_or_fail
+    def cluster_node_list
+      @cluster.node_list
+    end
  end
 end
 
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: workflow_manager
 version: !ruby/object:Gem::Version
-  version: 0.5.1
+  version: 0.5.6
 platform: ruby
 authors:
 - Functional Genomics Center Zurich
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-
+date: 2020-06-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler

@@ -104,8 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-
-rubygems_version: 2.6.14
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Workflow Manager manages job submissions using dRuby.