workflow_manager 0.5.8 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +3 -2
- data/bin/wfm_monitoring +3 -2
- data/bin/workflow_manager +22 -2
- data/config/environments/production.rb +1 -1
- data/config/environments/redis.conf +2 -2
- data/config/environments/sidekiq.yml +8 -0
- data/lib/job_checker.rb +81 -0
- data/lib/worker4.rb +80 -0
- data/lib/workflow_manager/cluster.rb +14 -95
- data/lib/workflow_manager/server.rb +47 -11
- data/lib/workflow_manager/version.rb +1 -1
- data/test/call_worker4.rb +11 -0
- data/test/call_worker_method.rb +11 -0
- data/test/job_list.rb +50 -0
- data/test/test_job1.sh +5 -0
- data/workflow_manager.gemspec +1 -1
- metadata +16 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6ea3816ec54cac7bf1097d94f410aeb9d17641755494de742814c4151a20d4de
|
4
|
+
data.tar.gz: 8adcaf1765299151ccddc6f68740989e265b68b7829c07a4e49ee751bb0ff608
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb11c23c1346ca6fff218b2283b6254cf18ef3872725c1f7a068c8261d1af2217d7d13686c61ea28f2d4c01b7c1f1b242862f9ae60e223ffb64f70d0646aae2b
|
7
|
+
data.tar.gz: 764e474e5fbbf9fffb411f5484c774bff60e517893ef983e7307ca923d8c12f982ac4c4f20468015d7b78a2b0ff4c9adab8cbf465c0a9fb09bf46170bd9eea69
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
source 'https://rubygems.org'
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
gem 'redis'
|
4
|
+
gem 'sidekiq'
|
5
|
+
gem 'workflow_manager', :path => '/srv/GT/analysis/masaomi/FGCZ/prototype_workflow_manager_with_sidekiq_20210122/workflow_manager/'
|
data/bin/wfm_monitoring
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# encoding: utf-8
|
3
3
|
# 20121112 masa workflow manager client
|
4
|
-
Version = '
|
4
|
+
Version = '20210625-165025'
|
5
5
|
|
6
6
|
require 'drb/drb'
|
7
7
|
require 'workflow_manager/optparse_ex'
|
@@ -54,4 +54,5 @@ sge_options << "-n #{opt.nodes}" if opt.nodes
|
|
54
54
|
script_content = File.read(script_file)
|
55
55
|
workflow_manager = DRbObject.new_with_uri(uri)
|
56
56
|
#puts workflow_manager.start_monitoring(script_file, user, 0, script_content, project_number, sge_options.join(' '), opt.log)
|
57
|
-
puts workflow_manager.start_monitoring2(script_file, script_content, user, project_number, sge_options.join(' '), opt.log)
|
57
|
+
#puts workflow_manager.start_monitoring2(script_file, script_content, user, project_number, sge_options.join(' '), opt.log)
|
58
|
+
puts workflow_manager.start_monitoring3(script_file, script_content, user, project_number, sge_options.join(' '), opt.log)
|
data/bin/workflow_manager
CHANGED
@@ -8,7 +8,7 @@ Version = WorkflowManager::VERSION
|
|
8
8
|
opt = OptionParser.new do |o|
|
9
9
|
o.banner = "Version: #{Version}\nUsage:\n #{File.basename(__FILE__)} -d [druby://host:port] -m [development|production]"
|
10
10
|
o.on(:server, 'druby://localhost:12345', '-d server', '--server', 'workflow manager URI (default: druby://localhost:12345)')
|
11
|
-
o.on(:mode, '
|
11
|
+
o.on(:mode, 'production', '-m mode', '--mode', 'development|production (default: production)')
|
12
12
|
o.parse!(ARGV)
|
13
13
|
end
|
14
14
|
|
@@ -33,7 +33,9 @@ if opt.mode
|
|
33
33
|
default_config_file = File.join(default_config_dir, opt.mode+".rb")
|
34
34
|
if File.exist?(default_config_file)
|
35
35
|
default_redis_config_file = File.join(default_config_dir, "redis.conf")
|
36
|
+
default_sidekiq_config_file = File.join(default_config_dir, "sidekiq.conf")
|
36
37
|
FileUtils.cp(default_redis_config_file, config_dir)
|
38
|
+
FileUtils.cp(default_sidekiq_config_file, config_dir)
|
37
39
|
FileUtils.cp(default_config_file, config_file)
|
38
40
|
else
|
39
41
|
raise "Configure file does not exist: #{config_file}"
|
@@ -43,4 +45,22 @@ if opt.mode
|
|
43
45
|
end
|
44
46
|
DRb.start_service(uri, WorkflowManager::Server.new)
|
45
47
|
puts DRb.uri
|
46
|
-
DRb.thread.join
|
48
|
+
#DRb.thread.join
|
49
|
+
|
50
|
+
sleep 1
|
51
|
+
|
52
|
+
sidekiq_pid = fork do
|
53
|
+
app_dir = File.expand_path('..', __FILE__)
|
54
|
+
job_checker = File.join(app_dir, "../lib/job_checker.rb")
|
55
|
+
exec("sidekiq -C config/environments/sidekiq.yml -r #{job_checker}")
|
56
|
+
end
|
57
|
+
|
58
|
+
begin
|
59
|
+
DRb.thread.join
|
60
|
+
puts "__END__"
|
61
|
+
rescue SignalException
|
62
|
+
Process.kill("HUP", sidekiq_pid)
|
63
|
+
sleep 1
|
64
|
+
puts "__CORRECTLY_END__"
|
65
|
+
end
|
66
|
+
|
data/config/environments/production.rb
CHANGED
@@ -7,6 +7,6 @@ WorkflowManager::Server.configure do |config|
|
|
7
7
|
config.interval = 30
|
8
8
|
config.resubmit = 0
|
9
9
|
config.redis_conf = "config/environments/redis.conf"
|
10
|
-
config.cluster = WorkflowManager::
|
10
|
+
config.cluster = WorkflowManager::FGCZDebian10Cluster.new('FGCZDebian10Cluster')
|
11
11
|
end
|
12
12
|
|
data/lib/job_checker.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'sidekiq'
|
2
|
+
require 'redis'
|
3
|
+
|
4
|
+
WORKER_INTERVAL = 10 # [s]
|
5
|
+
REDIS_CONF = File.expand_path("../../config/environments/redis.conf", __FILE__)
|
6
|
+
PORT = if File.exist?(REDIS_CONF)
|
7
|
+
redis_conf = Hash[*File.readlines(REDIS_CONF).map{|line| line.chomp.split}.map{|e| [e[0], e[1,100].join(",")]}.flatten]
|
8
|
+
redis_conf["port"].to_i
|
9
|
+
else
|
10
|
+
6379
|
11
|
+
end
|
12
|
+
SIDEKIQ_URL = "redis://localhost:#{PORT}/3"
|
13
|
+
warn "redis.conf: #{REDIS_CONF}"
|
14
|
+
warn "Redis port: #{PORT}"
|
15
|
+
warn "Sidekiq URL: #{SIDEKIQ_URL}"
|
16
|
+
|
17
|
+
Sidekiq.configure_server do |config|
|
18
|
+
config.redis = { url: SIDEKIQ_URL }
|
19
|
+
end
|
20
|
+
|
21
|
+
Sidekiq.configure_client do |config|
|
22
|
+
config.redis = { url: SIDEKIQ_URL }
|
23
|
+
end
|
24
|
+
|
25
|
+
class Redis
|
26
|
+
alias_method :[], :get
|
27
|
+
alias_method :[]=, :set
|
28
|
+
end
|
29
|
+
|
30
|
+
class JobChecker
|
31
|
+
include Sidekiq::Worker
|
32
|
+
sidekiq_options queue: :default, retry: 5
|
33
|
+
|
34
|
+
def generate_new_job_script(log_dir, script_basename, script_content)
|
35
|
+
new_job_script = File.basename(script_basename) + "_" + Time.now.strftime("%Y%m%d%H%M%S%L")
|
36
|
+
new_job_script = File.join(log_dir, new_job_script)
|
37
|
+
open(new_job_script, 'w') do |out|
|
38
|
+
out.print script_content
|
39
|
+
out.print "\necho __SCRIPT END__\n"
|
40
|
+
end
|
41
|
+
new_job_script
|
42
|
+
end
|
43
|
+
def update_time_status(status, script_basename, user, project_number)
|
44
|
+
unless @start_time
|
45
|
+
@start_time = Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
46
|
+
end
|
47
|
+
time = Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
48
|
+
[status, script_basename, [@start_time, time].join("/"), user, project_number].join(',')
|
49
|
+
end
|
50
|
+
|
51
|
+
def perform(job_id, script_basename, log_file, user, project_id)
|
52
|
+
puts "JobID (in JobChecker): #{job_id}"
|
53
|
+
db0 = Redis.new(port: PORT, db: 0) # state + alpha DB
|
54
|
+
db1 = Redis.new(port: PORT, db: 1) # log DB
|
55
|
+
db2 = Redis.new(port: PORT, db: 2) # project jobs DB
|
56
|
+
db1[job_id] = log_file
|
57
|
+
pre_state = nil
|
58
|
+
@start_time = nil
|
59
|
+
begin
|
60
|
+
command = "sacct --jobs=#{job_id} --format=state"
|
61
|
+
#puts command
|
62
|
+
ret = `#{command}`
|
63
|
+
#print ret
|
64
|
+
state = ret.split(/\n/).last.strip
|
65
|
+
#puts "state: #{state}"
|
66
|
+
db0[job_id] = update_time_status(state, script_basename, user, project_id)
|
67
|
+
|
68
|
+
unless state == pre_state
|
69
|
+
db0[job_id] = update_time_status(state, script_basename, user, project_id)
|
70
|
+
project_jobs = eval((db2[project_id]||[]).to_s)
|
71
|
+
project_jobs = Hash[*project_jobs]
|
72
|
+
project_jobs[job_id] = state
|
73
|
+
#p project_jobs
|
74
|
+
db2[project_id] = project_jobs.to_a.flatten.last(200).to_s
|
75
|
+
end
|
76
|
+
pre_state = state
|
77
|
+
sleep WORKER_INTERVAL
|
78
|
+
end while state =~ /RUNNING/ or state =~ /PENDING/ or state =~ /---/
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
data/lib/worker4.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'sidekiq'
|
2
|
+
require 'redis'
|
3
|
+
|
4
|
+
WORKER_INTERVAL = 10 # [s]
|
5
|
+
|
6
|
+
Sidekiq.configure_server do |config|
|
7
|
+
config.redis = { url: 'redis://localhost:6380/3' }
|
8
|
+
end
|
9
|
+
|
10
|
+
Sidekiq.configure_client do |config|
|
11
|
+
config.redis = { url: 'redis://localhost:6380/3' }
|
12
|
+
end
|
13
|
+
|
14
|
+
class Redis
|
15
|
+
alias_method :[], :get
|
16
|
+
alias_method :[]=, :set
|
17
|
+
end
|
18
|
+
|
19
|
+
class JobWorker
|
20
|
+
include Sidekiq::Worker
|
21
|
+
sidekiq_options queue: :default, retry: 5
|
22
|
+
|
23
|
+
def generate_new_job_script(log_dir, script_basename, script_content)
|
24
|
+
new_job_script = File.basename(script_basename) + "_" + Time.now.strftime("%Y%m%d%H%M%S%L")
|
25
|
+
new_job_script = File.join(log_dir, new_job_script)
|
26
|
+
open(new_job_script, 'w') do |out|
|
27
|
+
out.print script_content
|
28
|
+
out.print "\necho __SCRIPT END__\n"
|
29
|
+
end
|
30
|
+
new_job_script
|
31
|
+
end
|
32
|
+
def update_time_status(status, script_basename, user, project_number)
|
33
|
+
unless @start_time
|
34
|
+
@start_time = Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
35
|
+
end
|
36
|
+
time = Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
37
|
+
[status, script_basename, [@start_time, time].join("/"), user, project_number].join(',')
|
38
|
+
end
|
39
|
+
|
40
|
+
def perform(project_id, log_dir, script_basename, script_content)
|
41
|
+
#script_base_name = "test_job.sh"
|
42
|
+
job_script = generate_new_job_script(log_dir, script_basename, script_content)
|
43
|
+
log_file = job_script + "_o.log"
|
44
|
+
err_file = job_script + "_e.log"
|
45
|
+
command = "sbatch -o #{log_file} -e #{err_file} -N 1 #{job_script}"
|
46
|
+
puts command
|
47
|
+
ret = `#{command}`
|
48
|
+
job_id = ret.chomp.split.last
|
49
|
+
puts "JobID: #{job_id}"
|
50
|
+
db0 = Redis.new(port: 6380, db: 0) # state + alpha DB
|
51
|
+
db1 = Redis.new(port: 6380, db: 1) # log DB
|
52
|
+
db2 = Redis.new(port: 6380, db: 2) # project jobs DB
|
53
|
+
db1[job_id] = log_file
|
54
|
+
pre_state = nil
|
55
|
+
@start_time = nil
|
56
|
+
begin
|
57
|
+
command = "sacct --jobs=#{job_id} --format=state"
|
58
|
+
puts command
|
59
|
+
ret = `#{command}`
|
60
|
+
#print ret
|
61
|
+
state = ret.split(/\n/).last.strip
|
62
|
+
puts "state: #{state}"
|
63
|
+
#db.set(job_id, state)
|
64
|
+
db0[job_id] = update_time_status(state, script_basename, "sushi_lover", project_id)
|
65
|
+
|
66
|
+
unless state == pre_state
|
67
|
+
db0[job_id] = update_time_status(state, script_basename, "sushi_lover", project_id)
|
68
|
+
project_jobs = eval((db2[project_id]||[]).to_s)
|
69
|
+
project_jobs = Hash[*project_jobs]
|
70
|
+
project_jobs[job_id] = state
|
71
|
+
p project_jobs
|
72
|
+
db2[project_id] = project_jobs.to_a.flatten.last(200).to_s
|
73
|
+
#db2[project_id] = project_jobs.to_s
|
74
|
+
end
|
75
|
+
pre_state = state
|
76
|
+
sleep WORKER_INTERVAL
|
77
|
+
end while state =~ /RUNNING/ or state =~ /PENDING/ or state =~ /---/
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
data/lib/workflow_manager/cluster.rb
CHANGED
@@ -308,6 +308,7 @@ module WorkflowManager
|
|
308
308
|
end
|
309
309
|
end
|
310
310
|
|
311
|
+
|
311
312
|
class HydraCluster < Cluster
|
312
313
|
def submit_job(script_file, script_content, option='')
|
313
314
|
# TODO
|
@@ -434,97 +435,6 @@ module WorkflowManager
|
|
434
435
|
end
|
435
436
|
end
|
436
437
|
|
437
|
-
class FGCZDebian10CourseCluster < Cluster
|
438
|
-
def submit_job(script_file, script_content, option='')
|
439
|
-
if script_name = File.basename(script_file) and script_name =~ /\.sh/
|
440
|
-
script_name = script_name.split(/\.sh/).first + ".sh"
|
441
|
-
new_job_script = generate_new_job_script(script_name, script_content)
|
442
|
-
new_job_script_base = File.basename(new_job_script)
|
443
|
-
log_file = File.join(@log_dir, new_job_script_base + "_o.log")
|
444
|
-
err_file = File.join(@log_dir, new_job_script_base + "_e.log")
|
445
|
-
command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
|
446
|
-
job_id = `#{command}`
|
447
|
-
job_id = job_id.chomp.split.last
|
448
|
-
[job_id, log_file, command]
|
449
|
-
else
|
450
|
-
err_msg = "FGCZDebian10CourseCluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
|
451
|
-
warn err_msg
|
452
|
-
raise err_msg
|
453
|
-
end
|
454
|
-
end
|
455
|
-
def job_running?(job_id)
|
456
|
-
qstat_flag = false
|
457
|
-
IO.popen('squeue') do |io|
|
458
|
-
while line=io.gets
|
459
|
-
# ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
|
460
|
-
# ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
|
461
|
-
jobid, partition, name, user, state, *others = line.chomp.split
|
462
|
-
if jobid.strip == job_id and state == 'R'
|
463
|
-
qstat_flag = true
|
464
|
-
break
|
465
|
-
end
|
466
|
-
end
|
467
|
-
end
|
468
|
-
qstat_flag
|
469
|
-
end
|
470
|
-
def job_ends?(log_file)
|
471
|
-
log_flag = false
|
472
|
-
IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
|
473
|
-
while line=io.gets
|
474
|
-
if line =~ /__SCRIPT END__/
|
475
|
-
log_flag = true
|
476
|
-
break
|
477
|
-
end
|
478
|
-
end
|
479
|
-
end
|
480
|
-
log_flag
|
481
|
-
end
|
482
|
-
def job_pending?(job_id)
|
483
|
-
qstat_flag = false
|
484
|
-
IO.popen('squeue') do |io|
|
485
|
-
while line=io.gets
|
486
|
-
jobid, partition, name, user, state, *others = line.chomp.split
|
487
|
-
if jobid.strip == job_id and state =~ /PD/
|
488
|
-
qstat_flag = true
|
489
|
-
break
|
490
|
-
end
|
491
|
-
end
|
492
|
-
end
|
493
|
-
qstat_flag
|
494
|
-
end
|
495
|
-
def copy_commands(org_dir, dest_parent_dir, now=nil)
|
496
|
-
commands = ["cp -r #{org_dir} #{dest_parent_dir}"]
|
497
|
-
end
|
498
|
-
def kill_command(job_id)
|
499
|
-
command = "scancel #{job_id}"
|
500
|
-
end
|
501
|
-
def delete_command(target)
|
502
|
-
command = "rm -rf #{target}"
|
503
|
-
end
|
504
|
-
def cluster_nodes
|
505
|
-
nodes = {
|
506
|
-
'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
|
507
|
-
'fgcz-h-901: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-901',
|
508
|
-
'fgcz-h-902: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-902',
|
509
|
-
'fgcz-h-903: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-903',
|
510
|
-
'fgcz-h-904: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-904',
|
511
|
-
'fgcz-h-905: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-905',
|
512
|
-
'fgcz-h-906: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-906',
|
513
|
-
'fgcz-h-907: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-907',
|
514
|
-
'fgcz-h-908: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-908',
|
515
|
-
'fgcz-h-909: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-909',
|
516
|
-
'fgcz-h-910: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-910',
|
517
|
-
'fgcz-h-911: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-911',
|
518
|
-
'fgcz-h-912: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-912',
|
519
|
-
'fgcz-h-913: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-913',
|
520
|
-
'fgcz-h-914: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-914',
|
521
|
-
'fgcz-h-915: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-915',
|
522
|
-
'fgcz-h-916: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-916',
|
523
|
-
'fgcz-h-917: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-917',
|
524
|
-
}
|
525
|
-
end
|
526
|
-
end
|
527
|
-
|
528
438
|
class FGCZDebian10Cluster < Cluster
|
529
439
|
def parse(options)
|
530
440
|
options = options.split
|
@@ -537,14 +447,14 @@ module WorkflowManager
|
|
537
447
|
scratch = if i = options.index("-s")
|
538
448
|
options[i+1]
|
539
449
|
end
|
540
|
-
|
541
|
-
|
542
|
-
|
450
|
+
partition = if i = options.index("-p")
|
451
|
+
options[i+1]
|
452
|
+
end
|
543
453
|
new_options = []
|
544
454
|
new_options << "--mem=#{ram}G" if ram
|
545
455
|
new_options << "-n #{cores}" if cores
|
546
456
|
new_options << "--tmp=#{scratch}G" if scratch
|
547
|
-
new_options << "-p #{
|
457
|
+
new_options << "-p #{partition}" if partition
|
548
458
|
new_options.join(" ")
|
549
459
|
end
|
550
460
|
def submit_job(script_file, script_content, option='')
|
@@ -630,4 +540,13 @@ module WorkflowManager
|
|
630
540
|
}
|
631
541
|
end
|
632
542
|
end
|
543
|
+
|
544
|
+
class FGCZDebian10DemoCluster < FGCZDebian10Cluster
|
545
|
+
def copy_commands(org_dir, dest_parent_dir, now=nil)
|
546
|
+
commands = ["cp -r #{org_dir} #{dest_parent_dir}"]
|
547
|
+
end
|
548
|
+
def delete_command(target)
|
549
|
+
command = "rm -rf #{target}"
|
550
|
+
end
|
551
|
+
end
|
633
552
|
end
|
data/lib/workflow_manager/server.rb
CHANGED
@@ -4,6 +4,9 @@
|
|
4
4
|
require 'drb/drb'
|
5
5
|
require 'fileutils'
|
6
6
|
require 'csv'
|
7
|
+
|
8
|
+
require 'job_checker'
|
9
|
+
|
7
10
|
begin
|
8
11
|
require 'redis'
|
9
12
|
DB_MODE = "Redis"
|
@@ -159,6 +162,7 @@ module WorkflowManager
|
|
159
162
|
when "Redis"
|
160
163
|
RedisDB.new(1, @redis_conf)
|
161
164
|
end
|
165
|
+
@jobs = RedisDB.new(2, @redis_conf)
|
162
166
|
|
163
167
|
@system_log = File.join(@log_dir, "system.log")
|
164
168
|
@mutex = Mutex.new
|
@@ -172,15 +176,34 @@ module WorkflowManager
|
|
172
176
|
log_puts("DB = #{DB_MODE}")
|
173
177
|
log_puts("Cluster = #{@cluster.name}")
|
174
178
|
log_puts("Server starts")
|
179
|
+
log_puts("Recovery check")
|
180
|
+
recovery_job_checker
|
181
|
+
end
|
182
|
+
def recovery_job_checker
|
183
|
+
@logs.transaction do |logs|
|
184
|
+
@statuses.transaction do |statuses|
|
185
|
+
statuses.each do |job_id, status|
|
186
|
+
# puts [job_id, status].join(",")
|
187
|
+
# 120249,RUNNING,QC_ventricles_100k.sh,2021-07-30 09:47:04/2021-07-30 09:47:04,masaomi,1535
|
188
|
+
stat, script_basename, time, user, project_number = status.split(",")
|
189
|
+
if stat == "RUNNING" or stat == "PENDING"
|
190
|
+
log_file = logs[job_id]
|
191
|
+
log_puts("JobID (in recovery check): #{job_id}")
|
192
|
+
puts "JobID (in recovery check): #{job_id}"
|
193
|
+
JobChecker.perform_async(job_id, script_basename, log_file, user, project_number)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
175
198
|
end
|
176
199
|
def hello
|
177
|
-
'hello
|
200
|
+
'hello hoge hoge bar boo bundle, '+ @cluster.name
|
178
201
|
end
|
179
202
|
def copy_commands(org_dir, dest_parent_dir, now=nil)
|
180
203
|
@cluster.copy_commands(org_dir, dest_parent_dir, now)
|
181
204
|
end
|
182
205
|
def kill_job(job_id)
|
183
|
-
status(job_id, '
|
206
|
+
status(job_id, 'FAIL')
|
184
207
|
status = `#{@cluster.kill_command(job_id)}`
|
185
208
|
end
|
186
209
|
def delete_command(target)
|
@@ -272,6 +295,17 @@ module WorkflowManager
|
|
272
295
|
Thread.current.kill
|
273
296
|
end
|
274
297
|
end
|
298
|
+
def start_monitoring3(script_path, script_content, user='sushi_lover', project_number=0, sge_options='', log_dir='')
|
299
|
+
script_basename = File.basename(script_path)
|
300
|
+
job_id, log_file, command = @cluster.submit_job(script_path, script_content, sge_options)
|
301
|
+
#p command
|
302
|
+
#p log_file
|
303
|
+
#p job_id
|
304
|
+
puts "JobID (in WorkflowManager): #{job_id}"
|
305
|
+
sleep 1
|
306
|
+
JobChecker.perform_async(job_id, script_basename, log_file, user, project_number)
|
307
|
+
job_id
|
308
|
+
end
|
275
309
|
def start_monitoring2(script_path, script_content, user='sushi_lover', project_number=0, sge_options='', log_dir='')
|
276
310
|
# script_path is only used to generate a log file name
|
277
311
|
# It is not used to read the script contents
|
@@ -418,7 +452,7 @@ module WorkflowManager
|
|
418
452
|
#@statuses.open(@db_stat)
|
419
453
|
@statuses.transaction do |statuses|
|
420
454
|
if new_status and stat = statuses[job_id.to_s]
|
421
|
-
status_list = ['
|
455
|
+
status_list = ['CONPLETED', 'RUNNING', 'PENDING', 'FAIL']
|
422
456
|
if status_list.include?(new_status)
|
423
457
|
items = stat.split(/,/)
|
424
458
|
items.shift
|
@@ -438,17 +472,19 @@ module WorkflowManager
|
|
438
472
|
job_idsh = if job_ids
|
439
473
|
Hash[*(job_ids.split(',')).map{|job_id| [job_id, true]}.flatten]
|
440
474
|
end
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
end
|
447
|
-
else
|
448
|
-
s << [key, value]
|
475
|
+
s_ = {}
|
476
|
+
unless job_ids
|
477
|
+
@jobs.transaction do |jobs|
|
478
|
+
if project_jobs = jobs[project_number]
|
479
|
+
s_ = Hash[*eval(project_jobs)]
|
449
480
|
end
|
450
481
|
end
|
451
482
|
end
|
483
|
+
@statuses.transaction do |statuses|
|
484
|
+
s_.each do |job_id, stat|
|
485
|
+
s << [job_id, statuses[job_id]]
|
486
|
+
end
|
487
|
+
end
|
452
488
|
if job_ids
|
453
489
|
s = s.select{|job_id, stat| job_idsh[job_id]}
|
454
490
|
end
|
data/test/call_worker4.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
# Version = '20210625-104318'
|
4
|
+
|
5
|
+
require './lib/worker4'
|
6
|
+
script_file = "./test/test_job1.sh"
|
7
|
+
script_content = File.read(script_file)
|
8
|
+
log_dir = "./logs"
|
9
|
+
script_basename = File.basename(script_file)
|
10
|
+
JobWorker.perform_async(1001, log_dir, script_basename, script_content)
|
11
|
+
p "submitted test_job1.sh"
|
data/test/call_worker_method.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
# Version = '20210625-162836'
|
4
|
+
|
5
|
+
require 'workflow_manager'
|
6
|
+
script_file = "./test/test_job1.sh"
|
7
|
+
script_content = File.read(script_file)
|
8
|
+
log_dir = "./logs"
|
9
|
+
script_basename = File.basename(script_file)
|
10
|
+
JobWorker.perform_async(1001, log_dir, script_basename, script_content)
|
11
|
+
p "submitted test_job1.sh"
|
data/test/job_list.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
# Version = '20210723-134812'
|
4
|
+
|
5
|
+
PORT = (ARGV[0]||6380).to_i
|
6
|
+
require 'redis'
|
7
|
+
db0 = Redis.new(port: PORT, db: 0)
|
8
|
+
db1 = Redis.new(port: PORT, db: 1)
|
9
|
+
db2 = Redis.new(port: PORT, db: 2)
|
10
|
+
#db3 = Redis.new(port: 6380, db: 3)
|
11
|
+
|
12
|
+
class Redis
|
13
|
+
def show_all
|
14
|
+
self.keys.sort.each do |key|
|
15
|
+
value = self.get(key)
|
16
|
+
puts [key, value].join("\t")
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
dbs = [db0, db1, db2]
|
22
|
+
db_notes = ["state DB", "log DB", "project job DB"]
|
23
|
+
|
24
|
+
dbs.each.with_index do |db, i|
|
25
|
+
note = db_notes[i]
|
26
|
+
puts ["db#{i}", note].join("\t")
|
27
|
+
db.show_all
|
28
|
+
puts
|
29
|
+
end
|
30
|
+
exit
|
31
|
+
puts "db0, status DB"
|
32
|
+
puts ["JobID", "Status"].join("\t")
|
33
|
+
db0.keys.sort.each do |key|
|
34
|
+
value = db0.get(key)
|
35
|
+
puts [key, value].join("\t")
|
36
|
+
end
|
37
|
+
|
38
|
+
puts
|
39
|
+
puts "db1, log DB"
|
40
|
+
db1.keys.sort.each do |key|
|
41
|
+
value = db1.get(key)
|
42
|
+
puts [key, value].join("\t")
|
43
|
+
end
|
44
|
+
|
45
|
+
puts
|
46
|
+
puts "db2, status DB2, project specific"
|
47
|
+
db2.keys.sort.each do |key|
|
48
|
+
value = db2.get(key)
|
49
|
+
puts [key, value].join("\t")
|
50
|
+
end
|
data/test/test_job1.sh
ADDED
data/workflow_manager.gemspec
CHANGED
@@ -19,6 +19,6 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
20
|
spec.require_paths = ["lib"]
|
21
21
|
|
22
|
-
spec.add_development_dependency "bundler", "~>
|
22
|
+
spec.add_development_dependency "bundler", "~> 2.2.10"
|
23
23
|
spec.add_development_dependency "rake"
|
24
24
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: workflow_manager
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.8
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Functional Genomics Center Zurich
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-07-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 2.2.10
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 2.2.10
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -76,6 +76,9 @@ files:
|
|
76
76
|
- config/environments/development.rb
|
77
77
|
- config/environments/production.rb
|
78
78
|
- config/environments/redis.conf
|
79
|
+
- config/environments/sidekiq.yml
|
80
|
+
- lib/job_checker.rb
|
81
|
+
- lib/worker4.rb
|
79
82
|
- lib/workflow_manager.rb
|
80
83
|
- lib/workflow_manager/cluster.rb
|
81
84
|
- lib/workflow_manager/optparse_ex.rb
|
@@ -84,6 +87,10 @@ files:
|
|
84
87
|
- spec/cluster_spec.rb
|
85
88
|
- spec/server_spec.rb
|
86
89
|
- spec/spec_helper.rb
|
90
|
+
- test/call_worker4.rb
|
91
|
+
- test/call_worker_method.rb
|
92
|
+
- test/job_list.rb
|
93
|
+
- test/test_job1.sh
|
87
94
|
- workflow_manager.gemspec
|
88
95
|
homepage: ''
|
89
96
|
licenses:
|
@@ -104,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
111
|
- !ruby/object:Gem::Version
|
105
112
|
version: '0'
|
106
113
|
requirements: []
|
107
|
-
rubygems_version: 3.0.3
|
114
|
+
rubygems_version: 3.0.3.1
|
108
115
|
signing_key:
|
109
116
|
specification_version: 4
|
110
117
|
summary: Workflow Manager manages job submissions using dRuby.
|
@@ -112,3 +119,7 @@ test_files:
|
|
112
119
|
- spec/cluster_spec.rb
|
113
120
|
- spec/server_spec.rb
|
114
121
|
- spec/spec_helper.rb
|
122
|
+
- test/call_worker4.rb
|
123
|
+
- test/call_worker_method.rb
|
124
|
+
- test/job_list.rb
|
125
|
+
- test/test_job1.sh
|