workflow_manager 0.5.8 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +3 -2
- data/bin/wfm_monitoring +3 -2
- data/bin/workflow_manager +22 -2
- data/config/environments/production.rb +1 -1
- data/config/environments/redis.conf +2 -2
- data/config/environments/sidekiq.yml +8 -0
- data/lib/job_checker.rb +81 -0
- data/lib/worker4.rb +80 -0
- data/lib/workflow_manager/cluster.rb +14 -95
- data/lib/workflow_manager/server.rb +47 -11
- data/lib/workflow_manager/version.rb +1 -1
- data/test/call_worker4.rb +11 -0
- data/test/call_worker_method.rb +11 -0
- data/test/job_list.rb +50 -0
- data/test/test_job1.sh +5 -0
- data/workflow_manager.gemspec +1 -1
- metadata +16 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6ea3816ec54cac7bf1097d94f410aeb9d17641755494de742814c4151a20d4de
|
4
|
+
data.tar.gz: 8adcaf1765299151ccddc6f68740989e265b68b7829c07a4e49ee751bb0ff608
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb11c23c1346ca6fff218b2283b6254cf18ef3872725c1f7a068c8261d1af2217d7d13686c61ea28f2d4c01b7c1f1b242862f9ae60e223ffb64f70d0646aae2b
|
7
|
+
data.tar.gz: 764e474e5fbbf9fffb411f5484c774bff60e517893ef983e7307ca923d8c12f982ac4c4f20468015d7b78a2b0ff4c9adab8cbf465c0a9fb09bf46170bd9eea69
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
source 'https://rubygems.org'

gem 'redis'
gem 'sidekiq'
# Load workflow_manager from the directory containing this Gemfile instead of a
# developer-specific absolute path, so `bundle install` works on any checkout.
gem 'workflow_manager', :path => '.'
|
data/bin/wfm_monitoring
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# encoding: utf-8
|
3
3
|
# 20121112 masa workflow manager client
|
4
|
-
Version = '
|
4
|
+
Version = '20210625-165025'
|
5
5
|
|
6
6
|
require 'drb/drb'
|
7
7
|
require 'workflow_manager/optparse_ex'
|
@@ -54,4 +54,5 @@ sge_options << "-n #{opt.nodes}" if opt.nodes
|
|
54
54
|
script_content = File.read(script_file)
|
55
55
|
workflow_manager = DRbObject.new_with_uri(uri)
|
56
56
|
#puts workflow_manager.start_monitoring(script_file, user, 0, script_content, project_number, sge_options.join(' '), opt.log)
|
57
|
-
puts workflow_manager.start_monitoring2(script_file, script_content, user, project_number, sge_options.join(' '), opt.log)
|
57
|
+
#puts workflow_manager.start_monitoring2(script_file, script_content, user, project_number, sge_options.join(' '), opt.log)
|
58
|
+
puts workflow_manager.start_monitoring3(script_file, script_content, user, project_number, sge_options.join(' '), opt.log)
|
data/bin/workflow_manager
CHANGED
@@ -8,7 +8,7 @@ Version = WorkflowManager::VERSION
|
|
8
8
|
opt = OptionParser.new do |o|
|
9
9
|
o.banner = "Version: #{Version}\nUsage:\n #{File.basename(__FILE__)} -d [druby://host:port] -m [development|production]"
|
10
10
|
o.on(:server, 'druby://localhost:12345', '-d server', '--server', 'workflow manager URI (default: druby://localhost:12345)')
|
11
|
-
o.on(:mode, '
|
11
|
+
o.on(:mode, 'production', '-m mode', '--mode', 'development|production (default: production)')
|
12
12
|
o.parse!(ARGV)
|
13
13
|
end
|
14
14
|
|
@@ -33,7 +33,9 @@ if opt.mode
|
|
33
33
|
default_config_file = File.join(default_config_dir, opt.mode+".rb")
|
34
34
|
if File.exist?(default_config_file)
|
35
35
|
default_redis_config_file = File.join(default_config_dir, "redis.conf")
# The packaged Sidekiq settings file is sidekiq.yml, the same file that is
# passed to `sidekiq -C` when the worker process is started.
default_sidekiq_config_file = File.join(default_config_dir, "sidekiq.yml")
FileUtils.cp(default_redis_config_file, config_dir)
FileUtils.cp(default_sidekiq_config_file, config_dir)
FileUtils.cp(default_config_file, config_file)
|
38
40
|
else
|
39
41
|
raise "Configure file does not exist: #{config_file}"
|
@@ -43,4 +45,22 @@ if opt.mode
|
|
43
45
|
end
|
44
46
|
# Start the dRuby front end and report the URI it is listening on.
DRb.start_service(uri, WorkflowManager::Server.new)
puts DRb.uri

sleep 1

# Run Sidekiq in a child process, loading the JobChecker worker definition.
sidekiq_pid = fork do
  app_dir = File.expand_path('..', __FILE__)
  job_checker = File.join(app_dir, "../lib/job_checker.rb")
  # Argument-list form of exec: no shell is involved, so a path containing
  # spaces or shell metacharacters is passed through unchanged.
  exec("sidekiq", "-C", "config/environments/sidekiq.yml", "-r", job_checker)
end

begin
  DRb.thread.join
  puts "__END__"
rescue SignalException
  begin
    # TERM asks Sidekiq to shut down; waiting for the child afterwards reaps it
    # so that it is not left behind as a zombie.
    Process.kill("TERM", sidekiq_pid)
    Process.wait(sidekiq_pid)
  rescue Errno::ESRCH, Errno::ECHILD
    # The child has already exited; nothing left to stop.
  end
  puts "__CORRECTLY_END__"
end
|
66
|
+
|
@@ -7,6 +7,6 @@ WorkflowManager::Server.configure do |config|
|
|
7
7
|
config.interval = 30
|
8
8
|
config.resubmit = 0
|
9
9
|
config.redis_conf = "config/environments/redis.conf"
|
10
|
-
config.cluster = WorkflowManager::
|
10
|
+
config.cluster = WorkflowManager::FGCZDebian10Cluster.new('FGCZDebian10Cluster')
|
11
11
|
end
|
12
12
|
|
data/lib/job_checker.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'sidekiq'
|
2
|
+
require 'redis'
|
3
|
+
|
4
|
+
WORKER_INTERVAL = 10 # [s]
REDIS_CONF = File.expand_path("../../config/environments/redis.conf", __FILE__)

# Extracts the TCP port from a redis.conf-style file.
#
# conf_path    - path of the configuration file to read.
# default_port - value returned when the file does not exist or contains no
#                "port" directive.
#
# Blank lines and unrelated directives are ignored; when "port" appears more
# than once the last occurrence wins.
def redis_port_from_conf(conf_path, default_port = 6379)
  return default_port unless File.exist?(conf_path)

  port = nil
  File.foreach(conf_path) do |line|
    key, value = line.split
    port = value.to_i if key == "port" && value
  end
  port || default_port
end

PORT = redis_port_from_conf(REDIS_CONF)
SIDEKIQ_URL = "redis://localhost:#{PORT}/3"
warn "redis.conf: #{REDIS_CONF}"
warn "Redis port: #{PORT}"
warn "Sidekiq URL: #{SIDEKIQ_URL}"
|
16
|
+
|
17
|
+
# Both the Sidekiq server side and the client side (code that enqueues jobs)
# are pointed at the same Redis URL.
Sidekiq.configure_server { |config| config.redis = { url: SIDEKIQ_URL } }

Sidekiq.configure_client { |config| config.redis = { url: SIDEKIQ_URL } }
|
24
|
+
|
25
|
+
# Hash-style access for Redis connections: db[key] reads with #get and
# db[key] = value writes with #set.
class Redis
  alias :[] :get
  alias :[]= :set
end
|
29
|
+
|
30
|
+
# Sidekiq worker that follows one Slurm job until it is no longer active.
#
# Each run polls `sacct` every WORKER_INTERVAL seconds and mirrors what it
# reports into Redis: DB 0 holds the per-job status line, DB 1 the job's log
# file path, and DB 2 a per-project list of job ids and states.
class JobChecker
  include Sidekiq::Worker
  sidekiq_options queue: :default, retry: 5

  # Polling continues while the last line of the sacct output matches this.
  # NOTE(review): "---" presumably matches the header separator that is the
  # last line before sacct has a row for the job - confirm.
  ACTIVE_STATE_PATTERN = /RUNNING|PENDING|---/

  # Number of trailing entries (job ids and states, flattened) kept per project.
  MAX_PROJECT_JOB_ENTRIES = 200

  # Writes script_content, followed by an "echo __SCRIPT END__" line, to a
  # new time-stamped file in log_dir and returns the path of that file.
  def generate_new_job_script(log_dir, script_basename, script_content)
    stamped_name = File.basename(script_basename) + "_" + Time.now.strftime("%Y%m%d%H%M%S%L")
    new_job_script = File.join(log_dir, stamped_name)
    # File.open instead of Kernel#open: the latter starts a subprocess when
    # the path begins with "|".
    File.open(new_job_script, 'w') do |out|
      out.print script_content
      out.print "\necho __SCRIPT END__\n"
    end
    new_job_script
  end

  # Builds the comma-separated status line
  # "status,script_basename,start_time/current_time,user,project_number".
  # The start time is fixed on the first call after @start_time was reset.
  def update_time_status(status, script_basename, user, project_number)
    now = Time.now.strftime("%Y-%m-%d %H:%M:%S")
    @start_time ||= now
    [status, script_basename, [@start_time, now].join("/"), user, project_number].join(',')
  end

  # Polls the state of job_id until it no longer matches ACTIVE_STATE_PATTERN.
  #
  # job_id          - Slurm job id to follow.
  # script_basename - script name recorded in the status line.
  # log_file        - log file path stored in DB 1 under job_id.
  # user            - user name recorded in the status line.
  # project_id      - project whose job list in DB 2 is updated.
  #
  # Raises RuntimeError when sacct produces no output, so that Sidekiq's
  # retry mechanism handles the failure.
  def perform(job_id, script_basename, log_file, user, project_id)
    puts "JobID (in JobChecker): #{job_id}"
    db0 = Redis.new(port: PORT, db: 0) # state + alpha DB
    db1 = Redis.new(port: PORT, db: 1) # log DB
    db2 = Redis.new(port: PORT, db: 2) # project jobs DB
    db1.set(job_id, log_file)
    pre_state = nil
    @start_time = nil
    loop do
      state = fetch_job_state(job_id)
      # The status line is rewritten on every poll so that its time stamp advances.
      db0.set(job_id, update_time_status(state, script_basename, user, project_id))
      record_project_state(db2, project_id, job_id, state) unless state == pre_state
      pre_state = state
      # Leave before sleeping once the job is no longer active.
      break unless state =~ ACTIVE_STATE_PATTERN
      sleep WORKER_INTERVAL
    end
  ensure
    # Release the connections opened for this run.
    [db0, db1, db2].each { |db| db&.close }
  end

  private

  # Returns the stripped last line of `sacct --jobs=<job_id> --format=state`.
  def fetch_job_state(job_id)
    # Argument-list form: the job id is never interpreted by a shell.
    output = IO.popen(["sacct", "--jobs=#{job_id}", "--format=state"], &:read)
    last_line = output.split(/\n/).last
    raise "sacct returned no output for job #{job_id}" if last_line.nil?
    last_line.strip
  end

  # Stores state for job_id in the project's job list, keeping only the last
  # MAX_PROJECT_JOB_ENTRIES flattened entries.
  def record_project_state(db, project_id, job_id, state)
    stored = db.get(project_id) || "[]"
    # NOTE(review): the list is stored as Array#to_s output and read back with
    # eval, so anything able to write to this Redis DB can run code in the
    # worker. Kept because the stored format must stay readable; replace it
    # with a real serialization format once all readers of this key can change.
    project_jobs = Hash[*eval(stored)]
    project_jobs[job_id] = state
    #p project_jobs
    db.set(project_id, project_jobs.to_a.flatten.last(MAX_PROJECT_JOB_ENTRIES).to_s)
  end
end
|
81
|
+
|
data/lib/worker4.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'sidekiq'
|
2
|
+
require 'redis'
|
3
|
+
|
4
|
+
WORKER_INTERVAL = 10 # [s]

# Server side and client side of Sidekiq both use Redis database 3 on
# localhost:6380.
Sidekiq.configure_server { |config| config.redis = { url: 'redis://localhost:6380/3' } }

Sidekiq.configure_client { |config| config.redis = { url: 'redis://localhost:6380/3' } }
|
13
|
+
|
14
|
+
# Hash-style access for Redis connections: db[key] reads with #get and
# db[key] = value writes with #set.
class Redis
  alias :[] :get
  alias :[]= :set
end
|
18
|
+
|
19
|
+
# Sidekiq worker that submits a script with `sbatch` and then follows the
# resulting Slurm job until it is no longer active.
#
# State is mirrored into Redis on port 6380: DB 0 holds the per-job status
# line, DB 1 the job's log file path, and DB 2 a per-project list of job ids
# and states.
class JobWorker
  include Sidekiq::Worker
  sidekiq_options queue: :default, retry: 5

  REDIS_PORT = 6380

  # User name recorded in every status line written by this worker.
  STATUS_USER = "sushi_lover"

  # Polling continues while the last line of the sacct output matches this.
  # NOTE(review): "---" presumably matches the header separator that is the
  # last line before sacct has a row for the job - confirm.
  ACTIVE_STATE_PATTERN = /RUNNING|PENDING|---/

  # Number of trailing entries (job ids and states, flattened) kept per project.
  MAX_PROJECT_JOB_ENTRIES = 200

  # Writes script_content, followed by an "echo __SCRIPT END__" line, to a
  # new time-stamped file in log_dir and returns the path of that file.
  def generate_new_job_script(log_dir, script_basename, script_content)
    stamped_name = File.basename(script_basename) + "_" + Time.now.strftime("%Y%m%d%H%M%S%L")
    new_job_script = File.join(log_dir, stamped_name)
    # File.open instead of Kernel#open: the latter starts a subprocess when
    # the path begins with "|".
    File.open(new_job_script, 'w') do |out|
      out.print script_content
      out.print "\necho __SCRIPT END__\n"
    end
    new_job_script
  end

  # Builds the comma-separated status line
  # "status,script_basename,start_time/current_time,user,project_number".
  # The start time is fixed on the first call after @start_time was reset.
  def update_time_status(status, script_basename, user, project_number)
    now = Time.now.strftime("%Y-%m-%d %H:%M:%S")
    @start_time ||= now
    [status, script_basename, [@start_time, now].join("/"), user, project_number].join(',')
  end

  # Submits the script and polls its state until it no longer matches
  # ACTIVE_STATE_PATTERN.
  #
  # project_id      - project whose job list in DB 2 is updated.
  # log_dir         - directory receiving the generated script and its logs.
  # script_basename - name used for the generated script and the status line.
  # script_content  - text of the script to submit.
  #
  # Raises RuntimeError when sbatch reports no job id or sacct produces no
  # output, so that Sidekiq's retry mechanism handles the failure.
  def perform(project_id, log_dir, script_basename, script_content)
    job_script = generate_new_job_script(log_dir, script_basename, script_content)
    log_file = job_script + "_o.log"
    err_file = job_script + "_e.log"
    job_id = submit_job_script(job_script, log_file, err_file)
    puts "JobID: #{job_id}"
    db0 = Redis.new(port: REDIS_PORT, db: 0) # state + alpha DB
    db1 = Redis.new(port: REDIS_PORT, db: 1) # log DB
    db2 = Redis.new(port: REDIS_PORT, db: 2) # project jobs DB
    db1.set(job_id, log_file)
    pre_state = nil
    @start_time = nil
    loop do
      state = fetch_job_state(job_id)
      # The status line is rewritten on every poll so that its time stamp advances.
      db0.set(job_id, update_time_status(state, script_basename, STATUS_USER, project_id))
      record_project_state(db2, project_id, job_id, state) unless state == pre_state
      pre_state = state
      # Leave before sleeping once the job is no longer active.
      break unless state =~ ACTIVE_STATE_PATTERN
      sleep WORKER_INTERVAL
    end
  ensure
    # Release the connections opened for this run.
    [db0, db1, db2].each { |db| db&.close }
  end

  private

  # Runs sbatch for job_script and returns the last word of its output,
  # which is taken as the job id.
  def submit_job_script(job_script, log_file, err_file)
    command = ["sbatch", "-o", log_file, "-e", err_file, "-N", "1", job_script]
    puts command.join(" ")
    # Argument-list form: paths are never interpreted by a shell.
    output = IO.popen(command, &:read)
    job_id = output.chomp.split.last
    raise "sbatch did not report a job id for #{job_script}" if job_id.nil?
    job_id
  end

  # Returns the stripped last line of `sacct --jobs=<job_id> --format=state`.
  def fetch_job_state(job_id)
    puts "sacct --jobs=#{job_id} --format=state"
    output = IO.popen(["sacct", "--jobs=#{job_id}", "--format=state"], &:read)
    last_line = output.split(/\n/).last
    raise "sacct returned no output for job #{job_id}" if last_line.nil?
    state = last_line.strip
    puts "state: #{state}"
    state
  end

  # Stores state for job_id in the project's job list, keeping only the last
  # MAX_PROJECT_JOB_ENTRIES flattened entries.
  def record_project_state(db, project_id, job_id, state)
    stored = db.get(project_id) || "[]"
    # NOTE(review): the list is stored as Array#to_s output and read back with
    # eval, so anything able to write to this Redis DB can run code in the
    # worker. Replace with a real serialization format once all readers of
    # this key can change.
    project_jobs = Hash[*eval(stored)]
    project_jobs[job_id] = state
    p project_jobs
    db.set(project_id, project_jobs.to_a.flatten.last(MAX_PROJECT_JOB_ENTRIES).to_s)
  end
end
|
80
|
+
|
@@ -308,6 +308,7 @@ module WorkflowManager
|
|
308
308
|
end
|
309
309
|
end
|
310
310
|
|
311
|
+
|
311
312
|
class HydraCluster < Cluster
|
312
313
|
def submit_job(script_file, script_content, option='')
|
313
314
|
# TODO
|
@@ -434,97 +435,6 @@ module WorkflowManager
|
|
434
435
|
end
|
435
436
|
end
|
436
437
|
|
437
|
-
class FGCZDebian10CourseCluster < Cluster
|
438
|
-
def submit_job(script_file, script_content, option='')
|
439
|
-
if script_name = File.basename(script_file) and script_name =~ /\.sh/
|
440
|
-
script_name = script_name.split(/\.sh/).first + ".sh"
|
441
|
-
new_job_script = generate_new_job_script(script_name, script_content)
|
442
|
-
new_job_script_base = File.basename(new_job_script)
|
443
|
-
log_file = File.join(@log_dir, new_job_script_base + "_o.log")
|
444
|
-
err_file = File.join(@log_dir, new_job_script_base + "_e.log")
|
445
|
-
command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
|
446
|
-
job_id = `#{command}`
|
447
|
-
job_id = job_id.chomp.split.last
|
448
|
-
[job_id, log_file, command]
|
449
|
-
else
|
450
|
-
err_msg = "FGCZDebian10CourseCluster#submit_job, ERROR: script_name is not *.sh: #{File.basename(script_file)}"
|
451
|
-
warn err_msg
|
452
|
-
raise err_msg
|
453
|
-
end
|
454
|
-
end
|
455
|
-
def job_running?(job_id)
|
456
|
-
qstat_flag = false
|
457
|
-
IO.popen('squeue') do |io|
|
458
|
-
while line=io.gets
|
459
|
-
# ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
|
460
|
-
# ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
|
461
|
-
jobid, partition, name, user, state, *others = line.chomp.split
|
462
|
-
if jobid.strip == job_id and state == 'R'
|
463
|
-
qstat_flag = true
|
464
|
-
break
|
465
|
-
end
|
466
|
-
end
|
467
|
-
end
|
468
|
-
qstat_flag
|
469
|
-
end
|
470
|
-
def job_ends?(log_file)
|
471
|
-
log_flag = false
|
472
|
-
IO.popen("tail -n 10 #{log_file} 2> /dev/null") do |io|
|
473
|
-
while line=io.gets
|
474
|
-
if line =~ /__SCRIPT END__/
|
475
|
-
log_flag = true
|
476
|
-
break
|
477
|
-
end
|
478
|
-
end
|
479
|
-
end
|
480
|
-
log_flag
|
481
|
-
end
|
482
|
-
def job_pending?(job_id)
|
483
|
-
qstat_flag = false
|
484
|
-
IO.popen('squeue') do |io|
|
485
|
-
while line=io.gets
|
486
|
-
jobid, partition, name, user, state, *others = line.chomp.split
|
487
|
-
if jobid.strip == job_id and state =~ /PD/
|
488
|
-
qstat_flag = true
|
489
|
-
break
|
490
|
-
end
|
491
|
-
end
|
492
|
-
end
|
493
|
-
qstat_flag
|
494
|
-
end
|
495
|
-
def copy_commands(org_dir, dest_parent_dir, now=nil)
|
496
|
-
commands = ["cp -r #{org_dir} #{dest_parent_dir}"]
|
497
|
-
end
|
498
|
-
def kill_command(job_id)
|
499
|
-
command = "scancel #{job_id}"
|
500
|
-
end
|
501
|
-
def delete_command(target)
|
502
|
-
command = "rm -rf #{target}"
|
503
|
-
end
|
504
|
-
def cluster_nodes
|
505
|
-
nodes = {
|
506
|
-
'fgcz-h-900: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-900',
|
507
|
-
'fgcz-h-901: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-901',
|
508
|
-
'fgcz-h-902: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-902',
|
509
|
-
'fgcz-h-903: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-903',
|
510
|
-
'fgcz-h-904: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-904',
|
511
|
-
'fgcz-h-905: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-905',
|
512
|
-
'fgcz-h-906: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-906',
|
513
|
-
'fgcz-h-907: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-907',
|
514
|
-
'fgcz-h-908: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-908',
|
515
|
-
'fgcz-h-909: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-909',
|
516
|
-
'fgcz-h-910: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-910',
|
517
|
-
'fgcz-h-911: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-911',
|
518
|
-
'fgcz-h-912: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-912',
|
519
|
-
'fgcz-h-913: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-913',
|
520
|
-
'fgcz-h-914: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-914',
|
521
|
-
'fgcz-h-915: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-915',
|
522
|
-
'fgcz-h-916: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-916',
|
523
|
-
'fgcz-h-917: cpu 8,mem 30 GB,scr 500G' => 'fgcz-h-917',
|
524
|
-
}
|
525
|
-
end
|
526
|
-
end
|
527
|
-
|
528
438
|
class FGCZDebian10Cluster < Cluster
|
529
439
|
def parse(options)
|
530
440
|
options = options.split
|
@@ -537,14 +447,14 @@ module WorkflowManager
|
|
537
447
|
scratch = if i = options.index("-s")
|
538
448
|
options[i+1]
|
539
449
|
end
|
540
|
-
|
541
|
-
|
542
|
-
|
450
|
+
partition = if i = options.index("-p")
|
451
|
+
options[i+1]
|
452
|
+
end
|
543
453
|
new_options = []
|
544
454
|
new_options << "--mem=#{ram}G" if ram
|
545
455
|
new_options << "-n #{cores}" if cores
|
546
456
|
new_options << "--tmp=#{scratch}G" if scratch
|
547
|
-
new_options << "-p #{
|
457
|
+
new_options << "-p #{partition}" if partition
|
548
458
|
new_options.join(" ")
|
549
459
|
end
|
550
460
|
def submit_job(script_file, script_content, option='')
|
@@ -630,4 +540,13 @@ module WorkflowManager
|
|
630
540
|
}
|
631
541
|
end
|
632
542
|
end
|
543
|
+
|
544
|
+
# Variant of FGCZDebian10Cluster that copies and deletes with plain `cp` and `rm`.
class FGCZDebian10DemoCluster < FGCZDebian10Cluster
  # Returns a one-element list holding the shell command that recursively
  # copies org_dir into dest_parent_dir. The now argument is not used.
  def copy_commands(org_dir, dest_parent_dir, now=nil)
    [format("cp -r %s %s", org_dir, dest_parent_dir)]
  end

  # Returns the shell command that recursively removes target.
  def delete_command(target)
    format("rm -rf %s", target)
  end
end
|
633
552
|
end
|
@@ -4,6 +4,9 @@
|
|
4
4
|
require 'drb/drb'
|
5
5
|
require 'fileutils'
|
6
6
|
require 'csv'
|
7
|
+
|
8
|
+
require 'job_checker'
|
9
|
+
|
7
10
|
begin
|
8
11
|
require 'redis'
|
9
12
|
DB_MODE = "Redis"
|
@@ -159,6 +162,7 @@ module WorkflowManager
|
|
159
162
|
when "Redis"
|
160
163
|
RedisDB.new(1, @redis_conf)
|
161
164
|
end
|
165
|
+
@jobs = RedisDB.new(2, @redis_conf)
|
162
166
|
|
163
167
|
@system_log = File.join(@log_dir, "system.log")
|
164
168
|
@mutex = Mutex.new
|
@@ -172,15 +176,34 @@ module WorkflowManager
|
|
172
176
|
log_puts("DB = #{DB_MODE}")
|
173
177
|
log_puts("Cluster = #{@cluster.name}")
|
174
178
|
log_puts("Server starts")
|
179
|
+
log_puts("Recovery check")
|
180
|
+
recovery_job_checker
|
181
|
+
end
|
182
|
+
# Re-enqueues a JobChecker for every stored job whose status is RUNNING or
# PENDING. Each stored status has the form
# "RUNNING,QC_ventricles_100k.sh,2021-07-30 09:47:04/2021-07-30 09:47:04,masaomi,1535".
def recovery_job_checker
  @logs.transaction do |logs|
    @statuses.transaction do |statuses|
      statuses.each do |job_id, status|
        stat, script_basename, _time, user, project_number = status.split(",")
        next unless %w[RUNNING PENDING].include?(stat)

        log_file = logs[job_id]
        message = "JobID (in recovery check): #{job_id}"
        log_puts(message)
        puts message
        JobChecker.perform_async(job_id, script_basename, log_file, user, project_number)
      end
    end
  end
end
|
176
199
|
# Returns a fixed greeting followed by the name of the configured cluster.
def hello
  greeting = 'hello hoge hoge bar boo bundle, '
  greeting + @cluster.name
end
|
179
202
|
def copy_commands(org_dir, dest_parent_dir, now=nil)
|
180
203
|
@cluster.copy_commands(org_dir, dest_parent_dir, now)
|
181
204
|
end
|
182
205
|
# Marks the job as 'FAIL', then runs the cluster's kill command for it in a
# shell. Returns the output of that command.
def kill_job(job_id)
  status(job_id, 'FAIL')
  kill_command = @cluster.kill_command(job_id)
  `#{kill_command}`
end
|
186
209
|
def delete_command(target)
|
@@ -272,6 +295,17 @@ module WorkflowManager
|
|
272
295
|
Thread.current.kill
|
273
296
|
end
|
274
297
|
end
|
298
|
+
# Submits the script to the cluster and enqueues a JobChecker for the new job.
#
# script_path    - passed to the cluster's submit_job; its basename is
#                  handed to the JobChecker.
# script_content - script text passed to submit_job.
# user           - user name handed to the JobChecker.
# project_number - project number handed to the JobChecker.
# sge_options    - option string passed to submit_job.
# log_dir        - not used by this method.
#
# Returns the job id reported by submit_job.
def start_monitoring3(script_path, script_content, user='sushi_lover', project_number=0, sge_options='', log_dir='')
  script_basename = File.basename(script_path)
  submission = @cluster.submit_job(script_path, script_content, sge_options)
  job_id, log_file, _command = submission
  puts "JobID (in WorkflowManager): #{job_id}"
  sleep 1
  JobChecker.perform_async(job_id, script_basename, log_file, user, project_number)
  job_id
end
|
275
309
|
def start_monitoring2(script_path, script_content, user='sushi_lover', project_number=0, sge_options='', log_dir='')
|
276
310
|
# script_path is only used to generate a log file name
|
277
311
|
# It is not used to read the script contents
|
@@ -418,7 +452,7 @@ module WorkflowManager
|
|
418
452
|
#@statuses.open(@db_stat)
|
419
453
|
@statuses.transaction do |statuses|
|
420
454
|
if new_status and stat = statuses[job_id.to_s]
|
421
|
-
status_list = ['
|
455
|
+
# Status names accepted for a manual status change.
status_list = ['COMPLETED', 'RUNNING', 'PENDING', 'FAIL']
|
422
456
|
if status_list.include?(new_status)
|
423
457
|
items = stat.split(/,/)
|
424
458
|
items.shift
|
@@ -438,17 +472,19 @@ module WorkflowManager
|
|
438
472
|
job_idsh = if job_ids
|
439
473
|
Hash[*(job_ids.split(',')).map{|job_id| [job_id, true]}.flatten]
|
440
474
|
end
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
end
|
447
|
-
else
|
448
|
-
s << [key, value]
|
475
|
+
s_ = {}
|
476
|
+
unless job_ids
|
477
|
+
@jobs.transaction do |jobs|
|
478
|
+
if project_jobs = jobs[project_number]
|
479
|
+
s_ = Hash[*eval(project_jobs)]
|
449
480
|
end
|
450
481
|
end
|
451
482
|
end
|
483
|
+
@statuses.transaction do |statuses|
|
484
|
+
s_.each do |job_id, stat|
|
485
|
+
s << [job_id, statuses[job_id]]
|
486
|
+
end
|
487
|
+
end
|
452
488
|
if job_ids
|
453
489
|
s = s.select{|job_id, stat| job_idsh[job_id]}
|
454
490
|
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
# encoding: utf-8
# Version = '20210625-104318'

# Enqueues one JobWorker run for ./test/test_job1.sh under project 1001,
# with ./logs as the log directory.
require './lib/worker4'

script_file = "./test/test_job1.sh"
script_content = File.read(script_file)
JobWorker.perform_async(1001, "./logs", File.basename(script_file), script_content)
p "submitted test_job1.sh"
|
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
# encoding: utf-8
# Version = '20210625-162836'

# Enqueues one JobWorker run for ./test/test_job1.sh under project 1001,
# with ./logs as the log directory.
# NOTE(review): JobWorker is defined in lib/worker4.rb - confirm that
# requiring 'workflow_manager' actually loads it.
require 'workflow_manager'

script_file = "./test/test_job1.sh"
script_content = File.read(script_file)
JobWorker.perform_async(1001, "./logs", File.basename(script_file), script_content)
p "submitted test_job1.sh"
|
data/test/job_list.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/env ruby
# encoding: utf-8
# Version = '20210723-134812'

# Prints every key/value pair of Redis databases 0-2, one database after the
# other, as tab-separated lines.
#
# Usage: job_list.rb [port]   (port defaults to 6380)
PORT = (ARGV[0]||6380).to_i
require 'redis'

db_notes = ["state DB", "log DB", "project job DB"]

db_notes.each_with_index do |note, i|
  db = Redis.new(port: PORT, db: i)
  puts ["db#{i}", note].join("\t")
  db.keys.sort.each do |key|
    puts [key, db.get(key)].join("\t")
  end
  puts
end
|
data/test/test_job1.sh
ADDED
data/workflow_manager.gemspec
CHANGED
@@ -19,6 +19,6 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
20
|
spec.require_paths = ["lib"]
|
21
21
|
|
22
|
-
spec.add_development_dependency "bundler", "~>
|
22
|
+
spec.add_development_dependency "bundler", "~> 2.2.10"
|
23
23
|
spec.add_development_dependency "rake"
|
24
24
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: workflow_manager
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Functional Genomics Center Zurich
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-07-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 2.2.10
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 2.2.10
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -76,6 +76,9 @@ files:
|
|
76
76
|
- config/environments/development.rb
|
77
77
|
- config/environments/production.rb
|
78
78
|
- config/environments/redis.conf
|
79
|
+
- config/environments/sidekiq.yml
|
80
|
+
- lib/job_checker.rb
|
81
|
+
- lib/worker4.rb
|
79
82
|
- lib/workflow_manager.rb
|
80
83
|
- lib/workflow_manager/cluster.rb
|
81
84
|
- lib/workflow_manager/optparse_ex.rb
|
@@ -84,6 +87,10 @@ files:
|
|
84
87
|
- spec/cluster_spec.rb
|
85
88
|
- spec/server_spec.rb
|
86
89
|
- spec/spec_helper.rb
|
90
|
+
- test/call_worker4.rb
|
91
|
+
- test/call_worker_method.rb
|
92
|
+
- test/job_list.rb
|
93
|
+
- test/test_job1.sh
|
87
94
|
- workflow_manager.gemspec
|
88
95
|
homepage: ''
|
89
96
|
licenses:
|
@@ -104,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
111
|
- !ruby/object:Gem::Version
|
105
112
|
version: '0'
|
106
113
|
requirements: []
|
107
|
-
rubygems_version: 3.0.3
|
114
|
+
rubygems_version: 3.0.3.1
|
108
115
|
signing_key:
|
109
116
|
specification_version: 4
|
110
117
|
summary: Workflow Manager manages job submissions using dRuby.
|
@@ -112,3 +119,7 @@ test_files:
|
|
112
119
|
- spec/cluster_spec.rb
|
113
120
|
- spec/server_spec.rb
|
114
121
|
- spec/spec_helper.rb
|
122
|
+
- test/call_worker4.rb
|
123
|
+
- test/call_worker_method.rb
|
124
|
+
- test/job_list.rb
|
125
|
+
- test/test_job1.sh
|