workflow_manager 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/.bzrignore +17 -0
- data/bin/workflow_manager +29 -252
- data/config/environments/development.rb +2 -2
- data/lib/workflow_manager.rb +4 -3
- data/lib/workflow_manager/cluster.rb +94 -92
- data/lib/workflow_manager/server.rb +236 -0
- data/lib/workflow_manager/version.rb +2 -2
- metadata +4 -2
data/.bzrignore
ADDED
data/bin/workflow_manager
CHANGED
@@ -1,262 +1,39 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# encoding: utf-8
|
3
|
-
# 20121109 masa workflow manager druby server
|
4
|
-
Version = '20131104-192005'
|
5
3
|
|
6
|
-
require 'drb/drb'
|
4
|
+
require 'workflow_manager'
|
7
5
|
require 'fileutils'
|
8
|
-
require 'kyotocabinet'
|
9
|
-
require_relative '../lib/workflow_manager'
|
10
6
|
|
11
|
-
|
12
|
-
#
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
INTERVAL = 30
|
17
|
-
RESUBMIT = 0
|
18
|
-
|
19
|
-
class WorkflowManager
|
20
|
-
@@config = nil
|
21
|
-
class Config
|
22
|
-
attr_accessor :log_dir
|
23
|
-
attr_accessor :db_dir
|
24
|
-
attr_accessor :interval
|
25
|
-
attr_accessor :resubmit
|
26
|
-
attr_accessor :cluster
|
27
|
-
end
|
28
|
-
def self.config=(config)
|
29
|
-
@@config = config
|
30
|
-
end
|
31
|
-
def self.config
|
32
|
-
@@config
|
33
|
-
end
|
34
|
-
def config
|
35
|
-
@@config ||= WorkflowManager.configure{}
|
36
|
-
end
|
37
|
-
def self.configure
|
38
|
-
@@config = Config.new
|
39
|
-
# default values
|
40
|
-
@@config.log_dir = LOG_DIR
|
41
|
-
@@config.db_dir = DB_DIR
|
42
|
-
@@config.interval = INTERVAL # interval to check jobs, [s]
|
43
|
-
@@config.resubmit = RESUBMIT # how many times at maximum to resubmit when job fails
|
44
|
-
yield(@@config)
|
45
|
-
if @@config.cluster
|
46
|
-
@@config.cluster.log_dir = File.expand_path(@@config.log_dir)
|
47
|
-
end
|
48
|
-
@@config
|
49
|
-
end
|
7
|
+
opt = OptionParser.new do |o|
|
8
|
+
o.banner = "Usage:\n #{File.basename(__FILE__)} -d [druby://host:port] -m [development|production]"
|
9
|
+
o.on(:server, 'druby://localhost:12345', '-d server', '--server', 'workflow manager URI (default: druby://localhost:12345)')
|
10
|
+
o.on(:mode, 'development', '-m mode', '--mode', 'development|production (default: development)')
|
11
|
+
o.parse!(ARGV)
|
50
12
|
end
|
51
13
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
'hello, '+ @cluster.name
|
72
|
-
end
|
73
|
-
def copy_commands(org_dir, dest_parent_dir)
|
74
|
-
@cluster.copy_commands(org_dir, dest_parent_dir)
|
75
|
-
end
|
76
|
-
def log_puts(str)
|
77
|
-
time = Time.now.strftime("[%Y.%m.%d %H:%M:%S]")
|
78
|
-
@mutex.synchronize do
|
79
|
-
open(@system_log, "a") do |out|
|
80
|
-
out.print time + " " + str + "\n"
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|
84
|
-
def start_monitoring(submit_command, user = 'sushi lover', resubmit = 0, script = '', project_number = 0, sge_options='', log_dir = '')
|
85
|
-
log_puts("monitoring: script=" + submit_command + " user=" + user + " resubmit=" + resubmit.to_s + " project=" + project_number.to_s + " sge option=" + sge_options + " log dir=" + log_dir.to_s)
|
86
|
-
|
87
|
-
#warn submit_command
|
88
|
-
#
|
89
|
-
# TODO: analyze arguments
|
90
|
-
#
|
91
|
-
job_id, log_file, command = @cluster.submit_job(submit_command, script, sge_options)
|
92
|
-
log_puts("submit: " + job_id + " " + command)
|
93
|
-
|
94
|
-
#
|
95
|
-
# monitor worker
|
96
|
-
#
|
97
|
-
if job_id and log_file
|
98
|
-
monitor_worker = Thread.new(job_id, log_file, submit_command, user, resubmit, script, project_number, sge_options, log_dir) do |t_job_id, t_log_file, t_submit_command, t_user, t_resubmit, t_script, t_project_number, t_sge_options, t_log_dir|
|
99
|
-
loop do
|
100
|
-
status = success_or_fail(t_job_id, t_log_file)
|
101
|
-
script_name = File.basename(submit_command).split(/-/).first
|
102
|
-
@statuses.open(@db_stat)
|
103
|
-
start_time = if stat = @statuses[t_job_id] and stat = stat.split(/,/) and time = stat[2]
|
104
|
-
time
|
105
|
-
end
|
106
|
-
time = if start_time
|
107
|
-
if status == 'success' or status == 'fail'
|
108
|
-
start_time + '/' + Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
109
|
-
else
|
110
|
-
start_time
|
111
|
-
end
|
112
|
-
else
|
113
|
-
Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
114
|
-
end
|
115
|
-
@statuses[t_job_id] = [status, script_name, time, user, project_number].join(',')
|
116
|
-
@statuses.close
|
117
|
-
@logs.open(@db_logs)
|
118
|
-
@logs[t_job_id] = t_log_file
|
119
|
-
@logs.close
|
120
|
-
#warn t_job_id + " " + status
|
121
|
-
if status == 'success'
|
122
|
-
log_puts(status + ": " + t_job_id)
|
123
|
-
unless t_log_dir.empty?
|
124
|
-
copy_commands(t_log_file, t_log_dir).each do |command|
|
125
|
-
log_puts(command)
|
126
|
-
system command
|
127
|
-
end
|
128
|
-
err_file = t_log_file.gsub('_o.log','_e.log')
|
129
|
-
copy_commands(err_file, t_log_dir).each do |command|
|
130
|
-
log_puts(command)
|
131
|
-
system command
|
132
|
-
end
|
133
|
-
end
|
134
|
-
Thread.current.kill
|
135
|
-
elsif status == 'fail'
|
136
|
-
log_puts(status + ": " + t_job_id)
|
137
|
-
#
|
138
|
-
# TODO: re-submit
|
139
|
-
#
|
140
|
-
if t_resubmit < RESUBMIT
|
141
|
-
log_puts("resubmit: " + t_job_id)
|
142
|
-
resubmit_job_id = start_monitoring(t_submit_command, t_user, t_resubmit + 1, t_script, t_project_number, t_sge_options)
|
143
|
-
script_name = File.basename(submit_command).split(/-/).first
|
144
|
-
@statuses.open(@db_stat)
|
145
|
-
@statuses[t_job_id] = ["resubmit: " + resubmit_job_id.to_s, script_name, Time.now.strftime("%Y-%m-%d %H:%M:%S"), t_user, t_project_number].join(',')
|
146
|
-
@statuses.close
|
147
|
-
else
|
148
|
-
log_puts("fail: " + t_job_id)
|
149
|
-
end
|
150
|
-
unless t_log_dir.empty?
|
151
|
-
copy_commands(t_log_file, t_log_dir).each do |command|
|
152
|
-
log_puts(command)
|
153
|
-
system command
|
154
|
-
end
|
155
|
-
err_file = t_log_file.gsub('_o.log','_e.log')
|
156
|
-
copy_commands(err_file, t_log_dir).each do |command|
|
157
|
-
log_puts(command)
|
158
|
-
system command
|
159
|
-
end
|
160
|
-
end
|
161
|
-
Thread.current.kill
|
162
|
-
end
|
163
|
-
sleep @interval
|
164
|
-
end
|
165
|
-
end
|
166
|
-
job_id.to_i
|
167
|
-
end
|
168
|
-
end
|
169
|
-
def status(job_id)
|
170
|
-
stat = nil
|
171
|
-
@statuses.open(@db_stat)
|
172
|
-
stat = @statuses[job_id.to_s]
|
173
|
-
@statuses.close
|
174
|
-
stat
|
175
|
-
end
|
176
|
-
def job_list(with_results=false, project_number=nil)
|
177
|
-
s = []
|
178
|
-
@statuses.open(@db_stat)
|
179
|
-
@statuses.each do |key, value|
|
180
|
-
if project_number
|
181
|
-
if x = value.split(/,/)[4].to_i==project_number.to_i
|
182
|
-
s << [key, value]
|
183
|
-
end
|
184
|
-
else
|
185
|
-
s << [key, value]
|
186
|
-
end
|
14
|
+
uri = opt.server
|
15
|
+
if opt.mode =~ /[development|production]/
|
16
|
+
config = File.join(File.dirname(File.expand_path(__FILE__)), "../config/environments/#{opt.mode}.rb")
|
17
|
+
opt.mode = nil unless File.exist?(config)
|
18
|
+
end
|
19
|
+
print "mode = #{opt.mode}\n"
|
20
|
+
if opt.mode
|
21
|
+
config_dir = "./config/environments/"
|
22
|
+
FileUtils.mkdir_p config_dir
|
23
|
+
config_file = File.join(config_dir, opt.mode+".rb")
|
24
|
+
unless File.exist?(config_file)
|
25
|
+
app_dir = File.expand_path('..', __FILE__)
|
26
|
+
default_config_dir = File.join(app_dir, "../config/environments")
|
27
|
+
p default_config_dir
|
28
|
+
default_config_file = File.join(default_config_dir, opt.mode+".rb")
|
29
|
+
if File.exist?(default_config_file)
|
30
|
+
FileUtils.cp(default_config_file, config_file)
|
31
|
+
else
|
32
|
+
raise "Configure file does not exist: #{config_file}"
|
187
33
|
end
|
188
|
-
@statuses.close
|
189
|
-
s.sort.reverse.map{|v| v.join(',')}.join("\n")
|
190
34
|
end
|
191
|
-
|
192
|
-
@logs.open(@db_logs)
|
193
|
-
log_file = @logs[job_id.to_s]
|
194
|
-
@logs.close
|
195
|
-
log_data = if log_file and File.exist?(log_file)
|
196
|
-
"__STDOUT LOG__\n\n" + File.read(log_file)
|
197
|
-
else
|
198
|
-
'no log file'
|
199
|
-
end
|
200
|
-
if with_err
|
201
|
-
err_file = log_file.gsub(/_o\.log/,'_e.log')
|
202
|
-
if err_file and File.exist?(err_file)
|
203
|
-
log_data << "\n\n__STDERR LOG__\n\n"
|
204
|
-
log_data << File.read(err_file)
|
205
|
-
end
|
206
|
-
end
|
207
|
-
log_data
|
208
|
-
end
|
209
|
-
def get_script(job_id)
|
210
|
-
@logs.open(@db_logs)
|
211
|
-
script_file = @logs[job_id.to_s]
|
212
|
-
@logs.close
|
213
|
-
if script_file
|
214
|
-
script_file = script_file.gsub(/_o\.log/,'')
|
215
|
-
end
|
216
|
-
script = if script_file and File.exist?(script_file)
|
217
|
-
File.read(script_file)
|
218
|
-
else
|
219
|
-
'no script file'
|
220
|
-
end
|
221
|
-
script
|
222
|
-
end
|
223
|
-
def success_or_fail(job_id, log_file)
|
224
|
-
job_running = @cluster.job_running?(job_id)
|
225
|
-
job_ends = @cluster.job_ends?(log_file)
|
226
|
-
msg = if job_running
|
227
|
-
'running'
|
228
|
-
elsif job_ends
|
229
|
-
'success'
|
230
|
-
else
|
231
|
-
'fail'
|
232
|
-
end
|
233
|
-
msg
|
234
|
-
end
|
35
|
+
require config_file
|
235
36
|
end
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
#
|
240
|
-
#if __FILE__ == $0
|
241
|
-
|
242
|
-
opt = OptionParser.new do |o|
|
243
|
-
o.banner = "Usage:\n #{File.basename(__FILE__)} -d [druby://host:port] -m [development|production]"
|
244
|
-
o.on(:server, 'druby://localhost:12345', '-d server', '--server', 'workflow manager URI (default: druby://localhost:12345)')
|
245
|
-
o.on(:mode, 'development', '-m mode', '--mode', 'development|production (default: development)')
|
246
|
-
o.parse!(ARGV)
|
247
|
-
end
|
248
|
-
|
249
|
-
uri = opt.server
|
250
|
-
if opt.mode =~ /[development|production]/
|
251
|
-
config = File.join(File.dirname(File.expand_path(__FILE__)), "../config/environments/#{opt.mode}.rb")
|
252
|
-
opt.mode = nil unless File.exist?(config)
|
253
|
-
end
|
254
|
-
print "mode = #{opt.mode}\n"
|
255
|
-
if opt.mode
|
256
|
-
require_relative "../config/environments/#{opt.mode}"
|
257
|
-
end
|
258
|
-
DRb.start_service(uri, WorkflowManager.new)
|
259
|
-
puts DRb.uri
|
260
|
-
DRb.thread.join
|
261
|
-
#sleep
|
262
|
-
#end
|
37
|
+
DRb.start_service(uri, WorkflowManager::Server.new)
|
38
|
+
puts DRb.uri
|
39
|
+
DRb.thread.join
|
@@ -1,11 +1,11 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# encoding: utf-8
|
3
3
|
|
4
|
-
WorkflowManager.configure do |config|
|
4
|
+
WorkflowManager::Server.configure do |config|
|
5
5
|
config.log_dir = 'logs'
|
6
6
|
config.db_dir = 'dbs'
|
7
7
|
config.interval = 30
|
8
8
|
config.resubmit = 0
|
9
|
-
config.cluster = LocalComputer.new('local_computer')
|
9
|
+
config.cluster = WorkflowManager::LocalComputer.new('local_computer')
|
10
10
|
end
|
11
11
|
|
data/lib/workflow_manager.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
|
2
|
-
|
2
|
+
require 'workflow_manager/version'
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
require 'workflow_manager/optparse_ex'
|
5
|
+
require 'workflow_manager/cluster'
|
6
|
+
require 'workflow_manager/server'
|
@@ -1,113 +1,115 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# encoding: utf-8
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
out
|
18
|
-
|
4
|
+
module WorkflowManager
|
5
|
+
class Cluster
|
6
|
+
attr_accessor :name
|
7
|
+
attr_reader :options
|
8
|
+
attr_accessor :log_dir
|
9
|
+
def initialize(name='', log_dir='')
|
10
|
+
@name = name
|
11
|
+
@options = {}
|
12
|
+
@log_dir = log_dir
|
13
|
+
end
|
14
|
+
def generate_new_job_script(script_name, script_content)
|
15
|
+
new_job_script = File.basename(script_name) + "_" + Time.now.strftime("%Y%m%d%H%M%S")
|
16
|
+
new_job_script = File.join(@log_dir, new_job_script)
|
17
|
+
open(new_job_script, 'w') do |out|
|
18
|
+
out.print script_content
|
19
|
+
out.print "\necho __SCRIPT END__\n"
|
20
|
+
end
|
21
|
+
new_job_script
|
22
|
+
end
|
23
|
+
def submit_job(script_file, script_content, option='')
|
24
|
+
end
|
25
|
+
def job_running?(job_id)
|
26
|
+
end
|
27
|
+
def job_ends?(log_file)
|
28
|
+
end
|
29
|
+
def copy_commands(org_dir, dest_parent_dir)
|
19
30
|
end
|
20
|
-
new_job_script
|
21
|
-
end
|
22
|
-
def submit_job(script_file, script_content, option='')
|
23
|
-
end
|
24
|
-
def job_running?(job_id)
|
25
|
-
end
|
26
|
-
def job_ends?(log_file)
|
27
|
-
end
|
28
|
-
def copy_commands(org_dir, dest_parent_dir)
|
29
31
|
end
|
30
|
-
end
|
31
32
|
|
32
|
-
class LocalComputer < Cluster
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
33
|
+
class LocalComputer < Cluster
|
34
|
+
def submit_job(script_file, script_content, option='')
|
35
|
+
if script_name = File.basename(script_file) and script_name =~ /\.sh$/
|
36
|
+
new_job_script = generate_new_job_script(script_name, script_content)
|
37
|
+
new_job_script_base = File.basename(new_job_script)
|
38
|
+
log_file = File.join(@log_dir, new_job_script_base + "_o.log")
|
39
|
+
err_file = File.join(@log_dir, new_job_script_base + "_e.log")
|
40
|
+
command = "bash #{new_job_script} 1> #{log_file} 2> #{err_file}"
|
41
|
+
pid = spawn(command)
|
42
|
+
Process.detach(pid)
|
43
|
+
[pid.to_s, log_file, command]
|
44
|
+
end
|
43
45
|
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
46
|
+
def job_running?(pid)
|
47
|
+
command = "ps aux"
|
48
|
+
result = IO.popen(command) do |io|
|
49
|
+
flag = false
|
50
|
+
while line=io.gets
|
51
|
+
x = line.split
|
52
|
+
if x[1].to_i == pid.to_i
|
53
|
+
flag = true
|
54
|
+
break
|
55
|
+
end
|
54
56
|
end
|
57
|
+
flag
|
55
58
|
end
|
56
|
-
|
59
|
+
result
|
60
|
+
end
|
61
|
+
def job_ends?(log_file)
|
62
|
+
command = "tail -n 20 #{log_file}|grep '__SCRIPT END__'"
|
63
|
+
result = `#{command}`
|
64
|
+
result.to_s.empty? ? false : true
|
65
|
+
end
|
66
|
+
def copy_commands(org_dir, dest_parent_dir)
|
67
|
+
commands = []
|
68
|
+
commands << "mkdir -p #{dest_parent_dir}"
|
69
|
+
commands << "cp -r #{org_dir} #{dest_parent_dir}"
|
70
|
+
commands
|
57
71
|
end
|
58
|
-
result
|
59
|
-
end
|
60
|
-
def job_ends?(log_file)
|
61
|
-
command = "tail -n 20 #{log_file}|grep '__SCRIPT END__'"
|
62
|
-
result = `#{command}`
|
63
|
-
result.to_s.empty? ? false : true
|
64
|
-
end
|
65
|
-
def copy_commands(org_dir, dest_parent_dir)
|
66
|
-
commands = []
|
67
|
-
commands << "mkdir -p #{dest_parent_dir}"
|
68
|
-
commands << "cp -r #{org_dir} #{dest_parent_dir}"
|
69
|
-
commands
|
70
72
|
end
|
71
|
-
end
|
72
73
|
|
73
|
-
class FGCZCluster < Cluster
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
74
|
+
class FGCZCluster < Cluster
|
75
|
+
def submit_job(script_file, script_content, option='')
|
76
|
+
if script_name = File.basename(script_file) and script_name =~ /\.sh$/
|
77
|
+
new_job_script = generate_new_job_script(script_name, script_content)
|
78
|
+
new_job_script_base = File.basename(new_job_script)
|
79
|
+
log_file = File.join(@log_dir, new_job_script_base + "_o.log")
|
80
|
+
err_file = File.join(@log_dir, new_job_script_base + "_e.log")
|
81
|
+
command = "g-sub -o #{log_file} -e #{err_file} #{option} #{new_job_script}"
|
82
|
+
job_id = `#{command}`
|
83
|
+
job_id = job_id.match(/Your job (\d+) \(/)[1]
|
84
|
+
[job_id, log_file, command]
|
85
|
+
end
|
84
86
|
end
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
87
|
+
def job_running?(job_id)
|
88
|
+
qstat_flag = false
|
89
|
+
IO.popen('qstat -u "*"') do |io|
|
90
|
+
while line=io.gets
|
91
|
+
if line =~ /#{job_id}/
|
92
|
+
qstat_flag = true
|
93
|
+
break
|
94
|
+
end
|
93
95
|
end
|
94
96
|
end
|
97
|
+
qstat_flag
|
95
98
|
end
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
break
|
99
|
+
def job_ends?(log_file)
|
100
|
+
log_flag = false
|
101
|
+
IO.popen("tail -n 10 #{log_file}") do |io|
|
102
|
+
while line=io.gets
|
103
|
+
if line =~ /__SCRIPT END__/
|
104
|
+
log_flag = true
|
105
|
+
break
|
106
|
+
end
|
105
107
|
end
|
106
108
|
end
|
109
|
+
log_flag
|
110
|
+
end
|
111
|
+
def copy_commands(org_dir, dest_parent_dir)
|
112
|
+
commands = ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
|
107
113
|
end
|
108
|
-
log_flag
|
109
|
-
end
|
110
|
-
def copy_commands(org_dir, dest_parent_dir)
|
111
|
-
commands = ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
|
112
114
|
end
|
113
115
|
end
|
@@ -0,0 +1,236 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'drb/drb'
|
5
|
+
require 'fileutils'
|
6
|
+
require 'kyotocabinet'
|
7
|
+
|
8
|
+
module WorkflowManager
|
9
|
+
# default parameters
|
10
|
+
#LOG_DIR = '/srv/GT/analysis/workflow_manager_logs'
|
11
|
+
LOG_DIR = 'logs'
|
12
|
+
DB_DIR = 'dbs'
|
13
|
+
INTERVAL = 30
|
14
|
+
RESUBMIT = 0
|
15
|
+
|
16
|
+
#class WorkflowManager
|
17
|
+
class Server
|
18
|
+
@@config = nil
|
19
|
+
class Config
|
20
|
+
attr_accessor :log_dir
|
21
|
+
attr_accessor :db_dir
|
22
|
+
attr_accessor :interval
|
23
|
+
attr_accessor :resubmit
|
24
|
+
attr_accessor :cluster
|
25
|
+
end
|
26
|
+
def self.config=(config)
|
27
|
+
@@config = config
|
28
|
+
end
|
29
|
+
def self.config
|
30
|
+
@@config
|
31
|
+
end
|
32
|
+
def config
|
33
|
+
@@config ||= WorkflowManager.configure{}
|
34
|
+
end
|
35
|
+
def self.configure
|
36
|
+
@@config = Config.new
|
37
|
+
# default values
|
38
|
+
@@config.log_dir = LOG_DIR
|
39
|
+
@@config.db_dir = DB_DIR
|
40
|
+
@@config.interval = INTERVAL # interval to check jobs, [s]
|
41
|
+
@@config.resubmit = RESUBMIT # how many times at maximum to resubmit when job fails
|
42
|
+
yield(@@config)
|
43
|
+
if @@config.cluster
|
44
|
+
@@config.cluster.log_dir = File.expand_path(@@config.log_dir)
|
45
|
+
end
|
46
|
+
@@config
|
47
|
+
end
|
48
|
+
# end
|
49
|
+
|
50
|
+
#class WorkflowManager
|
51
|
+
# class Server
|
52
|
+
def initialize
|
53
|
+
@interval = config.interval
|
54
|
+
@resubmit = config.resubmit
|
55
|
+
@db_stat = File.join(config.db_dir, 'statuses.kch')
|
56
|
+
@db_logs = File.join(config.db_dir, 'logs.kch')
|
57
|
+
|
58
|
+
@log_dir = File.expand_path(config.log_dir)
|
59
|
+
@db_dir = File.expand_path(config.db_dir)
|
60
|
+
FileUtils.mkdir_p @log_dir unless File.exist?(@log_dir)
|
61
|
+
FileUtils.mkdir_p @db_dir unless File.exist?(@db_dir)
|
62
|
+
@statuses = KyotoCabinet::DB.new
|
63
|
+
@logs = KyotoCabinet::DB.new
|
64
|
+
@system_log = File.join(@log_dir, "system.log")
|
65
|
+
@mutex = Mutex.new
|
66
|
+
@cluster = config.cluster
|
67
|
+
log_puts("Server starts")
|
68
|
+
end
|
69
|
+
def hello
|
70
|
+
'hello, '+ @cluster.name
|
71
|
+
end
|
72
|
+
def copy_commands(org_dir, dest_parent_dir)
|
73
|
+
@cluster.copy_commands(org_dir, dest_parent_dir)
|
74
|
+
end
|
75
|
+
def log_puts(str)
|
76
|
+
time = Time.now.strftime("[%Y.%m.%d %H:%M:%S]")
|
77
|
+
@mutex.synchronize do
|
78
|
+
open(@system_log, "a") do |out|
|
79
|
+
out.print time + " " + str + "\n"
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
def start_monitoring(submit_command, user = 'sushi lover', resubmit = 0, script = '', project_number = 0, sge_options='', log_dir = '')
|
84
|
+
log_puts("monitoring: script=" + submit_command + " user=" + user + " resubmit=" + resubmit.to_s + " project=" + project_number.to_s + " sge option=" + sge_options + " log dir=" + log_dir.to_s)
|
85
|
+
|
86
|
+
#warn submit_command
|
87
|
+
#
|
88
|
+
# TODO: analyze arguments
|
89
|
+
#
|
90
|
+
job_id, log_file, command = @cluster.submit_job(submit_command, script, sge_options)
|
91
|
+
log_puts("submit: " + job_id + " " + command)
|
92
|
+
|
93
|
+
#
|
94
|
+
# monitor worker
|
95
|
+
#
|
96
|
+
if job_id and log_file
|
97
|
+
monitor_worker = Thread.new(job_id, log_file, submit_command, user, resubmit, script, project_number, sge_options, log_dir) do |t_job_id, t_log_file, t_submit_command, t_user, t_resubmit, t_script, t_project_number, t_sge_options, t_log_dir|
|
98
|
+
loop do
|
99
|
+
status = success_or_fail(t_job_id, t_log_file)
|
100
|
+
script_name = File.basename(submit_command).split(/-/).first
|
101
|
+
@statuses.open(@db_stat)
|
102
|
+
start_time = if stat = @statuses[t_job_id] and stat = stat.split(/,/) and time = stat[2]
|
103
|
+
time
|
104
|
+
end
|
105
|
+
time = if start_time
|
106
|
+
if status == 'success' or status == 'fail'
|
107
|
+
start_time + '/' + Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
108
|
+
else
|
109
|
+
start_time
|
110
|
+
end
|
111
|
+
else
|
112
|
+
Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
113
|
+
end
|
114
|
+
@statuses[t_job_id] = [status, script_name, time, user, project_number].join(',')
|
115
|
+
@statuses.close
|
116
|
+
@logs.open(@db_logs)
|
117
|
+
@logs[t_job_id] = t_log_file
|
118
|
+
@logs.close
|
119
|
+
#warn t_job_id + " " + status
|
120
|
+
if status == 'success'
|
121
|
+
log_puts(status + ": " + t_job_id)
|
122
|
+
unless t_log_dir.empty?
|
123
|
+
copy_commands(t_log_file, t_log_dir).each do |command|
|
124
|
+
log_puts(command)
|
125
|
+
system command
|
126
|
+
end
|
127
|
+
err_file = t_log_file.gsub('_o.log','_e.log')
|
128
|
+
copy_commands(err_file, t_log_dir).each do |command|
|
129
|
+
log_puts(command)
|
130
|
+
system command
|
131
|
+
end
|
132
|
+
end
|
133
|
+
Thread.current.kill
|
134
|
+
elsif status == 'fail'
|
135
|
+
log_puts(status + ": " + t_job_id)
|
136
|
+
#
|
137
|
+
# TODO: re-submit
|
138
|
+
#
|
139
|
+
if t_resubmit < RESUBMIT
|
140
|
+
log_puts("resubmit: " + t_job_id)
|
141
|
+
resubmit_job_id = start_monitoring(t_submit_command, t_user, t_resubmit + 1, t_script, t_project_number, t_sge_options)
|
142
|
+
script_name = File.basename(submit_command).split(/-/).first
|
143
|
+
@statuses.open(@db_stat)
|
144
|
+
@statuses[t_job_id] = ["resubmit: " + resubmit_job_id.to_s, script_name, Time.now.strftime("%Y-%m-%d %H:%M:%S"), t_user, t_project_number].join(',')
|
145
|
+
@statuses.close
|
146
|
+
else
|
147
|
+
log_puts("fail: " + t_job_id)
|
148
|
+
end
|
149
|
+
unless t_log_dir.empty?
|
150
|
+
copy_commands(t_log_file, t_log_dir).each do |command|
|
151
|
+
log_puts(command)
|
152
|
+
system command
|
153
|
+
end
|
154
|
+
err_file = t_log_file.gsub('_o.log','_e.log')
|
155
|
+
copy_commands(err_file, t_log_dir).each do |command|
|
156
|
+
log_puts(command)
|
157
|
+
system command
|
158
|
+
end
|
159
|
+
end
|
160
|
+
Thread.current.kill
|
161
|
+
end
|
162
|
+
sleep @interval
|
163
|
+
end
|
164
|
+
end
|
165
|
+
job_id.to_i
|
166
|
+
end
|
167
|
+
end
|
168
|
+
def status(job_id)
|
169
|
+
stat = nil
|
170
|
+
@statuses.open(@db_stat)
|
171
|
+
stat = @statuses[job_id.to_s]
|
172
|
+
@statuses.close
|
173
|
+
stat
|
174
|
+
end
|
175
|
+
def job_list(with_results=false, project_number=nil)
|
176
|
+
s = []
|
177
|
+
@statuses.open(@db_stat)
|
178
|
+
@statuses.each do |key, value|
|
179
|
+
if project_number
|
180
|
+
if x = value.split(/,/)[4].to_i==project_number.to_i
|
181
|
+
s << [key, value]
|
182
|
+
end
|
183
|
+
else
|
184
|
+
s << [key, value]
|
185
|
+
end
|
186
|
+
end
|
187
|
+
@statuses.close
|
188
|
+
s.sort.reverse.map{|v| v.join(',')}.join("\n")
|
189
|
+
end
|
190
|
+
def get_log(job_id, with_err=false)
|
191
|
+
@logs.open(@db_logs)
|
192
|
+
log_file = @logs[job_id.to_s]
|
193
|
+
@logs.close
|
194
|
+
log_data = if log_file and File.exist?(log_file)
|
195
|
+
"__STDOUT LOG__\n\n" + File.read(log_file)
|
196
|
+
else
|
197
|
+
'no log file'
|
198
|
+
end
|
199
|
+
if with_err
|
200
|
+
err_file = log_file.gsub(/_o\.log/,'_e.log')
|
201
|
+
if err_file and File.exist?(err_file)
|
202
|
+
log_data << "\n\n__STDERR LOG__\n\n"
|
203
|
+
log_data << File.read(err_file)
|
204
|
+
end
|
205
|
+
end
|
206
|
+
log_data
|
207
|
+
end
|
208
|
+
def get_script(job_id)
|
209
|
+
@logs.open(@db_logs)
|
210
|
+
script_file = @logs[job_id.to_s]
|
211
|
+
@logs.close
|
212
|
+
if script_file
|
213
|
+
script_file = script_file.gsub(/_o\.log/,'')
|
214
|
+
end
|
215
|
+
script = if script_file and File.exist?(script_file)
|
216
|
+
File.read(script_file)
|
217
|
+
else
|
218
|
+
'no script file'
|
219
|
+
end
|
220
|
+
script
|
221
|
+
end
|
222
|
+
def success_or_fail(job_id, log_file)
|
223
|
+
job_running = @cluster.job_running?(job_id)
|
224
|
+
job_ends = @cluster.job_ends?(log_file)
|
225
|
+
msg = if job_running
|
226
|
+
'running'
|
227
|
+
elsif job_ends
|
228
|
+
'success'
|
229
|
+
else
|
230
|
+
'fail'
|
231
|
+
end
|
232
|
+
msg
|
233
|
+
end
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
@@ -1,3 +1,3 @@
|
|
1
|
-
|
2
|
-
VERSION = "0.0.5"
|
1
|
+
module WorkflowManager
|
2
|
+
VERSION = "0.0.6"
|
3
3
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: workflow_manager
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -57,6 +57,7 @@ executables:
|
|
57
57
|
extensions: []
|
58
58
|
extra_rdoc_files: []
|
59
59
|
files:
|
60
|
+
- .bzrignore
|
60
61
|
- Gemfile
|
61
62
|
- LICENSE.txt
|
62
63
|
- README.md
|
@@ -72,6 +73,7 @@ files:
|
|
72
73
|
- config/environments/production.rb
|
73
74
|
- lib/workflow_manager/cluster.rb
|
74
75
|
- lib/workflow_manager/optparse_ex.rb
|
76
|
+
- lib/workflow_manager/server.rb
|
75
77
|
- lib/workflow_manager/version.rb
|
76
78
|
- lib/workflow_manager.rb
|
77
79
|
- workflow_manager.gemspec
|