workflow_manager 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.bzrignore +17 -0
- data/bin/workflow_manager +29 -252
- data/config/environments/development.rb +2 -2
- data/lib/workflow_manager.rb +4 -3
- data/lib/workflow_manager/cluster.rb +94 -92
- data/lib/workflow_manager/server.rb +236 -0
- data/lib/workflow_manager/version.rb +2 -2
- metadata +4 -2
data/.bzrignore
ADDED
data/bin/workflow_manager
CHANGED
@@ -1,262 +1,39 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# encoding: utf-8
|
3
|
-
# 20121109 masa workflow manager druby server
|
4
|
-
Version = '20131104-192005'
|
5
3
|
|
6
|
-
require '
|
4
|
+
require 'workflow_manager'
|
7
5
|
require 'fileutils'
|
8
|
-
require 'kyotocabinet'
|
9
|
-
require_relative '../lib/workflow_manager'
|
10
6
|
|
11
|
-
|
12
|
-
#
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
INTERVAL = 30
|
17
|
-
RESUBMIT = 0
|
18
|
-
|
19
|
-
class WorkflowManager
|
20
|
-
@@config = nil
|
21
|
-
class Config
|
22
|
-
attr_accessor :log_dir
|
23
|
-
attr_accessor :db_dir
|
24
|
-
attr_accessor :interval
|
25
|
-
attr_accessor :resubmit
|
26
|
-
attr_accessor :cluster
|
27
|
-
end
|
28
|
-
def self.config=(config)
|
29
|
-
@@config = config
|
30
|
-
end
|
31
|
-
def self.config
|
32
|
-
@@config
|
33
|
-
end
|
34
|
-
def config
|
35
|
-
@@config ||= WorkflowManager.configure{}
|
36
|
-
end
|
37
|
-
def self.configure
|
38
|
-
@@config = Config.new
|
39
|
-
# default values
|
40
|
-
@@config.log_dir = LOG_DIR
|
41
|
-
@@config.db_dir = DB_DIR
|
42
|
-
@@config.interval = INTERVAL # interval to check jobs, [s]
|
43
|
-
@@config.resubmit = RESUBMIT # how many times at maximum to resubmit when job fails
|
44
|
-
yield(@@config)
|
45
|
-
if @@config.cluster
|
46
|
-
@@config.cluster.log_dir = File.expand_path(@@config.log_dir)
|
47
|
-
end
|
48
|
-
@@config
|
49
|
-
end
|
7
|
+
opt = OptionParser.new do |o|
|
8
|
+
o.banner = "Usage:\n #{File.basename(__FILE__)} -d [druby://host:port] -m [development|production]"
|
9
|
+
o.on(:server, 'druby://localhost:12345', '-d server', '--server', 'workflow manager URI (default: druby://localhost:12345)')
|
10
|
+
o.on(:mode, 'development', '-m mode', '--mode', 'development|production (default: development)')
|
11
|
+
o.parse!(ARGV)
|
50
12
|
end
|
51
13
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
'hello, '+ @cluster.name
|
72
|
-
end
|
73
|
-
def copy_commands(org_dir, dest_parent_dir)
|
74
|
-
@cluster.copy_commands(org_dir, dest_parent_dir)
|
75
|
-
end
|
76
|
-
def log_puts(str)
|
77
|
-
time = Time.now.strftime("[%Y.%m.%d %H:%M:%S]")
|
78
|
-
@mutex.synchronize do
|
79
|
-
open(@system_log, "a") do |out|
|
80
|
-
out.print time + " " + str + "\n"
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|
84
|
-
def start_monitoring(submit_command, user = 'sushi lover', resubmit = 0, script = '', project_number = 0, sge_options='', log_dir = '')
|
85
|
-
log_puts("monitoring: script=" + submit_command + " user=" + user + " resubmit=" + resubmit.to_s + " project=" + project_number.to_s + " sge option=" + sge_options + " log dir=" + log_dir.to_s)
|
86
|
-
|
87
|
-
#warn submit_command
|
88
|
-
#
|
89
|
-
# TODO: analyze arguments
|
90
|
-
#
|
91
|
-
job_id, log_file, command = @cluster.submit_job(submit_command, script, sge_options)
|
92
|
-
log_puts("submit: " + job_id + " " + command)
|
93
|
-
|
94
|
-
#
|
95
|
-
# monitor worker
|
96
|
-
#
|
97
|
-
if job_id and log_file
|
98
|
-
monitor_worker = Thread.new(job_id, log_file, submit_command, user, resubmit, script, project_number, sge_options, log_dir) do |t_job_id, t_log_file, t_submit_command, t_user, t_resubmit, t_script, t_project_number, t_sge_options, t_log_dir|
|
99
|
-
loop do
|
100
|
-
status = success_or_fail(t_job_id, t_log_file)
|
101
|
-
script_name = File.basename(submit_command).split(/-/).first
|
102
|
-
@statuses.open(@db_stat)
|
103
|
-
start_time = if stat = @statuses[t_job_id] and stat = stat.split(/,/) and time = stat[2]
|
104
|
-
time
|
105
|
-
end
|
106
|
-
time = if start_time
|
107
|
-
if status == 'success' or status == 'fail'
|
108
|
-
start_time + '/' + Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
109
|
-
else
|
110
|
-
start_time
|
111
|
-
end
|
112
|
-
else
|
113
|
-
Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
114
|
-
end
|
115
|
-
@statuses[t_job_id] = [status, script_name, time, user, project_number].join(',')
|
116
|
-
@statuses.close
|
117
|
-
@logs.open(@db_logs)
|
118
|
-
@logs[t_job_id] = t_log_file
|
119
|
-
@logs.close
|
120
|
-
#warn t_job_id + " " + status
|
121
|
-
if status == 'success'
|
122
|
-
log_puts(status + ": " + t_job_id)
|
123
|
-
unless t_log_dir.empty?
|
124
|
-
copy_commands(t_log_file, t_log_dir).each do |command|
|
125
|
-
log_puts(command)
|
126
|
-
system command
|
127
|
-
end
|
128
|
-
err_file = t_log_file.gsub('_o.log','_e.log')
|
129
|
-
copy_commands(err_file, t_log_dir).each do |command|
|
130
|
-
log_puts(command)
|
131
|
-
system command
|
132
|
-
end
|
133
|
-
end
|
134
|
-
Thread.current.kill
|
135
|
-
elsif status == 'fail'
|
136
|
-
log_puts(status + ": " + t_job_id)
|
137
|
-
#
|
138
|
-
# TODO: re-submit
|
139
|
-
#
|
140
|
-
if t_resubmit < RESUBMIT
|
141
|
-
log_puts("resubmit: " + t_job_id)
|
142
|
-
resubmit_job_id = start_monitoring(t_submit_command, t_user, t_resubmit + 1, t_script, t_project_number, t_sge_options)
|
143
|
-
script_name = File.basename(submit_command).split(/-/).first
|
144
|
-
@statuses.open(@db_stat)
|
145
|
-
@statuses[t_job_id] = ["resubmit: " + resubmit_job_id.to_s, script_name, Time.now.strftime("%Y-%m-%d %H:%M:%S"), t_user, t_project_number].join(',')
|
146
|
-
@statuses.close
|
147
|
-
else
|
148
|
-
log_puts("fail: " + t_job_id)
|
149
|
-
end
|
150
|
-
unless t_log_dir.empty?
|
151
|
-
copy_commands(t_log_file, t_log_dir).each do |command|
|
152
|
-
log_puts(command)
|
153
|
-
system command
|
154
|
-
end
|
155
|
-
err_file = t_log_file.gsub('_o.log','_e.log')
|
156
|
-
copy_commands(err_file, t_log_dir).each do |command|
|
157
|
-
log_puts(command)
|
158
|
-
system command
|
159
|
-
end
|
160
|
-
end
|
161
|
-
Thread.current.kill
|
162
|
-
end
|
163
|
-
sleep @interval
|
164
|
-
end
|
165
|
-
end
|
166
|
-
job_id.to_i
|
167
|
-
end
|
168
|
-
end
|
169
|
-
def status(job_id)
|
170
|
-
stat = nil
|
171
|
-
@statuses.open(@db_stat)
|
172
|
-
stat = @statuses[job_id.to_s]
|
173
|
-
@statuses.close
|
174
|
-
stat
|
175
|
-
end
|
176
|
-
def job_list(with_results=false, project_number=nil)
|
177
|
-
s = []
|
178
|
-
@statuses.open(@db_stat)
|
179
|
-
@statuses.each do |key, value|
|
180
|
-
if project_number
|
181
|
-
if x = value.split(/,/)[4].to_i==project_number.to_i
|
182
|
-
s << [key, value]
|
183
|
-
end
|
184
|
-
else
|
185
|
-
s << [key, value]
|
186
|
-
end
|
14
|
+
uri = opt.server
|
15
|
+
if opt.mode =~ /[development|production]/
|
16
|
+
config = File.join(File.dirname(File.expand_path(__FILE__)), "../config/environments/#{opt.mode}.rb")
|
17
|
+
opt.mode = nil unless File.exist?(config)
|
18
|
+
end
|
19
|
+
print "mode = #{opt.mode}\n"
|
20
|
+
if opt.mode
|
21
|
+
config_dir = "./config/environments/"
|
22
|
+
FileUtils.mkdir_p config_dir
|
23
|
+
config_file = File.join(config_dir, opt.mode+".rb")
|
24
|
+
unless File.exist?(config_file)
|
25
|
+
app_dir = File.expand_path('..', __FILE__)
|
26
|
+
default_config_dir = File.join(app_dir, "../config/environments")
|
27
|
+
p default_config_dir
|
28
|
+
default_config_file = File.join(default_config_dir, opt.mode+".rb")
|
29
|
+
if File.exist?(default_config_file)
|
30
|
+
FileUtils.cp(default_config_file, config_file)
|
31
|
+
else
|
32
|
+
raise "Configure file does not exist: #{config_file}"
|
187
33
|
end
|
188
|
-
@statuses.close
|
189
|
-
s.sort.reverse.map{|v| v.join(',')}.join("\n")
|
190
34
|
end
|
191
|
-
|
192
|
-
@logs.open(@db_logs)
|
193
|
-
log_file = @logs[job_id.to_s]
|
194
|
-
@logs.close
|
195
|
-
log_data = if log_file and File.exist?(log_file)
|
196
|
-
"__STDOUT LOG__\n\n" + File.read(log_file)
|
197
|
-
else
|
198
|
-
'no log file'
|
199
|
-
end
|
200
|
-
if with_err
|
201
|
-
err_file = log_file.gsub(/_o\.log/,'_e.log')
|
202
|
-
if err_file and File.exist?(err_file)
|
203
|
-
log_data << "\n\n__STDERR LOG__\n\n"
|
204
|
-
log_data << File.read(err_file)
|
205
|
-
end
|
206
|
-
end
|
207
|
-
log_data
|
208
|
-
end
|
209
|
-
def get_script(job_id)
|
210
|
-
@logs.open(@db_logs)
|
211
|
-
script_file = @logs[job_id.to_s]
|
212
|
-
@logs.close
|
213
|
-
if script_file
|
214
|
-
script_file = script_file.gsub(/_o\.log/,'')
|
215
|
-
end
|
216
|
-
script = if script_file and File.exist?(script_file)
|
217
|
-
File.read(script_file)
|
218
|
-
else
|
219
|
-
'no script file'
|
220
|
-
end
|
221
|
-
script
|
222
|
-
end
|
223
|
-
def success_or_fail(job_id, log_file)
|
224
|
-
job_running = @cluster.job_running?(job_id)
|
225
|
-
job_ends = @cluster.job_ends?(log_file)
|
226
|
-
msg = if job_running
|
227
|
-
'running'
|
228
|
-
elsif job_ends
|
229
|
-
'success'
|
230
|
-
else
|
231
|
-
'fail'
|
232
|
-
end
|
233
|
-
msg
|
234
|
-
end
|
35
|
+
require config_file
|
235
36
|
end
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
#
|
240
|
-
#if __FILE__ == $0
|
241
|
-
|
242
|
-
opt = OptionParser.new do |o|
|
243
|
-
o.banner = "Usage:\n #{File.basename(__FILE__)} -d [druby://host:port] -m [development|production]"
|
244
|
-
o.on(:server, 'druby://localhost:12345', '-d server', '--server', 'workflow manager URI (default: druby://localhost:12345)')
|
245
|
-
o.on(:mode, 'development', '-m mode', '--mode', 'development|production (default: development)')
|
246
|
-
o.parse!(ARGV)
|
247
|
-
end
|
248
|
-
|
249
|
-
uri = opt.server
|
250
|
-
if opt.mode =~ /[development|production]/
|
251
|
-
config = File.join(File.dirname(File.expand_path(__FILE__)), "../config/environments/#{opt.mode}.rb")
|
252
|
-
opt.mode = nil unless File.exist?(config)
|
253
|
-
end
|
254
|
-
print "mode = #{opt.mode}\n"
|
255
|
-
if opt.mode
|
256
|
-
require_relative "../config/environments/#{opt.mode}"
|
257
|
-
end
|
258
|
-
DRb.start_service(uri, WorkflowManager.new)
|
259
|
-
puts DRb.uri
|
260
|
-
DRb.thread.join
|
261
|
-
#sleep
|
262
|
-
#end
|
37
|
+
DRb.start_service(uri, WorkflowManager::Server.new)
|
38
|
+
puts DRb.uri
|
39
|
+
DRb.thread.join
|
@@ -1,11 +1,11 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# encoding: utf-8
|
3
3
|
|
4
|
-
WorkflowManager.configure do |config|
|
4
|
+
WorkflowManager::Server.configure do |config|
|
5
5
|
config.log_dir = 'logs'
|
6
6
|
config.db_dir = 'dbs'
|
7
7
|
config.interval = 30
|
8
8
|
config.resubmit = 0
|
9
|
-
config.cluster = LocalComputer.new('local_computer')
|
9
|
+
config.cluster = WorkflowManager::LocalComputer.new('local_computer')
|
10
10
|
end
|
11
11
|
|
data/lib/workflow_manager.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
|
2
|
-
|
2
|
+
require 'workflow_manager/version'
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
require 'workflow_manager/optparse_ex'
|
5
|
+
require 'workflow_manager/cluster'
|
6
|
+
require 'workflow_manager/server'
|
@@ -1,113 +1,115 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# encoding: utf-8
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
out
|
18
|
-
|
4
|
+
module WorkflowManager
|
5
|
+
class Cluster
|
6
|
+
attr_accessor :name
|
7
|
+
attr_reader :options
|
8
|
+
attr_accessor :log_dir
|
9
|
+
def initialize(name='', log_dir='')
|
10
|
+
@name = name
|
11
|
+
@options = {}
|
12
|
+
@log_dir = log_dir
|
13
|
+
end
|
14
|
+
def generate_new_job_script(script_name, script_content)
|
15
|
+
new_job_script = File.basename(script_name) + "_" + Time.now.strftime("%Y%m%d%H%M%S")
|
16
|
+
new_job_script = File.join(@log_dir, new_job_script)
|
17
|
+
open(new_job_script, 'w') do |out|
|
18
|
+
out.print script_content
|
19
|
+
out.print "\necho __SCRIPT END__\n"
|
20
|
+
end
|
21
|
+
new_job_script
|
22
|
+
end
|
23
|
+
def submit_job(script_file, script_content, option='')
|
24
|
+
end
|
25
|
+
def job_running?(job_id)
|
26
|
+
end
|
27
|
+
def job_ends?(log_file)
|
28
|
+
end
|
29
|
+
def copy_commands(org_dir, dest_parent_dir)
|
19
30
|
end
|
20
|
-
new_job_script
|
21
|
-
end
|
22
|
-
def submit_job(script_file, script_content, option='')
|
23
|
-
end
|
24
|
-
def job_running?(job_id)
|
25
|
-
end
|
26
|
-
def job_ends?(log_file)
|
27
|
-
end
|
28
|
-
def copy_commands(org_dir, dest_parent_dir)
|
29
31
|
end
|
30
|
-
end
|
31
32
|
|
32
|
-
class LocalComputer < Cluster
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
33
|
+
class LocalComputer < Cluster
|
34
|
+
def submit_job(script_file, script_content, option='')
|
35
|
+
if script_name = File.basename(script_file) and script_name =~ /\.sh$/
|
36
|
+
new_job_script = generate_new_job_script(script_name, script_content)
|
37
|
+
new_job_script_base = File.basename(new_job_script)
|
38
|
+
log_file = File.join(@log_dir, new_job_script_base + "_o.log")
|
39
|
+
err_file = File.join(@log_dir, new_job_script_base + "_e.log")
|
40
|
+
command = "bash #{new_job_script} 1> #{log_file} 2> #{err_file}"
|
41
|
+
pid = spawn(command)
|
42
|
+
Process.detach(pid)
|
43
|
+
[pid.to_s, log_file, command]
|
44
|
+
end
|
43
45
|
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
46
|
+
def job_running?(pid)
|
47
|
+
command = "ps aux"
|
48
|
+
result = IO.popen(command) do |io|
|
49
|
+
flag = false
|
50
|
+
while line=io.gets
|
51
|
+
x = line.split
|
52
|
+
if x[1].to_i == pid.to_i
|
53
|
+
flag = true
|
54
|
+
break
|
55
|
+
end
|
54
56
|
end
|
57
|
+
flag
|
55
58
|
end
|
56
|
-
|
59
|
+
result
|
60
|
+
end
|
61
|
+
def job_ends?(log_file)
|
62
|
+
command = "tail -n 20 #{log_file}|grep '__SCRIPT END__'"
|
63
|
+
result = `#{command}`
|
64
|
+
result.to_s.empty? ? false : true
|
65
|
+
end
|
66
|
+
def copy_commands(org_dir, dest_parent_dir)
|
67
|
+
commands = []
|
68
|
+
commands << "mkdir -p #{dest_parent_dir}"
|
69
|
+
commands << "cp -r #{org_dir} #{dest_parent_dir}"
|
70
|
+
commands
|
57
71
|
end
|
58
|
-
result
|
59
|
-
end
|
60
|
-
def job_ends?(log_file)
|
61
|
-
command = "tail -n 20 #{log_file}|grep '__SCRIPT END__'"
|
62
|
-
result = `#{command}`
|
63
|
-
result.to_s.empty? ? false : true
|
64
|
-
end
|
65
|
-
def copy_commands(org_dir, dest_parent_dir)
|
66
|
-
commands = []
|
67
|
-
commands << "mkdir -p #{dest_parent_dir}"
|
68
|
-
commands << "cp -r #{org_dir} #{dest_parent_dir}"
|
69
|
-
commands
|
70
72
|
end
|
71
|
-
end
|
72
73
|
|
73
|
-
class FGCZCluster < Cluster
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
74
|
+
class FGCZCluster < Cluster
|
75
|
+
def submit_job(script_file, script_content, option='')
|
76
|
+
if script_name = File.basename(script_file) and script_name =~ /\.sh$/
|
77
|
+
new_job_script = generate_new_job_script(script_name, script_content)
|
78
|
+
new_job_script_base = File.basename(new_job_script)
|
79
|
+
log_file = File.join(@log_dir, new_job_script_base + "_o.log")
|
80
|
+
err_file = File.join(@log_dir, new_job_script_base + "_e.log")
|
81
|
+
command = "g-sub -o #{log_file} -e #{err_file} #{option} #{new_job_script}"
|
82
|
+
job_id = `#{command}`
|
83
|
+
job_id = job_id.match(/Your job (\d+) \(/)[1]
|
84
|
+
[job_id, log_file, command]
|
85
|
+
end
|
84
86
|
end
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
87
|
+
def job_running?(job_id)
|
88
|
+
qstat_flag = false
|
89
|
+
IO.popen('qstat -u "*"') do |io|
|
90
|
+
while line=io.gets
|
91
|
+
if line =~ /#{job_id}/
|
92
|
+
qstat_flag = true
|
93
|
+
break
|
94
|
+
end
|
93
95
|
end
|
94
96
|
end
|
97
|
+
qstat_flag
|
95
98
|
end
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
break
|
99
|
+
def job_ends?(log_file)
|
100
|
+
log_flag = false
|
101
|
+
IO.popen("tail -n 10 #{log_file}") do |io|
|
102
|
+
while line=io.gets
|
103
|
+
if line =~ /__SCRIPT END__/
|
104
|
+
log_flag = true
|
105
|
+
break
|
106
|
+
end
|
105
107
|
end
|
106
108
|
end
|
109
|
+
log_flag
|
110
|
+
end
|
111
|
+
def copy_commands(org_dir, dest_parent_dir)
|
112
|
+
commands = ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
|
107
113
|
end
|
108
|
-
log_flag
|
109
|
-
end
|
110
|
-
def copy_commands(org_dir, dest_parent_dir)
|
111
|
-
commands = ["g-req -w copy #{org_dir} #{dest_parent_dir}"]
|
112
114
|
end
|
113
115
|
end
|
@@ -0,0 +1,236 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'drb/drb'
|
5
|
+
require 'fileutils'
|
6
|
+
require 'kyotocabinet'
|
7
|
+
|
8
|
+
module WorkflowManager
|
9
|
+
# default parameters
|
10
|
+
#LOG_DIR = '/srv/GT/analysis/workflow_manager_logs'
|
11
|
+
LOG_DIR = 'logs'
|
12
|
+
DB_DIR = 'dbs'
|
13
|
+
INTERVAL = 30
|
14
|
+
RESUBMIT = 0
|
15
|
+
|
16
|
+
#class WorkflowManager
|
17
|
+
class Server
|
18
|
+
@@config = nil
|
19
|
+
class Config
|
20
|
+
attr_accessor :log_dir
|
21
|
+
attr_accessor :db_dir
|
22
|
+
attr_accessor :interval
|
23
|
+
attr_accessor :resubmit
|
24
|
+
attr_accessor :cluster
|
25
|
+
end
|
26
|
+
def self.config=(config)
|
27
|
+
@@config = config
|
28
|
+
end
|
29
|
+
def self.config
|
30
|
+
@@config
|
31
|
+
end
|
32
|
+
def config
|
33
|
+
@@config ||= WorkflowManager.configure{}
|
34
|
+
end
|
35
|
+
def self.configure
|
36
|
+
@@config = Config.new
|
37
|
+
# default values
|
38
|
+
@@config.log_dir = LOG_DIR
|
39
|
+
@@config.db_dir = DB_DIR
|
40
|
+
@@config.interval = INTERVAL # interval to check jobs, [s]
|
41
|
+
@@config.resubmit = RESUBMIT # how many times at maximum to resubmit when job fails
|
42
|
+
yield(@@config)
|
43
|
+
if @@config.cluster
|
44
|
+
@@config.cluster.log_dir = File.expand_path(@@config.log_dir)
|
45
|
+
end
|
46
|
+
@@config
|
47
|
+
end
|
48
|
+
# end
|
49
|
+
|
50
|
+
#class WorkflowManager
|
51
|
+
# class Server
|
52
|
+
def initialize
|
53
|
+
@interval = config.interval
|
54
|
+
@resubmit = config.resubmit
|
55
|
+
@db_stat = File.join(config.db_dir, 'statuses.kch')
|
56
|
+
@db_logs = File.join(config.db_dir, 'logs.kch')
|
57
|
+
|
58
|
+
@log_dir = File.expand_path(config.log_dir)
|
59
|
+
@db_dir = File.expand_path(config.db_dir)
|
60
|
+
FileUtils.mkdir_p @log_dir unless File.exist?(@log_dir)
|
61
|
+
FileUtils.mkdir_p @db_dir unless File.exist?(@db_dir)
|
62
|
+
@statuses = KyotoCabinet::DB.new
|
63
|
+
@logs = KyotoCabinet::DB.new
|
64
|
+
@system_log = File.join(@log_dir, "system.log")
|
65
|
+
@mutex = Mutex.new
|
66
|
+
@cluster = config.cluster
|
67
|
+
log_puts("Server starts")
|
68
|
+
end
|
69
|
+
def hello
|
70
|
+
'hello, '+ @cluster.name
|
71
|
+
end
|
72
|
+
def copy_commands(org_dir, dest_parent_dir)
|
73
|
+
@cluster.copy_commands(org_dir, dest_parent_dir)
|
74
|
+
end
|
75
|
+
def log_puts(str)
|
76
|
+
time = Time.now.strftime("[%Y.%m.%d %H:%M:%S]")
|
77
|
+
@mutex.synchronize do
|
78
|
+
open(@system_log, "a") do |out|
|
79
|
+
out.print time + " " + str + "\n"
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
def start_monitoring(submit_command, user = 'sushi lover', resubmit = 0, script = '', project_number = 0, sge_options='', log_dir = '')
|
84
|
+
log_puts("monitoring: script=" + submit_command + " user=" + user + " resubmit=" + resubmit.to_s + " project=" + project_number.to_s + " sge option=" + sge_options + " log dir=" + log_dir.to_s)
|
85
|
+
|
86
|
+
#warn submit_command
|
87
|
+
#
|
88
|
+
# TODO: analyze arguments
|
89
|
+
#
|
90
|
+
job_id, log_file, command = @cluster.submit_job(submit_command, script, sge_options)
|
91
|
+
log_puts("submit: " + job_id + " " + command)
|
92
|
+
|
93
|
+
#
|
94
|
+
# monitor worker
|
95
|
+
#
|
96
|
+
if job_id and log_file
|
97
|
+
monitor_worker = Thread.new(job_id, log_file, submit_command, user, resubmit, script, project_number, sge_options, log_dir) do |t_job_id, t_log_file, t_submit_command, t_user, t_resubmit, t_script, t_project_number, t_sge_options, t_log_dir|
|
98
|
+
loop do
|
99
|
+
status = success_or_fail(t_job_id, t_log_file)
|
100
|
+
script_name = File.basename(submit_command).split(/-/).first
|
101
|
+
@statuses.open(@db_stat)
|
102
|
+
start_time = if stat = @statuses[t_job_id] and stat = stat.split(/,/) and time = stat[2]
|
103
|
+
time
|
104
|
+
end
|
105
|
+
time = if start_time
|
106
|
+
if status == 'success' or status == 'fail'
|
107
|
+
start_time + '/' + Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
108
|
+
else
|
109
|
+
start_time
|
110
|
+
end
|
111
|
+
else
|
112
|
+
Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
113
|
+
end
|
114
|
+
@statuses[t_job_id] = [status, script_name, time, user, project_number].join(',')
|
115
|
+
@statuses.close
|
116
|
+
@logs.open(@db_logs)
|
117
|
+
@logs[t_job_id] = t_log_file
|
118
|
+
@logs.close
|
119
|
+
#warn t_job_id + " " + status
|
120
|
+
if status == 'success'
|
121
|
+
log_puts(status + ": " + t_job_id)
|
122
|
+
unless t_log_dir.empty?
|
123
|
+
copy_commands(t_log_file, t_log_dir).each do |command|
|
124
|
+
log_puts(command)
|
125
|
+
system command
|
126
|
+
end
|
127
|
+
err_file = t_log_file.gsub('_o.log','_e.log')
|
128
|
+
copy_commands(err_file, t_log_dir).each do |command|
|
129
|
+
log_puts(command)
|
130
|
+
system command
|
131
|
+
end
|
132
|
+
end
|
133
|
+
Thread.current.kill
|
134
|
+
elsif status == 'fail'
|
135
|
+
log_puts(status + ": " + t_job_id)
|
136
|
+
#
|
137
|
+
# TODO: re-submit
|
138
|
+
#
|
139
|
+
if t_resubmit < RESUBMIT
|
140
|
+
log_puts("resubmit: " + t_job_id)
|
141
|
+
resubmit_job_id = start_monitoring(t_submit_command, t_user, t_resubmit + 1, t_script, t_project_number, t_sge_options)
|
142
|
+
script_name = File.basename(submit_command).split(/-/).first
|
143
|
+
@statuses.open(@db_stat)
|
144
|
+
@statuses[t_job_id] = ["resubmit: " + resubmit_job_id.to_s, script_name, Time.now.strftime("%Y-%m-%d %H:%M:%S"), t_user, t_project_number].join(',')
|
145
|
+
@statuses.close
|
146
|
+
else
|
147
|
+
log_puts("fail: " + t_job_id)
|
148
|
+
end
|
149
|
+
unless t_log_dir.empty?
|
150
|
+
copy_commands(t_log_file, t_log_dir).each do |command|
|
151
|
+
log_puts(command)
|
152
|
+
system command
|
153
|
+
end
|
154
|
+
err_file = t_log_file.gsub('_o.log','_e.log')
|
155
|
+
copy_commands(err_file, t_log_dir).each do |command|
|
156
|
+
log_puts(command)
|
157
|
+
system command
|
158
|
+
end
|
159
|
+
end
|
160
|
+
Thread.current.kill
|
161
|
+
end
|
162
|
+
sleep @interval
|
163
|
+
end
|
164
|
+
end
|
165
|
+
job_id.to_i
|
166
|
+
end
|
167
|
+
end
|
168
|
+
def status(job_id)
|
169
|
+
stat = nil
|
170
|
+
@statuses.open(@db_stat)
|
171
|
+
stat = @statuses[job_id.to_s]
|
172
|
+
@statuses.close
|
173
|
+
stat
|
174
|
+
end
|
175
|
+
def job_list(with_results=false, project_number=nil)
|
176
|
+
s = []
|
177
|
+
@statuses.open(@db_stat)
|
178
|
+
@statuses.each do |key, value|
|
179
|
+
if project_number
|
180
|
+
if x = value.split(/,/)[4].to_i==project_number.to_i
|
181
|
+
s << [key, value]
|
182
|
+
end
|
183
|
+
else
|
184
|
+
s << [key, value]
|
185
|
+
end
|
186
|
+
end
|
187
|
+
@statuses.close
|
188
|
+
s.sort.reverse.map{|v| v.join(',')}.join("\n")
|
189
|
+
end
|
190
|
+
def get_log(job_id, with_err=false)
|
191
|
+
@logs.open(@db_logs)
|
192
|
+
log_file = @logs[job_id.to_s]
|
193
|
+
@logs.close
|
194
|
+
log_data = if log_file and File.exist?(log_file)
|
195
|
+
"__STDOUT LOG__\n\n" + File.read(log_file)
|
196
|
+
else
|
197
|
+
'no log file'
|
198
|
+
end
|
199
|
+
if with_err
|
200
|
+
err_file = log_file.gsub(/_o\.log/,'_e.log')
|
201
|
+
if err_file and File.exist?(err_file)
|
202
|
+
log_data << "\n\n__STDERR LOG__\n\n"
|
203
|
+
log_data << File.read(err_file)
|
204
|
+
end
|
205
|
+
end
|
206
|
+
log_data
|
207
|
+
end
|
208
|
+
def get_script(job_id)
|
209
|
+
@logs.open(@db_logs)
|
210
|
+
script_file = @logs[job_id.to_s]
|
211
|
+
@logs.close
|
212
|
+
if script_file
|
213
|
+
script_file = script_file.gsub(/_o\.log/,'')
|
214
|
+
end
|
215
|
+
script = if script_file and File.exist?(script_file)
|
216
|
+
File.read(script_file)
|
217
|
+
else
|
218
|
+
'no script file'
|
219
|
+
end
|
220
|
+
script
|
221
|
+
end
|
222
|
+
def success_or_fail(job_id, log_file)
|
223
|
+
job_running = @cluster.job_running?(job_id)
|
224
|
+
job_ends = @cluster.job_ends?(log_file)
|
225
|
+
msg = if job_running
|
226
|
+
'running'
|
227
|
+
elsif job_ends
|
228
|
+
'success'
|
229
|
+
else
|
230
|
+
'fail'
|
231
|
+
end
|
232
|
+
msg
|
233
|
+
end
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
@@ -1,3 +1,3 @@
|
|
1
|
-
|
2
|
-
VERSION = "0.0.5"
|
1
|
+
module WorkflowManager
|
2
|
+
VERSION = "0.0.6"
|
3
3
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: workflow_manager
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -57,6 +57,7 @@ executables:
|
|
57
57
|
extensions: []
|
58
58
|
extra_rdoc_files: []
|
59
59
|
files:
|
60
|
+
- .bzrignore
|
60
61
|
- Gemfile
|
61
62
|
- LICENSE.txt
|
62
63
|
- README.md
|
@@ -72,6 +73,7 @@ files:
|
|
72
73
|
- config/environments/production.rb
|
73
74
|
- lib/workflow_manager/cluster.rb
|
74
75
|
- lib/workflow_manager/optparse_ex.rb
|
76
|
+
- lib/workflow_manager/server.rb
|
75
77
|
- lib/workflow_manager/version.rb
|
76
78
|
- lib/workflow_manager.rb
|
77
79
|
- workflow_manager.gemspec
|