workflow_manager 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/wfm_job_list +10 -1
- data/bin/workflow_manager +0 -1
- data/lib/workflow_manager/server.rb +189 -145
- data/lib/workflow_manager/version.rb +1 -1
- metadata +1 -1
data/bin/wfm_job_list
CHANGED
@@ -1,10 +1,19 @@
 #!/usr/bin/env ruby
 # encoding: utf-8
 # 20121112 masa workflow manager client
-Version = '
+Version = '20131108-112017'
 
 require 'drb/drb'
 require 'workflow_manager/optparse_ex'
+require 'pstore'
+class PStore
+  def each
+    self.roots.each do |key|
+      yield(key, self[key])
+    end
+  end
+end
+
 #require File.join((File.expand_path('../../lib',__FILE__)), 'optparse_ex.rb')
 
 opt = OptionParser.new do |o|
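The PStore#each patch above gives the client a key/value iterator, presumably so job entries can be listed the same way whether the server stores them in KyotoCabinet or in a plain PStore file. A minimal sketch of that pattern; the file name and job entries are made up, and PStore access has to happen inside a transaction, which is why #each is called there:

# Sketch only: iterate a PStore of job statuses with the patched #each.
require 'pstore'

class PStore
  def each
    self.roots.each do |key|
      yield(key, self[key])
    end
  end
end

db = PStore.new('statuses.pstore')   # hypothetical file name
db.transaction do |store|
  # value layout follows the server's join: status,script,time,user,project
  store['120001'] = 'success,demo.sh,2013-11-08 11:20:17,sushi lover,1001'
  store['120002'] = 'running,demo.sh,2013-11-08 11:25:00,sushi lover,1001'
end

db.transaction(true) do |store|      # read-only transaction
  store.each { |job_id, status| puts "#{job_id}: #{status}" }
end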
data/bin/workflow_manager
CHANGED
@@ -24,7 +24,6 @@ if opt.mode
   unless File.exist?(config_file)
     app_dir = File.expand_path('..', __FILE__)
     default_config_dir = File.join(app_dir, "../config/environments")
-    p default_config_dir
     default_config_file = File.join(default_config_dir, opt.mode+".rb")
     if File.exist?(default_config_file)
       FileUtils.cp(default_config_file, config_file)
data/lib/workflow_manager/server.rb
CHANGED
@@ -3,17 +3,28 @@
 
 require 'drb/drb'
 require 'fileutils'
-
+begin
+  require 'kyotocabinet'
+  NO_KYOTO = false
+rescue LoadError
+  require 'pstore'
+  class PStore
+    def each
+      self.roots.each do |key|
+        yield(key, self[key])
+      end
+    end
+  end
+  NO_KYOTO = true
+end
 
 module WorkflowManager
   # default parameters
-  #LOG_DIR = '/srv/GT/analysis/workflow_manager_logs'
   LOG_DIR = 'logs'
   DB_DIR = 'dbs'
   INTERVAL = 30
   RESUBMIT = 0
 
-#class WorkflowManager
   class Server
     @@config = nil
     class Config
@@ -45,26 +56,47 @@ module WorkflowManager
       end
       @@config
    end
-# end
 
-
-
+    class KyotoDB
+      def initialize(db_file)
+        @file = db_file
+        @db = KyotoCabinet::DB.new
+      end
+      def transaction
+        @db.open(@file)
+        yield(@db)
+        @db.close
+      end
+    end
+    class PStoreDB
+      def initialize(db_file)
+        @db = PStore.new(db_file)
+      end
+      def transaction
+        @db.transaction do
+          yield(@db)
+        end
+      end
+    end
+
     def initialize
-
-
+      @interval = config.interval
+      @resubmit = config.resubmit
       @db_stat = File.join(config.db_dir, 'statuses.kch')
       @db_logs = File.join(config.db_dir, 'logs.kch')
 
-
+      @log_dir = File.expand_path(config.log_dir)
       @db_dir = File.expand_path(config.db_dir)
-
-
-
-
-
-
+      FileUtils.mkdir_p @log_dir unless File.exist?(@log_dir)
+      FileUtils.mkdir_p @db_dir unless File.exist?(@db_dir)
+      #@statuses = KyotoCabinet::DB.new
+      @statuses = NO_KYOTO ? PStoreDB.new(@db_stat) : KyotoDB.new(@db_stat)
+      #@logs = KyotoCabinet::DB.new
+      @logs = NO_KYOTO ? PStoreDB.new(@db_logs) : KyotoDB.new(@db_logs)
+      @system_log = File.join(@log_dir, "system.log")
+      @mutex = Mutex.new
       @cluster = config.cluster
-
+      log_puts("Server starts")
     end
     def hello
       'hello, '+ @cluster.name
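The KyotoDB and PStoreDB wrappers added above expose the same #transaction block interface, so the rest of Server can read and write job state without knowing whether KyotoCabinet is installed. A rough usage sketch under that assumption; the path and job values are illustrative, and the constants and classes come from the code shown in this diff:

# Sketch only: pick a backend once, then always go through #transaction.
db_file  = File.join('dbs', 'statuses.kch')
statuses = NO_KYOTO ? PStoreDB.new(db_file) : KyotoDB.new(db_file)

statuses.transaction do |db|
  # value layout mirrors [status, script_name, time, user, project_number].join(',')
  db['120001'] = ['running', 'demo.sh', Time.now.strftime("%Y-%m-%d %H:%M:%S"), 'sushi lover', 1001].join(',')
end

statuses.transaction do |db|
  puts db['120001']
end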
@@ -73,163 +105,175 @@ module WorkflowManager
       @cluster.copy_commands(org_dir, dest_parent_dir)
     end
     def log_puts(str)
-
-
-
-
-
-
+      time = Time.now.strftime("[%Y.%m.%d %H:%M:%S]")
+      @mutex.synchronize do
+        open(@system_log, "a") do |out|
+          out.print time + " " + str + "\n"
+        end
+      end
     end
     def start_monitoring(submit_command, user = 'sushi lover', resubmit = 0, script = '', project_number = 0, sge_options='', log_dir = '')
-
+      log_puts("monitoring: script=" + submit_command + " user=" + user + " resubmit=" + resubmit.to_s + " project=" + project_number.to_s + " sge option=" + sge_options + " log dir=" + log_dir.to_s)
 
-
-
-
-
-
-
+      #warn submit_command
+      #
+      # TODO: analyze arguments
+      #
+      job_id, log_file, command = @cluster.submit_job(submit_command, script, sge_options)
+      log_puts("submit: " + job_id + " " + command)
 
-
-
-
-
-
-
-
-
-
-
-
+      #
+      # monitor worker
+      #
+      if job_id and log_file
+        monitor_worker = Thread.new(job_id, log_file, submit_command, user, resubmit, script, project_number, sge_options, log_dir) do |t_job_id, t_log_file, t_submit_command, t_user, t_resubmit, t_script, t_project_number, t_sge_options, t_log_dir|
+          loop do
+            status = success_or_fail(t_job_id, t_log_file)
+            script_name = File.basename(submit_command).split(/-/).first
+            #@statuses.open(@db_stat)
+            @statuses.transaction do |statuses|
+              #start_time = if stat = @statuses[t_job_id] and stat = stat.split(/,/) and time = stat[2]
+              start_time = if stat = statuses[t_job_id] and stat = stat.split(/,/) and time = stat[2]
+                time
+              end
+              time = if start_time
+                if status == 'success' or status == 'fail'
+                  start_time + '/' + Time.now.strftime("%Y-%m-%d %H:%M:%S")
+                else
+                  start_time
                 end
-
-
-
-
-
-
-
-
-
-            @statuses[t_job_id] = [status, script_name, time, user, project_number].join(',')
-            @statuses.close
-            @logs.open(@db_logs)
-            @logs[t_job_id] = t_log_file
-            @logs.close
-            #warn t_job_id + " " + status
-            if status == 'success'
-              log_puts(status + ": " + t_job_id)
-              unless t_log_dir.empty?
-                copy_commands(t_log_file, t_log_dir).each do |command|
-                  log_puts(command)
-                  system command
-                end
-                err_file = t_log_file.gsub('_o.log','_e.log')
-                copy_commands(err_file, t_log_dir).each do |command|
-                  log_puts(command)
-                  system command
-                end
+              else
+                Time.now.strftime("%Y-%m-%d %H:%M:%S")
+              end
+              #@statuses[t_job_id] = [status, script_name, time, user, project_number].join(',')
+              statuses[t_job_id] = [status, script_name, time, user, project_number].join(',')
+              #@statuses.close
+            end
+            @logs.transaction do |logs|
+              logs[t_job_id] = t_log_file
             end
-
-
-
-
-
-
-
-
-
-
-
-
-
-            else
-              log_puts("fail: " + t_job_id)
-            end
-            unless t_log_dir.empty?
-              copy_commands(t_log_file, t_log_dir).each do |command|
-                log_puts(command)
-                system command
+            #warn t_job_id + " " + status
+            if status == 'success'
+              log_puts(status + ": " + t_job_id)
+              unless t_log_dir.empty?
+                copy_commands(t_log_file, t_log_dir).each do |command|
+                  log_puts(command)
+                  system command
+                end
+                err_file = t_log_file.gsub('_o.log','_e.log')
+                copy_commands(err_file, t_log_dir).each do |command|
+                  log_puts(command)
+                  system command
+                end
              end
-
-
-
-
+              Thread.current.kill
+            elsif status == 'fail'
+              log_puts(status + ": " + t_job_id)
+              #
+              # TODO: re-submit
+              #
+              if t_resubmit < RESUBMIT
+                log_puts("resubmit: " + t_job_id)
+                resubmit_job_id = start_monitoring(t_submit_command, t_user, t_resubmit + 1, t_script, t_project_number, t_sge_options)
+                script_name = File.basename(submit_command).split(/-/).first
+                #@statuses.open(@db_stat)
+                @statuses.transaction do |statuses|
+                  statuses[t_job_id] = ["resubmit: " + resubmit_job_id.to_s, script_name, Time.now.strftime("%Y-%m-%d %H:%M:%S"), t_user, t_project_number].join(',')
+                  #@statuses.close
+                end
+              else
+                log_puts("fail: " + t_job_id)
              end
+              unless t_log_dir.empty?
+                copy_commands(t_log_file, t_log_dir).each do |command|
+                  log_puts(command)
+                  system command
+                end
+                err_file = t_log_file.gsub('_o.log','_e.log')
+                copy_commands(err_file, t_log_dir).each do |command|
+                  log_puts(command)
+                  system command
+                end
+              end
+              Thread.current.kill
             end
-
-
-            sleep @interval
-          end
-        end
-        job_id.to_i
+            sleep @interval
+          end
         end
+        job_id.to_i
+      end
     end
     def status(job_id)
-
-
-
-
-
+      stat = nil
+      #@statuses.open(@db_stat)
+      @statuses.transaction do |statuses|
+        stat = statuses[job_id.to_s]
+        #@statuses.close
+      end
+      stat
     end
     def job_list(with_results=false, project_number=nil)
       s = []
-
-      @statuses.
-
-      if
+      #@statuses.open(@db_stat)
+      @statuses.transaction do |statuses|
+        statuses.each do |key, value|
+          if project_number
+            if x = value.split(/,/)[4].to_i==project_number.to_i
+              s << [key, value]
+            end
+          else
             s << [key, value]
           end
-        else
-          s << [key, value]
        end
+        #@statuses.close
      end
-      @statuses.close
      s.sort.reverse.map{|v| v.join(',')}.join("\n")
    end
    def get_log(job_id, with_err=false)
-
-
-
-
-
-
-
+      log_file = nil
+      @logs.transaction do |logs|
+        log_file = logs[job_id.to_s]
+      end
+      log_data = if log_file and File.exist?(log_file)
+        "__STDOUT LOG__\n\n" + File.read(log_file)
+      else
+        'no log file'
+      end
+      if with_err
+        err_file = log_file.gsub(/_o\.log/,'_e.log')
+        if err_file and File.exist?(err_file)
+          log_data << "\n\n__STDERR LOG__\n\n"
+          log_data << File.read(err_file)
        end
-      if with_err
-        err_file = log_file.gsub(/_o\.log/,'_e.log')
-        if err_file and File.exist?(err_file)
-          log_data << "\n\n__STDERR LOG__\n\n"
-          log_data << File.read(err_file)
      end
-
-      log_data
+      log_data
    end
    def get_script(job_id)
-
-
-
-
-
-
-
-
-
-
-
-
+      script_file = nil
+      @logs.transaction do |logs|
+        script_file = logs[job_id.to_s]
+      end
+      if script_file
+        script_file = script_file.gsub(/_o\.log/,'')
+      end
+      script = if script_file and File.exist?(script_file)
+        File.read(script_file)
+      else
+        'no script file'
+      end
+      script
    end
    def success_or_fail(job_id, log_file)
-
-
-
-
-
-
-
-
-
-
+      job_running = @cluster.job_running?(job_id)
+      job_ends = @cluster.job_ends?(log_file)
+      msg = if job_running
+        'running'
+      elsif job_ends
+        'success'
+      else
+        'fail'
+      end
+      msg
    end
  end
end