rub2 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.txt +84 -0
- data/Rakefile +2 -0
- data/lib/rub2.rb +477 -0
- data/lib/rub2/version.rb +3 -0
- data/rub2.gemspec +26 -0
- data/sample.rb +38 -0
- metadata +81 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3076d25cda3711a25aa2affcb5d6e6910116d22c
|
4
|
+
data.tar.gz: 1dae4fdf67c4c12d4fe6ffd5497b1cd4003d88d1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4fd9836ab51f32dfa84dfdf4a045bda69fd0886b9fc3e30a7ca34ae51145e27bc35970593584d5959d93266f952c3dd2cf9ebc7b4b523a888854f551046c22ed
|
7
|
+
data.tar.gz: 14c99adaec23c336e8c8ed7f0e806d75bb459dbfde0c3b354b65b635d3f61d21128d821d251ad882ac27d6218f05d384e4d4508c6261ae0f4077ce7c2a3d176d
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2017 holrock
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.txt
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
* There's the rub
|
2
|
+
|
3
|
+
wrapper for qsub
|
4
|
+
|
5
|
+
* サンプル
|
6
|
+
|
7
|
+
require 'rub2'
|
8
|
+
|
9
|
+
submit "SimpleJob" do
|
10
|
+
execute_with Dir.glob("/etc/*.conf") do |dotfile|
|
11
|
+
"wc -l #{dotfile}"
|
12
|
+
end
|
13
|
+
end
|
14
|
+
# exit if job failed
|
15
|
+
|
16
|
+
|
17
|
+
submit "WithOptions" do
|
18
|
+
log 'log/test.log' # log file path
|
19
|
+
resource 'nodes' => '1:ppn=4', 'mem' => '15mb' # qsub -l option
|
20
|
+
array_request 2..4 # or array_request [1, 3] # qsub -t option
|
21
|
+
inherit_environment # qsub -V option
|
22
|
+
continue_on_error # don't exit on job failed
|
23
|
+
dry_run # output script and exit. no execute
|
24
|
+
|
25
|
+
# multiple arguments
|
26
|
+
execute_with [1, 2, 3, 4], [4, 5, 6, 7] do |arg1, arg2|
|
27
|
+
"echo '#{arg1} + #{arg2}' | bc"
|
28
|
+
end
|
29
|
+
|
30
|
+
# succeeded handler
|
31
|
+
on_done do
|
32
|
+
puts 'done'
|
33
|
+
end
|
34
|
+
|
35
|
+
# failed handler
|
36
|
+
# results: [[array_id, ret_code], ...]
|
37
|
+
on_fail do |results|
|
38
|
+
results.each do |job_id, ret_code|
|
39
|
+
puts get_executed_command(job_id) unless ret_code == 0
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
* 使い方
|
45
|
+
|
46
|
+
submit "JobName"で指定された名前でジョブを作成します。
|
47
|
+
submitブロック内でexecute_withの引数に配列を渡すと、配列の要素数分JobArrayを作成して、execute_withから返された文字列をbashの引数として実行します。
|
48
|
+
jobが全て成功するとsubmitブロックはtrueを返します。
|
49
|
+
|
50
|
+
* オプション
|
51
|
+
|
52
|
+
** log
|
53
|
+
|
54
|
+
ログファイル出力先。未指定の場合はカレントフォルダにログディレクトリを作成します。
|
55
|
+
|
56
|
+
** resource
|
57
|
+
|
58
|
+
qsub -lオプション。リソース名 => 値のハッシュを渡してください。
|
59
|
+
|
60
|
+
** array_request
|
61
|
+
|
62
|
+
実行するarray index。1-10の時は1..10のようにRangeを、1,2,4の時は[1,2,4]と配列を渡してください。
|
63
|
+
|
64
|
+
** inherit_environment
|
65
|
+
|
66
|
+
qsub -Vオプション。環境変数を引き継ぎます。
|
67
|
+
|
68
|
+
** dry_run
|
69
|
+
|
70
|
+
実行せずにsubmitするスクリプトを表示します。デバッグ向け。
|
71
|
+
|
72
|
+
** max_retry
|
73
|
+
|
74
|
+
失敗したジョブを再投入する最大回数。デフォルトは0(再投入しない)。
|
75
|
+
|
76
|
+
* ハンドラ
|
77
|
+
|
78
|
+
ジョブが成功した場合にはon_doneハンドラ、一つでも失敗した場合はon_failハンドラで指定されたブロックを実行します。
|
79
|
+
未指定の場合はデフォルトハンドラを実行し、失敗したジョブがあれば表示します。
|
80
|
+
|
81
|
+
|
82
|
+
* その他
|
83
|
+
|
84
|
+
各jobの結果はRinda(dRuby)サーバーで受け取ります。ネットワーク不調などにより結果が受け取れず終了しないときは、適当にctrl-cで終わらせてください。
|
data/Rakefile
ADDED
data/lib/rub2.rb
ADDED
@@ -0,0 +1,477 @@
|
|
1
|
+
require "rub2/version"
|
2
|
+
|
3
|
+
require 'erb'
|
4
|
+
require 'pathname'
|
5
|
+
require 'open3'
|
6
|
+
require 'rinda/tuplespace'
|
7
|
+
|
8
|
+
module Rub2
|
9
|
+
|
10
|
+
# Print one log line to standard output: the current local time formatted
# with "%FT%T", a tab, the string form of +str+, then a newline.
def putlog(str)
  timestamp = Time.now.strftime("%FT%T")
  print format("%s\t%s\n", timestamp, str)
end
|
14
|
+
module_function :putlog
|
15
|
+
|
16
|
+
# Renders the PBS batch script that is piped to qsub for one array job.
#
# The script text comes from ScriptTemplate, evaluated with ERB against this
# object's instance variables (shell, job name, log path, array request,
# resources, command list and the DRb URI the jobs report their result to).
class JobScript
  # slot_limit is exposed so that callers can set the "%N" suffix that
  # make_array_request_string appends to the array request.
  attr_accessor :log, :shell, :resource, :array_request,
                :inherit_environment, :commands, :log_path, :uri,
                :slot_limit
  attr_reader :source

  # name - job name emitted in the "#PBS -N" directive; it is also used to
  #        build the default log path.
  def initialize(name)
    @name = name
    @shell = '/bin/bash'
    @log_path = make_default_log_path(name)
    @uri = nil
    @slot_limit = nil
  end

  # Render ScriptTemplate against this object, store the text in @source
  # and return it.
  def build
    @source = new_template.result(binding)
  end

  # Build the value of the "-t" array request.
  #   Range (1..100)    -> "1-100"
  #   Array [1, 10, 50] -> "1,10,50"
  # When no request was set it defaults to 1..commands.size, and that
  # default is stored in @array_request. When slot_limit is set,
  # "%<slot_limit>" is appended.
  def make_array_request_string
    @array_request = (1..@commands.size) if @array_request.nil?
    limit = @slot_limit.nil? ? '' : "%#{@slot_limit}"

    if @array_request.kind_of?(Range)
      last = @array_request.last
      # An exclusive range (a...b) ends one index before b.
      last -= 1 if @array_request.exclude_end?
      return sprintf("%d-%d%s", @array_request.first, last, limit)
    end

    @array_request.sort.join(',') + limit
  end

  private

  # Create the ERB object for ScriptTemplate with trim mode '-'.
  # Passing trim_mode as the third positional argument is deprecated since
  # Ruby 2.6, so the keyword form is used whenever this ERB accepts it;
  # older ERB versions fall back to the positional form.
  def new_template
    if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
      ERB.new(ScriptTemplate, trim_mode: '-')
    else
      ERB.new(ScriptTemplate, nil, '-')
    end
  end

  # return path to "pwd/log_ymd_hms/jobname.log"
  def make_default_log_path(name)
    stamp = Time.now.strftime('%Y%m%d_%H%M%S')
    Pathname.new(Dir.pwd).join("log_#{stamp}", "#{name}.log")
  end

  # return key=value[,key=value] ('' when the resource hash is empty)
  def make_pbs_resources_string
    @resource.map { |key, value| "#{key}=#{value}" }.join(',')
  end

  # Script template. The "-l" directive is emitted only for a non-empty
  # resource hash, so an empty hash no longer produces a bare "#PBS -l ".
  # Each job runs its command through bash, then tries up to 11 times
  # (5 seconds apart) to write
  # [submitter pid, job id, array id, hostname, exit code] to the DRb
  # object at @uri before giving up.
  ScriptTemplate =<<'EOS'
#PBS -S <%= @shell %>
#PBS -N <%= @name %>
#PBS -j oe
#PBS -o <%= @log_path %>
#PBS -t <%= make_array_request_string %>
<%- unless @resource.nil? || @resource.empty? -%>
#PBS -l <%= make_pbs_resources_string %>
<%- end -%>
<%- if @inherit_environment -%>
#PBS -V
<%- end -%>

CMD=(
<% @commands.each do |i| -%>
"<%= i.gsub(/"/){ '\\"' } %>"
<% end -%>
)
ID=$(($PBS_ARRAYID - 1))
cd $PBS_O_WORKDIR
echo "job start: $(date -Iminute)"
echo "$PBS_O_HOST -> $(hostname): $PBS_JOBNAME $PBS_JOBID (cwd: $PWD)"
echo "execute: ${CMD[$ID]}"
bash -c "set -e; set -o pipefail; ${CMD[$ID]}"
RET=$?
echo "job exit: $RET at: $(date -Iminute)"
for i in {0..10}; do
ruby -r drb -e "DRbObject.new_with_uri('<%= @uri %>').write([<%= Process.pid %>, '$PBS_JOBID', $PBS_ARRAYID, '$HOSTNAME', $RET])"
if [ $? -eq 0 ]; then
exit $RET
fi
sleep 5
done
echo GIVEUP
exit $RET
EOS

end
|
108
|
+
|
109
|
+
|
110
|
+
# State of one element of an array job: the id of the submission it belongs
# to, its array index, its last reported exit code, how many times it has
# been resubmitted, and the time it was first seen as possibly dead.
class Job
  attr_reader :parent_id, :array_id, :exit_code, :died_at

  # parent_id - id of the qsub submission this element belongs to
  # array_id  - array index of this element
  # max_retry - maximum number of resubmissions; raises RuntimeError when
  #             negative
  def initialize(parent_id, array_id, max_retry)
    raise "negative max_retry" if max_retry < 0

    @parent_id = parent_id
    @array_id = array_id
    @max_retry = max_retry
    @exit_code = nil
    @retry_count = 0
    @died_at = nil
  end

  # True once an exit code is known and no further retry is pending.
  def finished?
    !@exit_code.nil? && !need_retry?
  end

  # True when the recorded exit code is 0.
  def succeeded?
    @exit_code == 0
  end

  # True when the job has a non-zero exit code and retries remain.
  def need_retry?
    return false if @exit_code.nil? || succeeded?

    @retry_count < @max_retry
  end

  # Identifier in the form "parent_id[array_id]".
  def job_id
    "#{@parent_id}[#{@array_id}]"
  end

  # Record the exit code and clear the dead-time mark. A warning is logged
  # when an exit code was already recorded.
  def set_exit_code(exit_code)
    Rub2.putlog("warn: already assigned exit_code #{self} #{@exit_code} -> #{exit_code}") unless @exit_code.nil?
    @exit_code = exit_code
    @died_at = nil
  end

  # True when the job was marked dead more than 60 seconds before +t+
  # (dead job, maybe).
  def dead_end?(t)
    !@died_at.nil? && (t - @died_at) > 60.0
  end

  # Remember +t+ as the time the job was first seen as possibly dead.
  def set_dead_time(t)
    @died_at = t
  end

  # Attach the job to a new submission: exit code and dead-time mark are
  # cleared and the retry counter is incremented.
  def set_resubmit_id(new_parent_id)
    @retry_count += 1
    @parent_id = new_parent_id
    @exit_code = nil
    @died_at = nil
  end

  def inspect
    fields = [
      ["parent_id", @parent_id],
      ["array_id", @array_id],
      ["exit_code", @exit_code],
      ["retry_count", @retry_count],
      ["max_retry", @max_retry],
      ["died_at", @died_at]
    ]
    "#<Job: " + fields.map { |label, value| "@#{label}=#{value}" }.join(", ") + ">"
  end

  def to_s
    job_id
  end
end
|
174
|
+
|
175
|
+
|
176
|
+
# Holds the Job objects of one array submission and applies result updates
# to them.
class JobStore

  def initialize
    @jobs = nil
  end

  # Create one Job per entry of +job_index+ (any object responding to map,
  # e.g. a Range or an Array of array indices). May be called only once;
  # a second call raises RuntimeError.
  def init_job(job_id, job_index, max_retry)
    raise "already initialized" unless @jobs.nil?

    @jobs = job_index.map { |index| Job.new(job_id, index, max_retry) }
  end

  # Return the job whose array index equals +index+, or nil.
  def get_job_from_index(index)
    @jobs.find { |job| job.array_id == index }
  end

  # exit_status: [ {:array_id => 1, :exit_code => 1}, ...]
  # Entries whose array index is not in the store are logged and skipped
  # instead of failing with NoMethodError on nil.
  def update_exit_code(exit_status)
    exit_status.each do |es|
      job = get_job_from_index(es[:array_id])
      if job.nil?
        Rub2.putlog("warn: ignore result for unknown array index #{es[:array_id].inspect}")
        next
      end
      job.set_exit_code(es[:exit_code])
    end
  end

  # For each given job: if it was already marked dead and Job#dead_end?
  # reports the mark as expired, record exit code -1; otherwise (re)set its
  # dead-time mark to now.
  def mark_dead_job(jobs)
    now = Time.now
    jobs.each do |job|
      if job.dead_end?(now)
        job.set_exit_code(-1)
        Rub2.putlog("#{job} no response 1min after finished.")
      else
        job.set_dead_time(now)
      end
    end
  end

  # Yield every job to the given block.
  def each_job(&block)
    @jobs.each(&block)
  end

  def all_finish?
    @jobs.all? { |job| job.finished? }
  end

  def select_retry_jobs
    @jobs.select { |job| job.need_retry? }
  end

  def job_count
    @jobs.size
  end
end
|
233
|
+
|
234
|
+
# Finds jobs that are not finished in the store and are no longer listed by
# `qstat -t`, and passes them to JobStore#mark_dead_job.
class DeadJobCollector

  # Run `qstat -t` (stderr discarded), collect every unfinished job of
  # +job_store+ that is missing from its output and hand them to
  # job_store.mark_dead_job.
  def collect(job_store)
    running_job_ids = parse_qstat(`qstat -t 2>/dev/null`)
    failed = []
    job_store.each_job do |job|
      failed.push(job) if dead_job?(job, running_job_ids)
    end
    job_store.mark_dead_job(failed)
  end

  private

  # Extract the leading job id of every qstat output line.
  # A line must start with digits, optionally followed by "[digits]"
  # (e.g. "123" or "123[4]"); other lines are ignored.
  # Returns a Hash whose keys are the ids found.
  def parse_qstat(str)
    id_hash = {}
    str.each_line do |line|
      # (?:...) is a non-capturing group; the previous "(:?" spelling made
      # the pattern accept a stray ':' before the bracket instead.
      matched = /\A(\d+(?:\[\d+\])?)/.match(line)
      id_hash[matched[1]] = 1 if matched
    end
    id_hash
  end

  # A job is dead when qstat no longer lists it and it has not finished.
  def dead_job?(job, running_job_ids)
    return false if running_job_ids.has_key?(job.job_id)

    !job.finished?
  end

end
|
263
|
+
|
264
|
+
# Takes job result tuples from the DRb object at the given URI and applies
# them to a job store.
class JobResultCollector
  # uri       - URI passed to DRbObject.new_with_uri
  # timeout   - timeout passed to each take call
  # job_count - total number of jobs, used only in the progress log line
  def initialize(uri, timeout, job_count)
    @success_count = 0
    @job_count = job_count
    @timeout = timeout
    @pid = Process.pid
    @drb = DRbObject.new_with_uri(uri)
  end

  # block thread
  # Take one tuple [pid, job_id, array_id, host, exit_code] whose first
  # element equals this process id, store its exit code in +job_store+ and
  # log a progress line with the running count of successful jobs.
  def collect_job_result(job_store)
    pattern = [@pid, nil, nil, nil, nil]
    _pid, job_id, array_id, host, exit_code = @drb.take(pattern, @timeout)

    status = {:array_id => array_id, :exit_code => exit_code}
    job_store.update_exit_code([status])

    @success_count += 1 if exit_code == 0
    progress = "(#{@success_count}/#{@job_count})"
    Rub2.putlog "#{job_id}(#{host}) => #{exit_code}\t#{progress}"
  end
end
|
281
|
+
|
282
|
+
# Receiver of the submit DSL block: collects job options and commands,
# submits the generated script with qsub and waits for the job results.
class Manager
  def initialize(name)
    @script = JobScript.new(name)
    @job_store = JobStore.new
    @timeout = 30
    @max_retry_count = 0
    @jobid = []
    # Optional state, initialised explicitly so that reading it never
    # touches an unset instance variable.
    @dry_run = false
    @continue_on_error = false
    @done_proc = nil
    @fail_proc = nil
  end

  # example:
  # execute_with array (, arrays) do |arg1 (, args...)|
  #   return command_string
  # end
  #
  # The block is called once per zipped element; every truthy return value
  # becomes one command of the array job (nil/false results are skipped).
  def execute_with(first, *rest, &block)
    commands = []
    first.zip(*rest) do |args|
      cmd = block.call(*args)
      commands.push(cmd) if cmd
    end
    @script.commands = commands
  end

  ### handler

  # example: on_fail {|results| p results}
  # results is an Array of [array_id, exit_code] pairs.
  def on_fail(&block)
    @fail_proc = block
  end

  # example: on_done {puts 'done'}
  def on_done(&block)
    @done_proc = block
  end

  ### job options

  # Log file path of the job.
  def log(log_path)
    @script.log_path = Pathname.new(log_path)
  end

  # Shell used for the "#PBS -S" directive.
  def shell(intep)
    @script.shell = intep
  end

  # qsub -l option: hash of resource name => value.
  def resource(res = {})
    @script.resource = res
  end

  # qsub -t option: Range or Array of array indices.
  def array_request(req)
    @script.array_request = req
  end

  # slot limit doesn't work on torque
  # Relies on JobScript responding to slot_limit=.
  def slot_limit(limit)
    @script.slot_limit = limit
  end

  # Print the generated script instead of submitting it.
  def dry_run
    @dry_run = true
  end

  # qsub -V option.
  def inherit_environment
    @script.inherit_environment = true
  end

  # Make wait_finish return false instead of exiting when a job failed.
  def continue_on_error
    @continue_on_error = true
  end

  # Maximum number of resubmissions of a failed job.
  def max_retry(count)
    @max_retry_count = count
  end

  ### accessor

  # Return the command at position job_id - 1 of the command list
  # (job_id is a 1-based array index).
  def get_executed_command(job_id)
    @script.commands[job_id - 1]
  end

  ### job control

  # exec qsub
  # In dry-run mode the script is only built and printed, and nil is
  # returned. Otherwise the log directory is created, the result server is
  # started, the script is submitted and the job store is initialised.
  def submit
    unless @dry_run
      log_dir = @script.log_path.dirname
      log_dir.mkpath unless log_dir.exist?
      @script.uri = start_tuplespace
    end

    @script.build

    if @dry_run
      puts @script.source
      return
    end

    @jobid << submit_qsub(@script.source)
    @job_store.init_job(@jobid.first, @script.array_request, @max_retry_count)
  end

  # Wait until all jobs are finished, then run the matching handler.
  # Returns true when every job exited with 0 (or in dry-run mode) and
  # false when a job failed and continue_on_error is set; otherwise the
  # process exits with a failure status.
  def wait_finish
    return true if @dry_run

    results = polling_loop

    if results.all? { |_aid, ret| ret == 0 }
      if @done_proc
        @done_proc.call
      else
        Rub2.putlog "job succeeded"
      end
      return true
    end

    if @fail_proc
      @fail_proc.call(results)
    else
      results.each do |_aid, ret|
        Rub2.putlog "array job failed: #{ret}" unless ret == 0
      end
    end

    return false if @continue_on_error

    Rub2.putlog "job failed: #{@jobid.join(',')}"
    exit false
  end

  private

  # Collect results until every job is finished. Each time the result
  # collector times out, dead jobs are looked for and jobs that need a
  # retry are resubmitted.
  # Returns an Array of [array_id, exit_code] pairs.
  def polling_loop
    job_result = JobResultCollector.new(@uri, @timeout, @job_store.job_count)
    dead_job = DeadJobCollector.new

    until @job_store.all_finish?
      begin
        job_result.collect_job_result(@job_store) until @job_store.all_finish?
      rescue Rinda::RequestExpiredError
        # ignore timeout
      end
      dead_job.collect(@job_store)
      retry_job = @job_store.select_retry_jobs
      resubmit_faild_job(retry_job) unless retry_job.empty?
    end

    results = []
    @job_store.each_job do |job|
      results.push([job.array_id, job.exit_code])
    end
    results
  end

  # Resubmit the given jobs as one new submission restricted to their
  # array indices and attach them to the new job id.
  def resubmit_faild_job(failed_jobs)
    return if failed_jobs.empty?

    ids = failed_jobs.map { |job| job.array_id }.join(',')
    newjobid = submit_qsub(@script.source, ids)
    @jobid << newjobid
    failed_jobs.each { |job| job.set_resubmit_id(newjobid) }
  end

  # Pipe +script+ to qsub (adding "-t array_option" when given) and return
  # the leading numeric part of the job id printed by qsub.
  # Raises RuntimeError when qsub prints nothing on stdout or when its
  # output does not start with a number.
  def submit_qsub(script, array_option = nil)
    cmd = "qsub"
    cmd += " -t #{array_option}" if array_option

    # capture3 reads stdout and stderr concurrently, so qsub cannot block
    # on a full stderr pipe while stdout is still being read.
    out, err, _status = Open3.capture3(cmd, stdin_data: script)
    output = out.chomp
    raise "qsub error: " + err.chomp if output.empty?

    jobid = output[/\A(\d+)/, 1]
    # Without this check a non-numeric reply would give a nil job id.
    raise "qsub error: unexpected output: #{output}" if jobid.nil?

    Rub2.putlog "job submited: #{jobid}[#{array_option || @script.make_array_request_string}]"
    jobid
  end

  # start server for job results
  # Starts a DRb service in front of a new Rinda::TupleSpace and returns
  # its URI.
  def start_tuplespace
    @ts = Rinda::TupleSpace.new
    @drb = DRb.start_service("druby://:0", @ts)
    @uri = @drb.uri
    Rub2.putlog "start Rinda Server: #{@uri}"
    @uri
  end

end
|
468
|
+
end
|
469
|
+
|
470
|
+
# define global DSL function
|
471
|
+
# Create a Rub2::Manager for +name+, evaluate the block in the manager's
# context, submit the job and return the result of Manager#wait_finish.
# Raises RuntimeError when +name+ is empty.
def submit(name, &block)
  raise "Empty name" if name.empty?

  manager = Rub2::Manager.new(name)
  manager.instance_eval(&block)
  manager.submit
  manager.wait_finish
end
|
data/lib/rub2/version.rb
ADDED
data/rub2.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
|
2
|
+
lib = File.expand_path("../lib", __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require "rub2/version"
|
5
|
+
|
6
|
+
# Gem specification for rub2; the version is read from Rub2::VERSION.
Gem::Specification.new do |gem|
  gem.name    = "rub2"
  gem.version = Rub2::VERSION
  gem.authors = ["holrock"]
  gem.email   = ["ispeporez@gmail.com"]

  gem.summary     = "simple DSL for submitting jobs via qsub"
  gem.description = ""
  gem.homepage    = "https://github.com/holrock/rub2"
  gem.license     = "MIT"

  # Package every git-tracked file except those under test/, spec/ or
  # features/.
  tracked = `git ls-files -z`.split("\x0")
  gem.files = tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir = "exe"
  # Executables are the basenames of the packaged files under exe/.
  gem.executables = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  gem.add_development_dependency "bundler", "~> 1.16"
  gem.add_development_dependency "rake", "~> 10.0"
end
|
data/sample.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rub2'
|
4
|
+
|
5
|
+
submit "SimpleJob" do
|
6
|
+
execute_with Dir.glob("/etc/*.conf") do |dotfile|
|
7
|
+
"wc -l #{dotfile}"
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
|
12
|
+
submit "WithOptions" do
|
13
|
+
log 'log/test.log' # log file path
|
14
|
+
resource 'nodes' => '1:ppn=4', 'mem' => '15mb' # qsub -l option
|
15
|
+
array_request 2..4 # or array_request [1, 3] # qsub -t option
|
16
|
+
inherit_environment # qsub -V option
|
17
|
+
continue_on_error # don't exit on job error
|
18
|
+
dry_run # output script and exit. no execute
|
19
|
+
max_retry 5
|
20
|
+
|
21
|
+
# multiple arguments
|
22
|
+
execute_with [1, 2, 3, 4], [4, 5, 6, 7] do |arg1, arg2|
|
23
|
+
"echo '#{arg1} + #{arg2}' | bc"
|
24
|
+
end
|
25
|
+
|
26
|
+
# succeeded handler
|
27
|
+
on_done do
|
28
|
+
puts 'done'
|
29
|
+
end
|
30
|
+
|
31
|
+
# failed handler
|
32
|
+
# results: [[array_id, ret_code], ...]
|
33
|
+
on_fail do |results|
|
34
|
+
results.each do |job_id, ret_code|
|
35
|
+
puts get_executed_command(job_id) unless ret_code == 0
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rub2
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- holrock
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-12-22 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.16'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.16'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
description: ''
|
42
|
+
email:
|
43
|
+
- ispeporez@gmail.com
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- ".gitignore"
|
49
|
+
- Gemfile
|
50
|
+
- LICENSE.txt
|
51
|
+
- README.txt
|
52
|
+
- Rakefile
|
53
|
+
- lib/rub2.rb
|
54
|
+
- lib/rub2/version.rb
|
55
|
+
- rub2.gemspec
|
56
|
+
- sample.rb
|
57
|
+
homepage: https://github.com/holrock/rub2
|
58
|
+
licenses:
|
59
|
+
- MIT
|
60
|
+
metadata: {}
|
61
|
+
post_install_message:
|
62
|
+
rdoc_options: []
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
requirements: []
|
76
|
+
rubyforge_project:
|
77
|
+
rubygems_version: 2.6.14
|
78
|
+
signing_key:
|
79
|
+
specification_version: 4
|
80
|
+
summary: simple DSL for submitting jobs via qsub
|
81
|
+
test_files: []
|