rq 0.1.7
- data/DEPENDS +5 -0
- data/HISTORY +26 -0
- data/README +552 -0
- data/TODO +13 -0
- data/VERSION +1 -0
- data/bin/rq +391 -0
- data/bin/rq-0.1.7 +410 -0
- data/install.rb +143 -0
- data/lib/rq-0.1.7.rb +82 -0
- data/lib/rq-0.1.7/backer.rb +27 -0
- data/lib/rq-0.1.7/configfile.rb +78 -0
- data/lib/rq-0.1.7/configurator.rb +36 -0
- data/lib/rq-0.1.7/creator.rb +23 -0
- data/lib/rq-0.1.7/defaultconfig.txt +5 -0
- data/lib/rq-0.1.7/deleter.rb +39 -0
- data/lib/rq-0.1.7/executor.rb +41 -0
- data/lib/rq-0.1.7/feeder.rb +367 -0
- data/lib/rq-0.1.7/job.rb +51 -0
- data/lib/rq-0.1.7/jobqueue.rb +432 -0
- data/lib/rq-0.1.7/jobrunner.rb +63 -0
- data/lib/rq-0.1.7/jobrunnerdaemon.rb +179 -0
- data/lib/rq-0.1.7/lister.rb +22 -0
- data/lib/rq-0.1.7/locker.rb +37 -0
- data/lib/rq-0.1.7/logging.rb +117 -0
- data/lib/rq-0.1.7/mainhelper.rb +53 -0
- data/lib/rq-0.1.7/qdb.rb +634 -0
- data/lib/rq-0.1.7/querier.rb +33 -0
- data/lib/rq-0.1.7/refresher.rb +72 -0
- data/lib/rq-0.1.7/sleepcycle.rb +46 -0
- data/lib/rq-0.1.7/snapshotter.rb +25 -0
- data/lib/rq-0.1.7/statuslister.rb +22 -0
- data/lib/rq-0.1.7/submitter.rb +90 -0
- data/lib/rq-0.1.7/updater.rb +95 -0
- data/lib/rq-0.1.7/usage.rb +609 -0
- data/lib/rq-0.1.7/util.rb +286 -0
- data/lib/rq.rb +84 -0
- data/rdoc.cmd +2 -0
- data/rq +2 -0
- data/rq.gemspec +36 -0
- data/rq.help +552 -0
- data/white_box/crontab +2 -0
- data/white_box/killrq +18 -0
- data/white_box/rq_killer +27 -0
- metadata +126 -0
data/lib/rq-0.1.7/util.rb
ADDED
@@ -0,0 +1,286 @@
+unless defined? $__rq_util__
+module RQ
+#{{{
+  LIBDIR = File::dirname(File::expand_path(__FILE__)) + File::SEPARATOR unless
+    defined? LIBDIR
+
+  require 'pathname'
+  require 'socket'
+  require 'tmpdir'
+
+  module Util
+  #{{{
+    class << self
+      def export sym
+      #{{{
+        sym = "#{ sym }".intern
+        module_function sym
+        public sym
+      #}}}
+      end
+      def append_features c
+      #{{{
+        super
+        c.extend Util
+      #}}}
+      end
+    end
+    def mcp obj
+    #{{{
+      Marshal.load(Marshal.dump(obj))
+    #}}}
+    end
+    export 'mcp'
+    def klass
+    #{{{
+      self.class
+    #}}}
+    end
+    export 'klass'
+    def realpath path
+    #{{{
+      path = File::expand_path "#{ path }"
+      begin
+        Pathname::new(path).realpath.to_s
+      rescue Errno::ENOENT, Errno::ENOTDIR
+        path
+      end
+    #}}}
+    end
+    export 'realpath'
+    def hashify(*hashes)
+    #{{{
+      hashes.inject(accum={}){|accum,hash| accum.update hash}
+    #}}}
+    end
+    export 'hashify'
+    def getopt opt, hash, default = nil
+    #{{{
+      key = opt
+      return hash[key] if hash.has_key? key
+
+      key = "#{ key }"
+      return hash[key] if hash.has_key? key
+
+      key = key.intern
+      return hash[key] if hash.has_key? key
+
+      return default
+    #}}}
+    end
+    export 'getopt'
+    def alive? pid
+    #{{{
+      pid = Integer("#{ pid }")
+      begin
+        Process.kill 0, pid
+        true
+      rescue Errno::ESRCH
+        false
+      end
+    #}}}
+    end
+    export 'alive?'
+    def maim(pid, opts = {})
+    #{{{
+      sigs = getopt('signals', opts) || %w(SIGTERM SIGQUIT SIGKILL)
+      suspend = getopt('suspend', opts) || 4
+      pid = Integer("#{ pid }")
+      sigs.each do |sig|
+        begin
+          Process.kill(sig, pid)
+        rescue Errno::ESRCH
+          return nil
+        end
+        sleep 0.2
+        unless alive?(pid)
+          break
+        else
+          sleep suspend
+        end
+      end
+      not alive?(pid)
+    #}}}
+    end
+    export 'maim'
+    def timestamp time = Time.now
+    #{{{
+      usec = "#{ time.usec }"
+      usec << ('0' * (6 - usec.size)) if usec.size < 6
+      time.strftime('%Y-%m-%d %H:%M:%S.') << usec
+    #}}}
+    end
+    export 'timestamp'
+    def stamptime string, local = true
+    #{{{
+      string = "#{ string }"
+      pat = %r/^\s*(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d).(\d\d\d\d\d\d)\s*$/o
+      match = pat.match string
+      raise ArgumentError, "<#{ string.inspect }>" unless match
+      yyyy,mm,dd,h,m,s,u = match.to_a[1..-1].map{|m| m.to_i}
+      if local
+        Time.local yyyy,mm,dd,h,m,s,u
+      else
+        Time.gm yyyy,mm,dd,h,m,s,u
+      end
+    #}}}
+    end
+    export 'stamptime'
+    def escape! s, char, esc
+    #{{{
+      re = %r/([#{0x5c.chr << esc}]*)#{char}/
+      s.gsub!(re) do
+        (($1.size % 2 == 0) ? ($1 << esc) : $1) + char
+      end
+    #}}}
+    end
+    export 'escape!'
+    def escape s, char, esc
+    #{{{
+      ss = "#{ s }"
+      escape! ss, char, esc
+      ss
+    #}}}
+    end
+    export 'escape'
+    def fork(*args, &block)
+    #{{{
+      begin
+        verbose = $VERBOSE
+        $VERBOSE = nil
+        Process::fork(*args, &block)
+      ensure
+        $VERBOSE = verbose
+      end
+    #}}}
+    end
+    export 'fork'
+    def exec(*args, &block)
+    #{{{
+      begin
+        verbose = $VERBOSE
+        $VERBOSE = nil
+        Kernel::exec(*args, &block)
+      ensure
+        $VERBOSE = verbose
+      end
+    #}}}
+    end
+    export 'exec'
+    def system(*args, &block)
+    #{{{
+      begin
+        verbose = $VERBOSE
+        $VERBOSE = nil
+        Kernel::system(*args, &block)
+      ensure
+        $VERBOSE = verbose
+      end
+    #}}}
+    end
+    export 'system'
+    def hostname
+    #{{{
+      @__hostname__ ||= Socket::gethostname
+    #}}}
+    end
+    export 'hostname'
+    def host
+    #{{{
+      @__host__ ||= Socket::gethostname.gsub(%r/\..*$/o,'')
+    #}}}
+    end
+    export 'host'
+    def emsg e
+    #{{{
+      "#{ e.message } - (#{ e.class })"
+    #}}}
+    end
+    export 'emsg'
+    def btrace e
+    #{{{
+      (e.backtrace or []).join("\n")
+    #}}}
+    end
+    export 'btrace'
+    def errmsg e
+    #{{{
+      emsg(e) << "\n" << btrace(e)
+    #}}}
+    end
+    export 'errmsg'
+    def erreq a, b
+    #{{{
+      a.class == b.class and
+      a.message == b.message and
+      a.backtrace == b.backtrace
+    #}}}
+    end
+    export 'erreq'
+    def tmpnam dir = Dir.tmpdir, seed = File::basename($0)
+    #{{{
+      pid = Process.pid
+      path = "%s_%s_%s_%s_%d" %
+        [Util::hostname, seed, pid, Util::timestamp.gsub(/\s+/o,'_'), rand(101010)]
+      File::join(dir, path)
+    #}}}
+    end
+    export 'tmpnam'
+    def uncache file
+    #{{{
+      refresh = nil
+      begin
+        is_a_file = File === file
+        path = (is_a_file ? file.path : file.to_s)
+        stat = (is_a_file ? file.stat : File::stat(file.to_s))
+        refresh = tmpnam(File::dirname(path))
+        File::link path, refresh rescue File::symlink path, refresh
+        File::chmod stat.mode, path
+        File::utime stat.atime, stat.mtime, path
+      ensure
+        begin
+          File::unlink refresh if refresh
+        rescue Errno::ENOENT
+        end
+      end
+    #}}}
+    end
+    export 'uncache'
+    def columnize buf, width = 80, indent = 0
+    #{{{
+      column = []
+      words = buf.split %r/\s+/o
+      row = ' ' * indent
+      while((word = words.shift))
+        if((row.size + word.size) < (width - 1))
+          row << word
+        else
+          column << row
+          row = ' ' * indent
+          row << word
+        end
+        row << ' ' unless row.size == (width - 1)
+      end
+      column << row unless row.strip.empty?
+      column.join "\n"
+    #}}}
+    end
+    export 'columnize'
+    def defval var, default = nil
+    #{{{
+      v = "#{ var }"
+      c = "DEFAULT_#{ v }".upcase
+      begin
+        klass.send(v) || klass.const_get(c)
+      rescue NameError
+        default
+      end
+    #}}}
+    end
+    export 'defval'
+  #}}}
+  end # module Util
+#}}}
+end # module RQ
+$__rq_util__ = __FILE__
+end
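
A minimal usage sketch, assuming lib/ is on the load path - the export helper above module_function's each method, so everything is callable directly on RQ::Util:

    require 'rq-0.1.7/util'   # path follows this release's lib/ layout

    now = RQ::Util.timestamp               # e.g. "2004-06-29 22:51:00.123456"
    t   = RQ::Util.stamptime(now)          # parses the stamp back into a Time
    RQ::Util.alive?(Process.pid)           # => true - signal 0 probes the pid
    RQ::Util.escape('a b c', ' ', '\\')    # => "a\\ b\\ c" - backslash-escapes spaces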
data/lib/rq.rb
ADDED
@@ -0,0 +1,84 @@
+unless defined? $__rq__
+module RQ
+#{{{
+  AUTHOR = 'ara.t.howard@noaa.gov'
+  LIBNAME = 'rq'
+  VERSION = '0.1.7'
+  LIBVER = "#{ LIBNAME }-#{ VERSION }"
+  DIRNAME = File::dirname(File::expand_path(__FILE__)) + File::SEPARATOR
+  ROOTDIR = File::dirname(DIRNAME)
+  LIBDIR = File::join(DIRNAME, LIBVER) + File::SEPARATOR
+  EXIT_SUCCESS = 0
+  EXIT_FAILURE = 1
+#
+# builtin
+#
+  require 'optparse'
+  require 'logger'
+  require 'socket'
+  require 'rbconfig'
+  require 'optparse'
+  require 'logger'
+  require 'yaml'
+  require 'pp'
+  require 'socket'
+  require 'pathname'
+  require 'tempfile'
+  require 'fileutils'
+  require 'tmpdir'
+  require 'drb/drb'
+#
+# depends - http://raa.ruby-lang.org
+#
+  begin
+    require 'arrayfields'
+  rescue LoadError
+    abort "require arrayfields - http://raa.ruby-lang.org/project/arrayfields/"
+  end
+  begin
+    require 'sqlite'
+  rescue LoadError
+    abort "require sqlite - http://raa.ruby-lang.org/project/sqlite-ruby/"
+  end
+  begin
+    require 'posixlock'
+  rescue LoadError
+    abort "require posixlock - http://raa.ruby-lang.org/project/posixlock/"
+  end
+  begin
+    require 'lockfile'
+  rescue LoadError
+    abort "require lockfile - http://raa.ruby-lang.org/project/lockfile/"
+  end
+#
+# rq support libs
+#
+  require LIBDIR + 'util'
+  require LIBDIR + 'logging'
+  require LIBDIR + 'configfile'
+  require LIBDIR + 'sleepcycle'
+  require LIBDIR + 'refresher'
+  require LIBDIR + 'qdb'
+  require LIBDIR + 'jobqueue'
+  require LIBDIR + 'job'
+  require LIBDIR + 'jobrunner'
+  require LIBDIR + 'jobrunnerdaemon'
+  require LIBDIR + 'usage'
+  require LIBDIR + 'mainhelper'
+  require LIBDIR + 'creator'
+  require LIBDIR + 'submitter'
+  require LIBDIR + 'lister'
+  require LIBDIR + 'statuslister'
+  require LIBDIR + 'deleter'
+  require LIBDIR + 'updater'
+  require LIBDIR + 'querier'
+  require LIBDIR + 'executor'
+  require LIBDIR + 'configurator'
+  require LIBDIR + 'snapshotter'
+  require LIBDIR + 'locker'
+  require LIBDIR + 'backer'
+  require LIBDIR + 'feeder'
+#}}}
+end # module rq
+$__rq__ = __FILE__
+end
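
A minimal loading sketch, assuming the unpacked gem tree as the working directory; requiring the top-level file runs the dependency guards and pulls in every support lib listed above:

    $LOAD_PATH.unshift 'lib'
    require 'rq'        # aborts, pointing at raa.ruby-lang.org, if a dependency is missing

    puts RQ::VERSION    # => "0.1.7"
    puts RQ::LIBVER     # => "rq-0.1.7"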
data/rdoc.cmd
ADDED
data/rq.gemspec
ADDED
@@ -0,0 +1,36 @@
+require 'date'
+Gem::Specification.new do |s|
+  s.name = %q{rq}
+  s.version = "0.1.7"
+  s.date = Date.today.to_s
+  s.summary = %q{rq is an __experimental__ tool used to manage nfs mounted work queues}
+  s.description =<<DESCRIPTION
+rq is an __experimental__ tool used to manage nfs mounted work
+queues.  multiple instances of rq running from multiple hosts can
+work from these queues to distribute processing load to 'n' nodes - bringing
+many dozens of otherwise powerful cpus to their knees with a single blow.
+clearly this software should be kept out of the hands of radicals, SETI
+enthusiasts, and one mr. jeff safran.
+
+rq operates in one of the modes create, submit, feed, list,
+delete, query, snapshot, or help.  depending on the mode of operation and
+the options used the meaning of mode_args may change, sometimes wildly and
+unpredictably (i jest, of course).
+DESCRIPTION
+  s.author = %q{-a}
+  s.email = %q{ara.t.howard@noaa.gov}
+  s.homepage = %q{http://www.codeforpeople.com/lib/ruby/rq/}
+  s.files = Dir.glob('**/*')
+  s.require_path = %q{lib}
+  s.autorequire = %q{rq}
+  s.has_rdoc = true
+  s.rdoc_options = ["--main", "README"]
+  s.extra_rdoc_files = ["README"]
+  s.executables = %w{rq}
+  s.bindir = %q{bin}
+  s.platform = Gem::Platform::RUBY
+  s.add_dependency('arrayfields', '>= 0.0.0')
+  s.add_dependency('sqlite-ruby', '>= 0.0.0')
+  s.add_dependency('lockfile', '>= 0.0.0')
+  s.add_dependency('posixlock', '>= 0.0.0')
+end
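
The gem itself would presumably be packaged from this spec in the usual way:

    ~ > gem build rq.gemspec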
data/rq.help
ADDED
@@ -0,0 +1,552 @@
+NAME
+  rq v0.1.7
+
+SYNOPSIS
+  rq (queue | export RQ_Q=q) mode [mode_args]* [options]*
+
+
+DESCRIPTION
+  rq is a tool used to create instant linux clusters by managing nfs
+  mounted priority work queues.  multiple instances of rq running from
+  multiple hosts can work from these queues to distribute processing load to n
+  nodes - bringing many dozens of otherwise powerful cpus to their knees with a
+  single blow.  clearly this software should be kept out of the hands of free
+  radicals, seti enthusiasts, and one mr. j. safran.
+
+  the central concept of rq is that n nodes work in isolation to pull
+  jobs from a central nfs mounted priority work queue in a synchronized
+  fashion.  the nodes have absolutely no knowledge of each other and all
+  communication is done via the queue, meaning that, so long as the queue is
+  available via nfs and a single node is running jobs from it, the system will
+  continue to process jobs.  there is no centralized process whatsoever - all
+  nodes work to take jobs from the queue and run them as fast as possible.  this
+  creates a system which load balances automatically and is robust in the face
+  of node failures.
+
+  the first argument to any rq command is the name of the queue.  this
+  name may be omitted if, and only if, the environment variable RQ_Q has been
+  set to contain the absolute path of the target queue.
+
+  rq operates in one of the modes create, submit, list, status,
+  delete, update, query, execute, configure, snapshot, lock, backup, help, or
+  feed.  depending on the mode of operation and the options used the meaning of
+  'mode_args' may change.
+
+MODES
+
+  the following mode abbreviations exist
+
+    c  => create
+    s  => submit
+    l  => list
+    ls => list
+    t  => status
+    d  => delete
+    rm => delete
+    u  => update
+    q  => query
+    e  => execute
+    C  => configure
+    S  => snapshot
+    L  => lock
+    b  => backup
+    h  => help
+    f  => feed
+
+  create, c :
+
+    create a queue.  the queue must be located on an nfs mounted file system
+    visible from all nodes intended to run jobs from it.
+
+    examples :
+
+      0) to create a queue
+        ~ > rq /path/to/nfs/mounted/q create
+        or simply
+        ~ > rq /path/to/nfs/mounted/q c
+
+
+  submit, s :
+
+    submit jobs to a queue to be processed by a feeding node.  any 'mode_args'
+    are taken as the command to run.  note that 'mode_args' are subject to shell
+    expansion - if you don't understand what this means do not use this feature
+    and pass jobs on stdin.
+
+    when running in submit mode a file may be specified as a list of commands to
+    run using the '--infile, -i' option.  this file is taken to be a newline
+    separated list of commands to submit; blank lines and comments (#) are
+    allowed (a small sample appears at the end of this section).  if submitting
+    a large number of jobs the input file method is MUCH more efficient.  if no
+    commands are specified on the command line rq automatically reads them from
+    STDIN.  yaml formatted files are also allowed as input
+    (http://www.yaml.org/) - note that the output of nearly all rq commands is
+    valid yaml and may, therefore, be piped as input into the submit command.
+
+    when submitting, the '--priority, -p' option can be used to determine
+    the priority of jobs.  priorities may be any whole number - zero is the
+    default.  note that submission of a high priority job will NOT supplant
+    currently running low priority jobs, but higher priority jobs WILL always
+    migrate above lower priority jobs in the queue in order that they be run as
+    soon as possible.  constant submission of high priority jobs may create a
+    starvation situation whereby low priority jobs are never allowed to run.
+    avoiding this situation is the responsibility of the user.  the only
+    guarantee rq makes regarding job execution is that jobs are
+    executed in an 'oldest highest priority' order and that running jobs are
+    never supplanted.
+
+    examples :
+
+      0) submit the job ls to run on some feeding host
+
+        ~ > rq q s ls
+
+      1) submit the job ls to run on some feeding host, at priority 9
+
+        ~ > rq -p9 q s ls
+
+      2) submit 42000 jobs (quietly) from a command file.
+
+        ~ > wc -l cmdfile
+        42000
+        ~ > rq q s -q < cmdfile
+
+      3) submit 42 priority 9 jobs from a command file.
+
+        ~ > wc -l cmdfile
+        42
+        ~ > rq -p9 q s < cmdfile
+
+      4) submit 42 priority 9 jobs from a command file, marking them as
+         'important' using the '--tag, -t' option.
+
+        ~ > wc -l cmdfile
+        42
+        ~ > rq -p9 -timportant q s < cmdfile
+
+      5) re-submit all the 'important' jobs (see 'query' section below)
+
+        ~ > rq q query tag=important | rq q s
+
+      6) re-submit all jobs which are already finished (see 'list' section
+         below)
+
+        ~ > rq q l f | rq q s
+
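+    for illustration, a hypothetical cmdfile in the format described above -
+    one command per line, blank lines and '#' comments allowed, with absolute
+    paths as recommended in NOTES below:
+
+      # cmdfile - sample only
+      /usr/bin/convert /nfs/data/in/a.tif /nfs/data/out/a.png
+      /usr/bin/convert /nfs/data/in/b.tif /nfs/data/out/b.png
+
+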
+  list, l, ls :
+
+    list mode lists jobs of a certain state or job id.  state may be one of
+    pending, running, finished, dead, or all.  any 'mode_args' that are numbers
+    are taken to be job id's to list.
+
+    states may be abbreviated to uniqueness, therefore the following shortcuts
+    apply :
+
+      p => pending
+      r => running
+      f => finished
+      d => dead
+      a => all
+
+    examples :
+
+      0) show everything in q
+        ~ > rq q list all
+        or
+        ~ > rq q l all
+        or
+        ~ > export RQ_Q=q
+        ~ > rq l
+
+      1) show q's pending jobs
+        ~ > rq q list pending
+
+      2) show q's running jobs
+        ~ > rq q list running
+
+      3) show q's finished jobs
+        ~ > rq q list finished
+
+      4) show job id 42
+        ~ > rq q l 42
+
+
+  status, t :
+
+    status mode shows the global state of the queue.  there are no 'mode_args'.
+    the meaning of each state is as follows:
+
+      pending  => no feeder has yet taken this job
+      running  => a feeder has taken this job
+      finished => a feeder has finished this job
+      dead     => rq died while running a job, has restarted, and moved
+                  this job to the dead state
+
+    note that rq cannot move jobs into the dead state unless it has
+    been restarted.  this is because no node has any knowledge of other nodes
+    and cannot possibly know if a job was started on a node that died, or is
+    simply taking a very long time.  only the node that dies, upon restart, can
+    determine that it has jobs that 'were started before it started' and move
+    these jobs into the dead state (a sketch of this idea follows below).
+    normally only a machine crash would cause a job to be placed into the dead
+    state.  dead jobs are never automatically restarted; this is the
+    responsibility of an operator.
+
+    examples :
+
+      0) show q's status
+
+        ~ > rq q t
+
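+    a purely illustrative ruby sketch of that restart check - not rq's actual
+    implementation, with field names assumed from the query examples below:
+
+      require 'socket'
+
+      hostname = Socket.gethostname
+      booted   = Time.now   # this feeder's (re)start time
+      jobs     = []         # stand-in for rows read from the queue db
+
+      orphaned = jobs.select do |job|
+        job['state'] == 'running' and    # marked running ...
+        job['host'] == hostname and      # ... on this very host ...
+        job['started'] < booted          # ... before this process began
+      end
+      orphaned.each{|job| job['state'] = 'dead'}
+
+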
+  delete, d :
+
+    delete combinations of pending, running, finished, dead, or jobs specified
+    by jid.  the delete mode is capable of parsing the output of list and query
+    modes, making it possible to create custom filters to delete jobs meeting
+    very specific conditions.
+
+    'mode_args' are the same as for list.  note that it is possible to
+    delete a running job, but there is no way to actually STOP it mid execution
+    since the node doing the deleting has no way to communicate this
+    information to the (probably) remote execution node.  therefore you should
+    use the 'delete running' feature with care and only for housekeeping
+    purposes or to prevent future jobs from being scheduled.
+
+    examples :
+
+      0) delete all pending, running, and finished jobs from a queue
+
+        ~ > rq q d all
+
+      1) delete all pending jobs from a queue
+
+        ~ > rq q d p
+
+      2) delete all finished jobs from a queue
+
+        ~ > rq q d f
+
+      3) delete jobs via a hand crafted filter program (a sample filter
+         sketch follows this section)
+
+        ~ > rq q list | yaml_filter_prog | rq q d
+
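+    a hypothetical yaml_filter_prog, assuming (per the submit section) that
+    rq's output is valid yaml and loads as a collection of job records keyed
+    by the field names used in the query examples:
+
+      #! /usr/bin/env ruby
+      # keep only jobs with a non-zero exit_status - sample only
+      require 'yaml'
+
+      jobs = YAML::load(STDIN.read) || []
+      bad  = jobs.select{|job| job['exit_status'].to_i != 0}
+      puts bad.to_yaml
+
+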
+  update, u :
+
+    update assumes all leading arguments are jids to update with subsequent
+    key=value pairs.  currently only the 'command', 'priority', and 'tag' fields
+    of pending jobs can be updated.
+
+    examples:
+
+      0) update the priority of job 42
+
+        ~ > rq q update 42 priority=7
+
+      1) update the priority of all pending jobs
+
+        ~ > rq q update pending priority=7
+
+      2) query jobs with a command matching 'foobar' and update their command
+         to be 'barfoo'
+
+        ~ > rq q q "command like '%foobar%'" |\
+            rq q u command=barfoo
+
+
+  query, q :
+
+    query exposes the database more directly to the user, evaluating the where
+    clause specified on the command line (or from STDIN).  this feature can be
+    used to make a fine grained selection of jobs for reporting or as input into
+    the delete command.  you must have a basic understanding of SQL syntax to
+    use this feature, but it is fairly intuitive in this limited capacity.
+
+    examples:
+
+      0) show all jobs submitted within a specific 10 minute range
+
+        ~ > rq q query "started >= '2004-06-29 22:51:00' and started < '2004-06-29 22:51:10'"
+
+      1) shell quoting can be tricky here so input on STDIN is also allowed to
+         avoid shell expansion
+
+        ~ > cat constraints.txt
+        started >= '2004-06-29 22:51:00' and
+        started < '2004-06-29 22:51:10'
+
+        ~ > rq q query < constraints.txt
+        or (same thing)
+        ~ > cat constraints.txt | rq q query
+
+        ** in general all but numbers will need to be surrounded by single quotes **
+
+      2) this query output might then be used to delete those jobs
+
+        ~ > cat constraints.txt | rq q q | rq q d
+
+      3) show all jobs which are either finished or dead
+
+        ~ > rq q q "state='finished' or state='dead'"
+
+      4) show all jobs which have non-zero exit status
+
+        ~ > rq q query exit_status!=0
+
+      5) if you plan to query groups of jobs with some common feature consider
+         using the '--tag, -t' feature of the submit mode which allows a user to
+         tag a job with a user defined string which can then be used to easily
+         query that job group
+
+        ~ > rq q submit --tag=my_jobs < joblist
+        ~ > rq q query tag=my_jobs
+
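+    the documented fields compose in a single where clause; a hypothetical
+    report of failed jobs within a tagged group:
+
+        ~ > rq q query "tag='my_jobs' and exit_status!=0"
+
+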
+  execute, e :
+
+    execute mode is to be used by expert users with a knowledge of sql syntax
+    only.  it follows the locking protocol used by rq and then allows
+    the user to execute arbitrary sql on the queue.  unlike query mode a write
+    lock on the queue is obtained, allowing a user to definitively shoot
+    themselves in the foot.  for details on a queue's schema the file
+    'db.schema' in the queue directory should be examined.
+
+    examples :
+
+      0) list all jobs
+
+        ~ > rq q execute 'select * from jobs'
+
+
+  configure, C :
+
+    this mode is not supported yet.
+
+
+  snapshot, S :
+
+    snapshot provides a means of taking a snapshot of the q.  use this feature
+    when many queries are going to be run; for example when attempting to figure
+    out a complex pipeline command your test queries will not compete with the
+    feeders for the queue's lock.  you should use this option whenever possible
+    to avoid lock competition.
+
+    examples:
+
+      0) take a snapshot using default snapshot naming, which is made via the
+         basename of the q plus '.snapshot'
+
+        ~ > rq /path/to/nfs/q snapshot
+
+      1) use this snapshot to check status
+
+        ~ > rq ./q.snapshot status
+
+      2) use the snapshot to see what's running on which host
+
+        ~ > rq ./q.snapshot list running | grep `hostname`
+
+    note that there is also a snapshot option - this option is not the same as
+    the snapshot command.  the option can be applied to ANY command.  if in
+    effect then that command will be run on a snapshot of the database and the
+    snapshot then immediately deleted.  this is really only useful if one were
+    to need to run a command against a very heavily loaded queue and did not
+    wish to wait to obtain the lock.  eg.
+
+      0) get the status of a heavily loaded queue
+
+        ~ > rq q t --snapshot
+
+      1) same as above
+
+        ~ > rq q t -s
+
+
+  lock, L :
+
+    lock the queue and then execute an arbitrary shell command.  lock mode uses
+    the queue's locking protocol to safely obtain a lock of the specified type
+    and execute a command on the user's behalf.  lock type must be one of
+
+      (r)ead | (sh)ared | (w)rite | (ex)clusive
+
+    examples :
+
+      0) get a read lock on the queue and make a backup
+
+        ~ > rq q L read -- cp -r q q.bak
+
+        (the '--' is needed to tell rq to stop parsing command line
+         options which allows the '-r' to be passed to the 'cp' command)
+
+
+  backup, b :
+
+    backup mode is exactly the same as getting a read lock on the queue and
+    making a copy of it.  this mode is provided as a convenience.
+
+      0) make a backup of the queue using default naming ( qname + timestamp + .bak )
+
+        ~ > rq q b
+
+      1) make a backup of the queue as 'q.bak'
+
+        ~ > rq q b q.bak
+
+  help, h :
+
+    this message
+
+    examples :
+
+      0) get this message
+
+        ~> rq q help
+        or
+        ~> rq help
+
+
+  feed, f :
+
+    take jobs from the queue and run them on behalf of the submitter as quickly
+    as possible.  jobs are taken from the queue in an 'oldest highest priority'
+    first order.
+
+    feeders can be run from any number of nodes allowing you to harness the CPU
+    power of many nodes simultaneously in order to more effectively clobber
+    your network, annoy your sysads, and set output raids on fire.
+
+    the most useful method of feeding from a queue is to do so in daemon mode so
+    that if the process loses its controlling terminal it will not exit when you
+    exit your terminal session.  use the '--daemon, -d' option to accomplish
+    this.  by default only one feeding process per host per queue is allowed to
+    run at any given moment.  because of this it is acceptable to start a feeder
+    at some regular interval from a cron entry since, if a feeder is already
+    running, the process will simply exit; otherwise a new feeder will be
+    started.  in this way you may keep a feeder process running even across
+    machine reboots without requiring sysad intervention to add an entry to the
+    machine's startup tasks.
+
+
+    examples :
+
+      0) feed from a queue verbosely for debugging purposes, using a minimum and
+         maximum polling time of 2 and 4 respectively.  you would NEVER specify
+         polling times this brief except for debugging purposes!!!
+
+        ~ > rq q feed -v4 -m2 -M4
+
+      1) same as above, but viewing the executed sql as it is sent to the
+         database
+
+        ~ > RQ_SQL_DEBUG=1 rq q f -v4 -m2 -M4
+
+      2) feed from a queue in daemon mode - logging to /home/ahoward/rq.log
+
+        ~ > rq q f -d -l/home/ahoward/rq.log
+
+        log rolling in daemon mode is automatic so your logs should never need
+        to be deleted to prevent disk overflow.
+
+      3) use something like this sample crontab entry to keep a feeder running
+         forever - it attempts to (re)start every fifteen minutes but exits if
+         another process is already feeding.
+
+        #
+        # your crontab file - sample only
+        #
+
+        */15 * * * * /full/path/to/bin/rq /full/path/to/nfs/mounted/q f -d -l/home/username/cfq.log -q
+
+        the '--quiet, -q' here tells rq to exit quietly (no STDERR)
+        when another process is found to already be feeding so that no cron
+        message would be sent under these conditions.
+
+
+NOTES
+  - realize that your job is going to be running on a remote host and this has
+    implications.  paths, for example, should be absolute, not relative.
+    specifically the submitted job script must be visible from all hosts
+    currently feeding from a queue as must be the input and output
+    files/directories.
+
+  - jobs are currently run under the bash shell using the --login option.
+    therefore any settings in your .bashrc will apply - specifically your PATH
+    setting.  you should not, however, rely on jobs running with any given
+    environment.
+
+  - you need to consider __CAREFULLY__ what the ramifications of having multiple
+    instances of your program all potentially running at the same time will be.
+    for instance, it is beyond the scope of rq to ensure multiple
+    instances of a given program will not overwrite each others' output files.
+    coordination of programs is left entirely to the user.
+
+  - the list of finished jobs will grow without bound unless you sometimes
+    delete some (all) of them.  the reason for this is that rq cannot
+    know when the user has collected the exit_status of a given job, and so
+    keeps this information in the queue forever until instructed to delete it.
+    if you have collected the exit_status of your job(s) it is not an error to
+    then delete that job from the finished list - the information is kept for
+    your informational purposes only.  in a production system it would be normal
+    to periodically save, and then delete, all finished jobs.
+
+ENVIRONMENT
+  RQ_Q: set to the full path of the nfs mounted queue
+
+    the queue argument to all commands may be omitted if, and only if, the
+    environment variable 'RQ_Q' contains the full path to the q.  eg.
+
+      ~ > export RQ_Q=/full/path/to/my/q
+
+    this feature can save a considerable amount of typing for those weak of
+    wrist.
+
+DIAGNOSTICS
+  success : $? == 0
+  failure : $? != 0
+
+AUTHOR
+  ara.t.howard@noaa.gov
+
+BUGS
+  0 < bugno && bugno <= 42
+
+  reports to ara.t.howard@noaa.gov
+
+OPTIONS
+  --priority=priority, -p
+    modes <submit> : set the job(s) priority - lowest(0) .. highest(n) -
+    (default 0)
+  --tag=tag, -t
+    modes <submit> : set the job(s) user data tag
+  --infile=infile, -i
+    modes <submit> : infile
+  --quiet, -q
+    modes <submit, feed> : do not echo submitted jobs, fail silently if
+    another process is already feeding
+  --daemon, -d
+    modes <feed> : spawn a daemon
+  --max_feed=max_feed, -f
+    modes <feed> : the maximum number of concurrent jobs run
+  --retries=retries, -r
+    modes <feed> : specify transaction retries
+  --min_sleep=min_sleep, -m
+    modes <feed> : specify min sleep
+  --max_sleep=max_sleep, -M
+    modes <feed> : specify max sleep
+  --snapshot, -s
+    operate on a snapshot of the queue
+  --verbosity=verbosity, -v
+    0|fatal < 1|error < 2|warn < 3|info < 4|debug - (default info)
+  --log=path, -l
+    set log file - (default stderr)
+  --log_age=log_age
+    daily | weekly | monthly - what age will cause log rolling (default
+    nil)
+  --log_size=log_size
+    size in bytes - what size will cause log rolling (default nil)
+  --help, -h
+    this message