rq 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/DEPENDS +5 -0
- data/HISTORY +26 -0
- data/README +552 -0
- data/TODO +13 -0
- data/VERSION +1 -0
- data/bin/rq +391 -0
- data/bin/rq-0.1.7 +410 -0
- data/install.rb +143 -0
- data/lib/rq-0.1.7.rb +82 -0
- data/lib/rq-0.1.7/backer.rb +27 -0
- data/lib/rq-0.1.7/configfile.rb +78 -0
- data/lib/rq-0.1.7/configurator.rb +36 -0
- data/lib/rq-0.1.7/creator.rb +23 -0
- data/lib/rq-0.1.7/defaultconfig.txt +5 -0
- data/lib/rq-0.1.7/deleter.rb +39 -0
- data/lib/rq-0.1.7/executor.rb +41 -0
- data/lib/rq-0.1.7/feeder.rb +367 -0
- data/lib/rq-0.1.7/job.rb +51 -0
- data/lib/rq-0.1.7/jobqueue.rb +432 -0
- data/lib/rq-0.1.7/jobrunner.rb +63 -0
- data/lib/rq-0.1.7/jobrunnerdaemon.rb +179 -0
- data/lib/rq-0.1.7/lister.rb +22 -0
- data/lib/rq-0.1.7/locker.rb +37 -0
- data/lib/rq-0.1.7/logging.rb +117 -0
- data/lib/rq-0.1.7/mainhelper.rb +53 -0
- data/lib/rq-0.1.7/qdb.rb +634 -0
- data/lib/rq-0.1.7/querier.rb +33 -0
- data/lib/rq-0.1.7/refresher.rb +72 -0
- data/lib/rq-0.1.7/sleepcycle.rb +46 -0
- data/lib/rq-0.1.7/snapshotter.rb +25 -0
- data/lib/rq-0.1.7/statuslister.rb +22 -0
- data/lib/rq-0.1.7/submitter.rb +90 -0
- data/lib/rq-0.1.7/updater.rb +95 -0
- data/lib/rq-0.1.7/usage.rb +609 -0
- data/lib/rq-0.1.7/util.rb +286 -0
- data/lib/rq.rb +84 -0
- data/rdoc.cmd +2 -0
- data/rq +2 -0
- data/rq.gemspec +36 -0
- data/rq.help +552 -0
- data/white_box/crontab +2 -0
- data/white_box/killrq +18 -0
- data/white_box/rq_killer +27 -0
- metadata +126 -0
data/lib/rq-0.1.7/qdb.rb
ADDED
@@ -0,0 +1,634 @@
|
|
1
|
+
unless defined? $__rq_qdb__
|
2
|
+
module RQ
|
3
|
+
#{{{
|
4
|
+
LIBDIR = File::dirname(File::expand_path(__FILE__)) + File::SEPARATOR unless
|
5
|
+
defined? LIBDIR
|
6
|
+
|
7
|
+
require LIBDIR + 'util'
|
8
|
+
require LIBDIR + 'logging'
|
9
|
+
require LIBDIR + 'sleepcycle'
|
10
|
+
require LIBDIR + 'refresher'
|
11
|
+
|
12
|
+
class QDB
|
13
|
+
#{{{
|
14
|
+
include Util
|
15
|
+
include Logging
|
16
|
+
|
17
|
+
# Column names of the jobs table, in positional order. 'jid' is first and is
# declared separately in SCHEMA as the integer primary key.
FIELDS = %w[
  jid priority state
  submitted started finished elapsed
  submitter runner
  pid exit_status
  tag command
]

# Pragmas written to the schema file and executed when a queue db is created.
PRAGMAS = <<-sql
PRAGMA default_synchronous = FULL;
sql

# DDL for a fresh queue db: the jobs table (one column per FIELDS entry) and a
# key/value attributes table.
SCHEMA = <<-sql
create table jobs
(
jid integer primary key,
#{ FIELDS[1..-1].join(",\n ") }
);
create table attributes
(
key,
value,
primary key (key)
);
sql

# Fallbacks used by #initialize when neither the opts hash nor the class-level
# accessor supplies a value.
DEFAULT_LOGGER                         = Logger::new(STDERR)
DEFAULT_SQL_DEBUG                      = false
# maximum number of retries performed by #transaction_wrap
DEFAULT_TRANSACTION_RETRIES            = 4
# NOTE(review): SleepCycle argument meaning is defined in sleepcycle.rb - confirm there
DEFAULT_AQUIRE_LOCK_SC                 = SleepCycle::new(2, 16, 2)
DEFAULT_TRANSACTION_RETRIES_SC         = SleepCycle::new(8, 24, 8)
DEFAULT_ATTEMPT_LOCKD_RECOVERY         = true
# seconds slept by #lockd_recover before it starts moving files
DEFAULT_LOCKD_RECOVER_WAIT             = 1800
# seconds of lockfile mtime age after which #aquire_lock treats it as stale
DEFAULT_AQUIRE_LOCK_LOCKFILE_STALE_AGE = 1800
# handed to Refresher::new together with the lockfile path
DEFAULT_AQUIRE_LOCK_REFRESH_RATE       = 8
|
61
|
+
|
62
|
+
class << self
|
63
|
+
#{{{
|
64
|
+
# Class-wide overrides for the per-instance tunables. QDB#initialize reads each
# one through klass.<name> when Util::getopt yields nothing truthy for the
# matching option, before falling back to the DEFAULT_* constant.
#
# attr_accessor is used instead of the obsolete `attr :name, true` form, which
# defines the same reader/writer pair but is deprecated in current Ruby.
attr_accessor :logger
attr_accessor :sql_debug
attr_accessor :transaction_retries
attr_accessor :aquire_lock_sc
attr_accessor :transaction_retries_sc
attr_accessor :attempt_lockd_recovery
attr_accessor :lockd_recover_wait
attr_accessor :aquire_lock_lockfile_stale_age
attr_accessor :aquire_lock_refresh_rate
|
73
|
+
|
74
|
+
# Returns the FIELDS constant (the job column names).
def fields
  #{{{
  FIELDS
  #}}}
end
|
79
|
+
# Opens the sqlite database at +dbpath+, runs 'PRAGMA integrity_check;' and
# returns a truthy value only when the first result row's "integrity_check"
# column matches 'ok' (case-insensitive, surrounding whitespace allowed).
# The handle is closed again if it was opened; errors propagate to the caller.
def integrity_check dbpath
  #{{{
  verdict = false
  handle = nil
  connected = false
  begin
    handle = SQLite::Database::new(dbpath, 0)
    connected = true
    handle.use_array = true
    rows = handle.execute('PRAGMA integrity_check;')
    head = rows && rows.first
    verdict = head && (head["integrity_check"] =~ /^\s*ok\s*$/io)
  ensure
    handle.close if connected
    handle = nil
  end
  verdict
  #}}}
end
|
96
|
+
# Converts a positional +tuple+ into a hash keyed by field name: FIELDS[i]
# maps to tuple[i] for every index of FIELDS.
def t2h tuple
  #{{{
  row = {}
  (0...FIELDS.size).each do |idx|
    row[FIELDS[idx]] = tuple[idx]
  end
  row
  #}}}
end
|
103
|
+
# Converts hash +h+ (keyed by field name) into a fresh tuple built by #tuple,
# assigning every FIELDS entry by name.
def h2t h
  #{{{
  FIELDS.inject(tuple) do |row, name|
    row[name] = h[name]
    row
  end
  #}}}
end
|
110
|
+
# Builds an empty tuple: an Array of FIELDS.size nils whose +fields+ are set to
# FIELDS.
# NOTE(review): Array#fields= is not core Ruby - presumably supplied by the
# arrayfields extension loaded elsewhere; confirm.
def tuple
  #{{{
  blank = Array::new(FIELDS.size)
  blank.fields = FIELDS
  blank
  #}}}
end
|
117
|
+
# Quotes every element of +tuple+ for inclusion in SQL text: truthy values are
# passed through Util.escape and wrapped in single quotes, nil/false become the
# bare word NULL. Returns the mapped collection.
def q tuple
  #{{{
  tuple.map{|f| f ? ("'" << Util.escape(f,"'","'") << "'") : 'NULL'}
  #}}}
end
|
128
|
+
# Creates a new queue database at +path+: instantiates the QDB, touches its
# lockfile, writes the schema file, then executes PRAGMAS and SCHEMA inside a
# single transaction. Returns the new QDB instance.
def create path, opts = {}
  #{{{
  fresh = new(path, opts)
  FileUtils::touch(fresh.lockfile)
  create_schema(fresh.schema)
  fresh.transaction do
    [PRAGMAS, SCHEMA].each{|sql| fresh.execute sql}
  end
  fresh
  #}}}
end
|
140
|
+
# Writes PRAGMAS followed by SCHEMA to +path+.
#
# The text is first written to "<path>.tmp" and then moved into place, so a
# partially written schema file is never visible under +path+.
#
# File::open is used rather than Kernel#open so that a path beginning with '|'
# is always treated as a file name and never run as a command.
def create_schema path
  #{{{
  tmp = "#{ path }.tmp"
  File::open(tmp,'w') do |f|
    f.puts PRAGMAS
    f.puts SCHEMA
  end
  FileUtils::mv tmp, path
  #}}}
end
|
150
|
+
#}}}
|
151
|
+
end
|
152
|
+
|
153
|
+
# Read-only attributes. All but +mutex+ are assigned in #initialize.
# NOTE(review): no visible code assigns @mutex, so +mutex+ reads nil unless it
# is set elsewhere - confirm whether it is still needed.
attr_reader :path
attr_reader :opts
attr_reader :dirname
attr_reader :schema
attr_reader :fields
attr_reader :mutex
attr_reader :lockfile

# Tunables resolved in #initialize (option, then class-level value, then
# DEFAULT_* constant) and adjustable afterwards.
#
# attr_reader/attr_accessor replace the obsolete `attr :name[, true]` forms,
# which define the same methods but are deprecated in current Ruby.
attr_accessor :sql_debug
attr_accessor :transaction_retries
attr_accessor :aquire_lock_sc
attr_accessor :transaction_retries_sc
attr_accessor :attempt_lockd_recovery
attr_accessor :lockd_recover_wait
attr_accessor :aquire_lock_lockfile_stale_age
attr_accessor :aquire_lock_refresh_rate
|
168
|
+
|
169
|
+
|
170
|
+
# Sets up a QDB for the sqlite file at +path+. No file is opened or created.
#
# Every tunable is resolved as: Util::getopt(name, opts), else the class-level
# accessor of the same name, else the DEFAULT_* constant (sql_debug also
# consults ENV['RQ_SQL_DEBUG'] before its default).
#
# NOTE(review): because the chain uses ||, a false value supplied through opts
# or the class accessor falls through to the next source; in particular
# attempt_lockd_recovery appears impossible to switch off here since its
# default is true - confirm against Util::getopt.
#
# All lock/waiting marker files live in the directory containing +path+ and
# are named after Util::hostname and the current pid; the lockd_recover marker
# is a sibling of that directory ("<dirname>.lockd_recover").
def initialize path, opts = {}
  #{{{
  @path = path
  @opts = opts

  @logger = Util::getopt('logger', @opts) || klass.logger || DEFAULT_LOGGER

  [
    ['sql_debug',                      ENV['RQ_SQL_DEBUG'] || DEFAULT_SQL_DEBUG],
    ['transaction_retries',            DEFAULT_TRANSACTION_RETRIES],
    ['aquire_lock_sc',                 DEFAULT_AQUIRE_LOCK_SC],
    ['transaction_retries_sc',         DEFAULT_TRANSACTION_RETRIES_SC],
    ['attempt_lockd_recovery',         DEFAULT_ATTEMPT_LOCKD_RECOVERY],
    ['lockd_recover_wait',             DEFAULT_LOCKD_RECOVER_WAIT],
    ['aquire_lock_lockfile_stale_age', DEFAULT_AQUIRE_LOCK_LOCKFILE_STALE_AGE],
    ['aquire_lock_refresh_rate',       DEFAULT_AQUIRE_LOCK_REFRESH_RATE],
  ].each do |name, fallback|
    value = Util::getopt(name, @opts) || klass.send(name) || fallback
    instance_variable_set "@#{ name }", value
  end

  @schema = "#{ @path }.schema"
  @dirname = File::dirname(path).gsub(%r|/+\s*$|,'')
  @basename = File::basename(path)

  # per-process marker files: <dirname>/<hostname>.<pid>.<suffix>
  marker = lambda do |suffix|
    File::join(@dirname, "#{ Util::hostname }.#{ $$ }.#{ suffix }")
  end
  @waiting_w = marker['waiting.w']
  @waiting_r = marker['waiting.r']
  @lock_w = marker['lock.w']
  @lock_r = marker['lock.r']

  @lockfile = File::join(@dirname, 'lock')
  @lockf = Lockfile::new("#{ @path }.lock")
  @fields = FIELDS
  @in_transaction = false
  @db = nil

  @lockd_recover = "#{ @dirname }.lockd_recover"
  @lockd_recover_lockf = Lockfile::new("#{ @lockd_recover }.lock")
  @lockd_recovered = false
  #}}}
end
|
240
|
+
# Runs +block+ inside a read-only transaction: identical to #transaction with
# the 'read_only' option forced to true. Returns whatever #transaction returns.
#
# The options are merged into a new hash so the caller's +opts+ is never
# modified (and a frozen hash may be passed).
def ro_transaction(opts = {}, &block)
  #{{{
  transaction(opts.merge('read_only' => true), &block)
  #}}}
end
|
246
|
+
# Runs the given block inside a database transaction and returns its value.
#
# The block is wrapped, outermost first, by #lockd_recover_wrap,
# #transaction_wrap, #aquire_lock and #connect. Unless the 'read_only' option
# is set, 'begin' is executed before the block and 'commit' after it.
#
# Raises 'nested transaction' when called while another transaction is active
# on this object; the in-transaction flag is always cleared on the way out.
def transaction opts = {}
  #{{{
  raise 'nested transaction' if @in_transaction
  read_only = Util::getopt('read_only', opts)
  result = nil
  @in_transaction = true
  begin
    lockd_recover_wrap(opts) do
      transaction_wrap(opts) do
        aquire_lock(opts) do
          # a sillyclean(opts) wrapper used to sit here and is disabled
          connect do
            execute 'begin' unless read_only
            result = yield
            execute 'commit' unless read_only
          end
        end
      end
    end
  ensure
    @in_transaction = false
  end
  result
  #}}}
end
|
272
|
+
# Runs the given block and returns its value, retrying it when it fails while
# another process has performed a lockd recovery in the meantime.
#
# A recovery by someone else is detected by comparing the mtime of the
# @lockd_recover marker file before and after the block: if the block raised,
# the marker got newer, and this object did not perform the recovery itself
# (@lockd_recovered), the method warns, sleeps 120 seconds and retries.
# Any other failure is re-raised.
#
# +try_again+ is reset at the start of every attempt; otherwise one
# recovery-triggered retry would leave it set and every later, unrelated
# failure would be retried forever.
def lockd_recover_wrap opts = {}
  #{{{
  ret = nil
  try_again = false
  begin
    # decide afresh, per attempt, whether a failure is retryable
    try_again = false
    begin
      @lockd_recovered = false
      old_mtime =
        begin
          Util::uncache @lockd_recover rescue nil
          File::stat(@lockd_recover).mtime
        rescue
          # marker missing or unreadable: use "now" as the baseline
          Time::now
        end
      ret = yield
    ensure
      new_mtime =
        begin
          Util::uncache @lockd_recover rescue nil
          File::stat(@lockd_recover).mtime
        rescue
          old_mtime
        end

      if new_mtime and old_mtime and new_mtime > old_mtime and not @lockd_recovered
        try_again = true
      end
    end
  rescue
    if try_again
      warn{ "a remote lockd recovery has invalidated this transaction!" }
      warn{ "retrying..."}
      sleep 120
      retry
    else
      raise
    end
  end
  ret
  #}}}
end
|
313
|
+
#
|
314
|
+
# TODO - perhaps should not retry on SQLException?? yet errors seem to map to
|
315
|
+
# this exception even when the sql is fine... safest (and most annoying) is to
|
316
|
+
# simply retry.
|
317
|
+
#
|
318
|
+
# Runs the given block and returns its value, retrying on failure for
# read-write transactions.
#
# With the 'read_only' option set the block runs exactly once. Otherwise any
# StandardError triggers a retry after sleeping for the next interval of
# @transaction_retries_sc, until @transaction_retries failures have been
# collected; the last error is then re-raised. A retry count of 0 disables
# retrying altogether. An error is only logged when it differs (per
# Util::erreq) from the previous one.
def transaction_wrap opts = {}
  #{{{
  return yield if Util::getopt('read_only', opts)

  failures = []
  @transaction_retries_sc.reset
  begin
    yield
  rescue => e
    raise if @transaction_retries == 0
    if failures.size >= @transaction_retries
      error{ "MAXIMUM TRANSACTION RETRIES SURPASSED" }
      raise
    end
    warn{ e } if(failures.empty? or not Util::erreq(failures[-1], e))
    failures << e
    warn{ "retry <#{ failures.size }>..." }
    sleep @transaction_retries_sc.next
    retry
  end
  #}}}
end
|
348
|
+
# Runs the given block and returns its value. Unless the 'read_only' option is
# set, any '.nfs*' entries in @dirname that did not exist before the block ran
# are removed afterwards.
def sillyclean opts = {}
  #{{{
  return yield if Util::getopt('read_only', opts)

  pattern = File::join(@dirname, '.nfs*')
  before = Dir[pattern]
  result = yield
  after = Dir[pattern]
  (after - before).each{|path| FileUtils::rm_rf path}
  result
  #}}}
end
|
365
|
+
# Acquires the queue lock, runs the given block while holding it, releases the
# lock and returns the block's value.
#
# The lock is a non-blocking posixlock on @lockfile: shared when the
# 'read_only' option is set, exclusive otherwise. Each attempt polls up to 42
# times with a random sub-second sleep in between. While polling, a
# per-process "waiting" marker file exists; while the lock is held, a
# per-process "lock" marker file exists and a Refresher is running on the
# lockfile. Both markers are removed and the Refresher killed on the way out.
#
# When an attempt fails, the lockfile's mtime is compared against
# @aquire_lock_lockfile_stale_age; only if it is stale is a warning logged and
# (when @attempt_lockd_recovery is set) #lockd_recover invoked. The method then
# sleeps for the next @aquire_lock_sc interval and tries again, indefinitely.
#
# Differences from the previous implementation: the stale-lockfile warning is
# no longer emitted for fresh lockfiles, unused locals were dropped, and
# File::open replaces Kernel#open.
def aquire_lock opts = {}
  #{{{
  ro = Util::getopt 'read_only', opts
  ret = nil

  @aquire_lock_sc.reset

  waiting, ltype, lfile =
    if ro
      [@waiting_r, File::LOCK_SH | File::LOCK_NB, @lock_r]
    else
      [@waiting_w, File::LOCK_EX | File::LOCK_NB, @lock_w]
    end

  aquired = false

  until aquired
    debug{ "aquiring lock" }

    File::open(@lockfile, 'a+') do |lf|
      locked = false
      refresher = nil

      begin
        FileUtils::touch waiting
        # poll
        42.times do
          locked = lf.posixlock(ltype)
          break if locked
          sleep rand
        end

        if locked
          aquired = true
          refresher = Refresher::new @lockfile, @aquire_lock_refresh_rate
          debug{ "refresher pid <#{ refresher.pid }> refresh_rate <#{ @aquire_lock_refresh_rate }>" }
          FileUtils::rm_f waiting rescue nil
          FileUtils::touch lfile rescue nil
          debug{ "aquired lock" }
          ret = yield
          debug{ "released lock" }
        else
          mtime = File::stat(@lockfile).mtime
          stale = mtime < (Time::now - @aquire_lock_lockfile_stale_age)
          if stale
            warn{ "detected stale lockfile of mtime <#{ mtime }>" }
            lockd_recover if @attempt_lockd_recovery
          end
          pause = @aquire_lock_sc.next
          debug{ "failed to aquire lock - sleep(#{ pause })" }
          sleep pause
        end

      ensure
        if locked
          unlocked = false
          begin
            42.times do
              unlocked = lf.posixlock(File::LOCK_UN | File::LOCK_NB)
              break if unlocked
              sleep rand
            end
          ensure
            # last resort: blocking unlock if none of the polls succeeded
            lf.posixlock File::LOCK_UN unless unlocked
          end
        end
        refresher.kill if refresher
        FileUtils::rm_f waiting rescue nil
        FileUtils::rm_f lfile rescue nil
      end
    end
  end
  ret
  #}}}
end
|
450
|
+
# Opens the sqlite database at @path, yields the handle to the block and
# returns the block's value. The handle is also published as @db and as the
# global $db for the duration of the block; both are reset to nil afterwards
# and the handle is closed if it was opened.
#
# Raises 'db has no schema' when the @schema file does not exist.
def connect
  #{{{
  result = nil
  connected = nil
  begin
    raise 'db has no schema' unless File::exist?(@schema)
    debug{"connecting to db <#{ @path }>..."}
    @db = SQLite::Database::new(@path, 0)
    $db = @db
    debug{"connected."}
    connected = true
    @db.use_array = true
    result = yield(@db)
  ensure
    @db.close if connected
    @db = nil
    $db = nil
    debug{"disconnected from db <#{ @path }>"}
  end
  result
  #}}}
end
|
470
|
+
# Executes +sql+ on the current connection (@db), forwarding +block+, and
# returns the driver's result. When @sql_debug is set, the statement and the
# first result row are appended to the logger.
#
# Raises 'not in transaction' unless a transaction (or #vacuum) is active.
def execute sql, &block
  #{{{
  raise 'not in transaction' unless @in_transaction
  logger << "SQL:\n#{ sql }\n" if @sql_debug
  # a retry_if_locked wrapper around this call is currently disabled
  rows = @db.execute(sql, &block)
  if @sql_debug && rows && rows.first
    logger << "RESULT:\n#{ rows.first.inspect }\n...\n"
  end
  rows
  #}}}
end
|
484
|
+
#
|
485
|
+
# TODO - add sleep cycle if this ends up getting used
|
486
|
+
#
|
487
|
+
# Runs the given block and returns its value; whenever it raises
# SQLite::BusyException a warning is logged, the method sleeps one second and
# the block is run again. There is no upper bound on the number of retries.
def retry_if_locked
  #{{{
  yield
rescue SQLite::BusyException
  warn{ "database locked - waiting(1.0) and retrying" }
  sleep 1.0
  retry
  #}}}
end
|
500
|
+
# Connects to the database and executes 'vacuum'. Returns self.
#
# No lock is taken and no begin/commit is issued; the in-transaction flag is
# raised only so that #execute accepts the statement, and is always cleared
# afterwards. Raises 'nested transaction' if a transaction is already active.
def vacuum
  #{{{
  raise 'nested transaction' if @in_transaction
  @in_transaction = true
  begin
    connect{ execute 'vacuum' }
  ensure
    @in_transaction = false
  end
  self
  #}}}
end
|
512
|
+
# Attempts to recover from a stuck lock by re-creating the db and lockfile.
# Returns nil when recovery is disabled, true when another process recovered
# while this one was waiting for the recovery lock, and otherwise the value of
# @lockd_recovered (true only if the integrity check passed).
#
# Under the @lockd_recover_lockf lock the method:
#   1. skips everything if the @lockd_recover marker is newer than the time
#      this call started;
#   2. touches the marker, logs a report and pipes it to `mail` (best effort,
#      all errors ignored);
#   3. sleeps @lockd_recover_wait seconds;
#   4. moves the queue directory aside to "<dirname>.tmp", replaces the db and
#      lockfile there with fresh copies of themselves (cp, rm, mv), and runs
#      an integrity check on the copied db;
#   5. moves the directory back in either case; on success it also copies the
#      recovery lockfile over the marker.
# If anything fails after the directory was moved aside, the ensure clause
# moves it back.
#
# NOTE(review): the report recipient is a hard-coded address and the report is
# embedded in a shell here-document, so `$`/backtick sequences in paths would
# be expanded by the shell - worth revisiting.
def lockd_recover
  #{{{
  return nil unless @attempt_lockd_recovery
  warn{ "attempting lockd recovery" }
  time = Time::now
  ret = nil

  @lockd_recover_lockf.lock do
    [@dirname, @path, @lockfile, @lockd_recover].each do |target|
      Util::uncache target rescue nil
    end
    mtime = File::stat(@lockd_recover).mtime rescue time

    if mtime > time
      warn{ "skipping lockd recovery (another node has already recovered)" }
      ret = true
    else
      moved = false
      begin
        FileUtils::touch @lockd_recover
        @lockd_recovered = false

        # best-effort notification: failures here must not stop the recovery
        begin
          report = <<-msg
hostname : #{ Util::hostname }
pid : #{ Process.pid }
time : #{ Time::now }
q :
path : #{ @dirname }
stat : #{ File::stat(@dirname).inspect }
db :
path : #{ @path }
stat : #{ File::stat(@path).inspect }
lockfile :
path : #{ @lockfile }
stat : #{ File::stat(@lockfile).inspect }
          msg
          info{ "LOCKD RECOVERY REPORT" }
          logger << report
          cmd = "mail -s LOCKD_RECOVERY ara.t.howard@noaa.gov <<eof\n#{ report }\neof"
          Util::system cmd
        rescue
          nil
        end

        warn{ "sleeping #{ @lockd_recover_wait }s before continuing..." }
        sleep @lockd_recover_wait

        # move the whole queue directory out of the way
        tmp = "#{ @dirname }.tmp"
        FileUtils::rm_rf tmp
        FileUtils::mv @dirname, tmp
        moved = true

        # replace db and lockfile with fresh copies of themselves
        [@path, @lockfile].each do |original|
          f = File::join(tmp, File::basename(original))
          ftmp = "#{ f }.tmp"
          FileUtils::rm_rf ftmp
          FileUtils::cp f, ftmp
          FileUtils::rm f
          FileUtils::mv ftmp, f
        end

        dbtmp = File::join(tmp, File::basename(@path))
        intact = integrity_check(dbtmp)

        # the directory goes back whether or not the copy is intact
        FileUtils::mv tmp, @dirname
        if intact
          FileUtils::cp @lockd_recover_lockf.path, @lockd_recover
          @lockd_recovered = true
          [@dirname, @path, @lockfile, @lockd_recover].each do |target|
            Util::uncache target rescue nil
          end
          warn{ "lockd recovery complete" }
        else
          @lockd_recovered = false
          error{ "lockd recovery failed" }
        end

        ret = @lockd_recovered
      ensure
        if moved and not @lockd_recovered and tmp and test(?d, tmp)
          FileUtils::mv tmp, @dirname
        end
      end
    end
  end
  ret
  #}}}
end
|
603
|
+
# Logs a debug line and delegates to the class-level integrity_check for
# +path+ (this object's db path by default), returning its result.
def integrity_check path = @path
  #{{{
  debug{ "running integrity_check on <#{ path }>" }
  klass.integrity_check path
  #}}}
end
|
609
|
+
# Runs the given block while holding the queue lock (see #aquire_lock),
# wrapped in #lockd_recover_wrap, and returns the block's value. No database
# connection is opened. Also available as #write_lock and #wlock.
def lock opts = {}
  #{{{
  result = nil
  lockd_recover_wrap do
    aquire_lock(opts){ result = yield }
  end
  result
  #}}}
end
alias write_lock lock
alias wlock write_lock
|
622
|
+
# Runs +block+ while holding the queue lock in read-only (shared) mode:
# identical to #lock with the 'read_only' option forced to true. Also
# available as #rlock.
#
# The options are merged into a new hash so the caller's +opts+ is never
# modified (and a frozen hash may be passed).
def read_lock(opts = {}, &block)
  #{{{
  lock opts.merge('read_only' => true), &block
  #}}}
end
alias rlock read_lock
|
629
|
+
#}}}
|
630
|
+
end # class QDB
|
631
|
+
#}}}
|
632
|
+
end # module RQ
|
633
|
+
$__rq_qdb__ = __FILE__
|
634
|
+
end
|