rq 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,634 @@
1
+ unless defined? $__rq_qdb__
2
+ module RQ
3
+ #{{{
4
+ LIBDIR = File::dirname(File::expand_path(__FILE__)) + File::SEPARATOR unless
5
+ defined? LIBDIR
6
+
7
+ require LIBDIR + 'util'
8
+ require LIBDIR + 'logging'
9
+ require LIBDIR + 'sleepcycle'
10
+ require LIBDIR + 'refresher'
11
+
12
+ class QDB
13
+ #{{{
14
+ include Util
15
+ include Logging
16
+
17
# Column names of the jobs table, in order. The first entry (jid) is declared
# separately in SCHEMA as the integer primary key; the rest are interpolated
# into the create statement as untyped columns.
FIELDS =
#{{{
  %w(
    jid priority state
    submitted started finished elapsed
    submitter runner
    pid exit_status
    tag command
  )
#}}}

# SQL executed ahead of SCHEMA when a queue database is created
# (see QDB.create and QDB.create_schema).
PRAGMAS =
#{{{
  <<-sql
PRAGMA default_synchronous = FULL;
  sql
#}}}

# SQL that creates the two tables: jobs (one column per FIELDS entry) and
# attributes (key/value pairs keyed on key).
SCHEMA =
#{{{
  <<-sql
create table jobs
(
jid integer primary key,
#{ FIELDS[1..-1].join ",\n " }
);
create table attributes
(
key,
value,
primary key (key)
);
  sql
#}}}

# Fallback settings. QDB#initialize uses each of these only when neither the
# options hash nor the matching class-level accessor supplies a value.
DEFAULT_LOGGER = Logger::new(STDERR)
DEFAULT_SQL_DEBUG = false
# maximum number of failed attempts transaction_wrap tolerates before re-raising
DEFAULT_TRANSACTION_RETRIES = 4
# NOTE(review): SleepCycle argument meaning is not visible here - presumably
# (min, max, increment) in seconds; confirm against sleepcycle.rb
DEFAULT_AQUIRE_LOCK_SC = SleepCycle::new(2, 16, 2)
DEFAULT_TRANSACTION_RETRIES_SC = SleepCycle::new(8, 24, 8)
DEFAULT_ATTEMPT_LOCKD_RECOVERY = true
# passed to sleep in lockd_recover, i.e. seconds
DEFAULT_LOCKD_RECOVER_WAIT = 1800
# subtracted from Time::now in aquire_lock, i.e. seconds
DEFAULT_AQUIRE_LOCK_LOCKFILE_STALE_AGE = 1800
# handed to Refresher::new in aquire_lock
DEFAULT_AQUIRE_LOCK_REFRESH_RATE = 8
61
+
62
class << self
#{{{
  # Class-wide settings. QDB#initialize consults these after the per-instance
  # options hash and before the DEFAULT_* constants.
  attr_accessor :logger
  attr_accessor :sql_debug
  attr_accessor :transaction_retries
  attr_accessor :aquire_lock_sc
  attr_accessor :transaction_retries_sc
  attr_accessor :attempt_lockd_recovery
  attr_accessor :lockd_recover_wait
  attr_accessor :aquire_lock_lockfile_stale_age
  attr_accessor :aquire_lock_refresh_rate

  # Returns the list of job column names (the FIELDS constant itself).
  def fields
  #{{{
    FIELDS
  #}}}
  end

  # Opens the sqlite database at dbpath, runs 'PRAGMA integrity_check;' and
  # returns a truthy value (the regexp match index) when the first result row
  # reports "ok", otherwise nil/false. The handle is always closed again;
  # errors raised by the database layer propagate to the caller.
  def integrity_check dbpath
  #{{{
    ret = false
    tuple = nil
    db = nil
    opened = false
    begin
      db = SQLite::Database::new dbpath, 0
      opened = true
      db.use_array = true
      tuple = db.execute 'PRAGMA integrity_check;'
      ret = (tuple and tuple.first and (tuple.first["integrity_check"] =~ /^\s*ok\s*$/io))
    ensure
      db.close if opened
    end
    ret
  #}}}
  end

  # Converts a positional tuple into a hash keyed by field name.
  def t2h tuple
  #{{{
    h = {}
    FIELDS.each_with_index{|f,i| h[f] = tuple[i]}
    h
  #}}}
  end

  # Converts a hash keyed by field name into a tuple built by #tuple.
  def h2t h
  #{{{
    t = tuple
    FIELDS.each{|f| t[f] = h[f]}
    t
  #}}}
  end

  # Returns an empty tuple: an Array of FIELDS.size nils with its field names
  # attached.
  # NOTE(review): Array#fields= is not core ruby - presumably provided by the
  # arrayfields extension loaded elsewhere; confirm.
  def tuple
  #{{{
    t = Array::new FIELDS.size
    t.fields = FIELDS
    t
  #}}}
  end

  # Quotes every element of tuple for inclusion in SQL: nil/false become the
  # bare word NULL, anything else is escaped via Util.escape and wrapped in
  # single quotes. A new string is built per element rather than appending to
  # a string literal.
  def q tuple
  #{{{
    tuple.map do |f|
      if f
        "'" + Util.escape(f,"'","'") + "'"
      else
        'NULL'
      end
    end
  #}}}
  end

  # Creates a new queue database at path: touches the lockfile, writes the
  # schema file, then executes PRAGMAS and SCHEMA inside one transaction.
  # Returns the new QDB instance.
  def create path, opts = {}
  #{{{
    qdb = new path, opts
    FileUtils::touch qdb.lockfile
    create_schema qdb.schema
    qdb.transaction do
      qdb.execute PRAGMAS
      qdb.execute SCHEMA
    end
    qdb
  #}}}
  end

  # Writes PRAGMAS followed by SCHEMA to path. The text goes to "<path>.tmp"
  # first and is then moved into place so that path never holds a partial
  # file. File::open is used instead of Kernel#open so a path beginning with
  # '|' can never be interpreted as a command.
  def create_schema path
  #{{{
    tmp = "#{ path }.tmp"
    File::open(tmp,'w') do |f|
      f.puts PRAGMAS
      f.puts SCHEMA
    end
    FileUtils::mv tmp, path
  #}}}
  end
#}}}
end
152
+
153
# read-only state established by initialize
attr_reader :path
attr_reader :opts
attr_reader :dirname
attr_reader :schema
attr_reader :fields
attr_reader :mutex
attr_reader :lockfile
# tunables - readable and writable after construction (attr_accessor replaces
# the obsolete 'attr name, true' form, which defines the same two methods)
attr_accessor :sql_debug
attr_accessor :transaction_retries
attr_accessor :aquire_lock_sc
attr_accessor :transaction_retries_sc
attr_accessor :attempt_lockd_recovery
attr_accessor :lockd_recover_wait
attr_accessor :aquire_lock_lockfile_stale_age
attr_accessor :aquire_lock_refresh_rate
168
+
169
+
170
# Builds a handle on the queue database at path. No file is opened here; only
# settings and derived path names are computed.
#
# path - location of the sqlite database file
# opts - optional per-instance overrides, looked up by setting name
def initialize path, opts = {}
#{{{
  @path = path
  @opts = opts

  # every setting resolves the same way: explicit option first, then the
  # class-level accessor of the same name, then the supplied fallback.
  # NOTE(review): because the chain uses ||, an explicit false/nil option can
  # never override a truthy fallback (e.g. attempt_lockd_recovery) - confirm
  # whether that is intended.
  setting = lambda do |name, fallback|
    Util::getopt(name, @opts) || klass.send(name) || fallback
  end

  @logger = setting['logger', DEFAULT_LOGGER]
  @sql_debug = setting['sql_debug', ENV['RQ_SQL_DEBUG'] || DEFAULT_SQL_DEBUG]
  @transaction_retries = setting['transaction_retries', DEFAULT_TRANSACTION_RETRIES]
  @aquire_lock_sc = setting['aquire_lock_sc', DEFAULT_AQUIRE_LOCK_SC]
  @transaction_retries_sc = setting['transaction_retries_sc', DEFAULT_TRANSACTION_RETRIES_SC]
  @attempt_lockd_recovery = setting['attempt_lockd_recovery', DEFAULT_ATTEMPT_LOCKD_RECOVERY]
  @lockd_recover_wait = setting['lockd_recover_wait', DEFAULT_LOCKD_RECOVER_WAIT]
  @aquire_lock_lockfile_stale_age =
    setting['aquire_lock_lockfile_stale_age', DEFAULT_AQUIRE_LOCK_LOCKFILE_STALE_AGE]
  @aquire_lock_refresh_rate =
    setting['aquire_lock_refresh_rate', DEFAULT_AQUIRE_LOCK_REFRESH_RATE]

  @schema = "#{ @path }.schema"
  @dirname = File::dirname(path).gsub(%r|/+\s*$|,'')
  @basename = File::basename(path)

  # per-host, per-process marker files living next to the database
  marker = lambda do |suffix|
    File::join(@dirname, "#{ Util::hostname }.#{ $$ }.#{ suffix }")
  end
  @waiting_w = marker['waiting.w']
  @waiting_r = marker['waiting.r']
  @lock_w = marker['lock.w']
  @lock_r = marker['lock.r']

  @lockfile = File::join(@dirname, 'lock')
  @lockf = Lockfile::new("#{ @path }.lock")
  @fields = FIELDS
  @in_transaction = false
  @db = nil

  # named after the directory itself (a sibling of it, not a file inside it)
  @lockd_recover = "#{ @dirname }.lockd_recover"
  @lockd_recover_lockf = Lockfile::new "#{ @lockd_recover }.lock"
  @lockd_recovered = false
#}}}
end
240
# Runs block inside a read-only transaction.
#
# opts - options forwarded to #transaction; 'read_only' is forced to true on
#        a copy, so the caller's hash is left untouched
# Returns whatever #transaction returns.
def ro_transaction(opts = {}, &block)
#{{{
  transaction(opts.merge('read_only' => true), &block)
#}}}
end
246
# Runs the given block with the database locked and connected, wrapping it in
# 'begin'/'commit' unless the 'read_only' option is set.
#
# Layers, outermost first: lockd_recover_wrap, transaction_wrap, aquire_lock,
# connect. Raises 'nested transaction' when called while another transaction
# on this object is in progress. Returns the block's value.
def transaction opts = {}
#{{{
  raise 'nested transaction' if @in_transaction
  read_only = Util::getopt('read_only', opts)
  result = nil
  begin
    @in_transaction = true
    lockd_recover_wrap(opts) do
      transaction_wrap(opts) do
        aquire_lock(opts) do
          # sillyclean(opts) used to wrap the connect call here
          connect do
            if read_only
              result = yield
            else
              execute 'begin'
              result = yield
              execute 'commit'
            end
          end
        end
      end
    end
  ensure
    # always cleared so that a failure does not block later transactions
    @in_transaction = false
  end
  result
#}}}
end
272
# Runs the block and retries it when it failed while another node performed a
# lockd recovery underneath it.
#
# The mtime of the @lockd_recover marker is sampled before and after the
# block. If the block raised, the marker got newer in between, and this
# object did not perform the recovery itself, the failure is attributed to
# the remote recovery: a warning is logged, the method sleeps 120 seconds and
# runs the block again. Any other failure is re-raised unchanged.
#
# Returns the block's value.
def lockd_recover_wrap opts = {}
#{{{
  ret = nil
  try_again = false
  begin
    # must be cleared on every attempt: otherwise a single invalidated
    # attempt would make every later, unrelated failure retry forever
    try_again = false
    begin
      @lockd_recovered = false
      old_mtime =
        begin
          Util::uncache @lockd_recover rescue nil
          File::stat(@lockd_recover).mtime
        rescue
          # no marker yet - anything created from now on counts as newer
          Time::now
        end
      ret = yield
    ensure
      new_mtime =
        begin
          Util::uncache @lockd_recover rescue nil
          File::stat(@lockd_recover).mtime
        rescue
          old_mtime
        end

      if new_mtime and old_mtime and new_mtime > old_mtime and not @lockd_recovered
        try_again = true
      end
    end
  rescue
    if try_again
      warn{ "a remote lockd recovery has invalidated this transaction!" }
      warn{ "retrying..."}
      sleep 120
      retry
    else
      raise
    end
  end
  ret
#}}}
end
313
+ #
314
+ # TODO - perhaps should not retry on SQLException?? yet errors seem to map to
315
+ # this exception even when the sql is fine... safest (and most annoying) is to
316
+ # simply retry.
317
+ #
318
# Runs the block, retrying it after a failure for write transactions.
#
# Read-only calls ('read_only' option set) run the block exactly once. For
# writes, any StandardError triggers a retry after sleeping for the next
# interval of @transaction_retries_sc, until @transaction_retries failures
# have been recorded; the following failure is re-raised. A retry count of 0
# disables retrying. Returns the block's value.
def transaction_wrap opts = {}
#{{{
  return yield if Util::getopt('read_only', opts)

  failures = []
  result = nil
  @transaction_retries_sc.reset
  begin
    result = yield
  rescue => e
    raise if @transaction_retries == 0
    if failures.size >= @transaction_retries
      error{ "MAXIMUM TRANSACTION RETRIES SURPASSED" }
      raise
    end
    # only log an error that differs from the one logged last
    warn{ e } if(failures.empty? or not Util::erreq(failures[-1], e))
    failures << e
    warn{ "retry <#{ failures.size }>..." }
    sleep @transaction_retries_sc.next
    retry
  end
  result
#}}}
end
348
# Runs the block and afterwards removes any '.nfs*' entries in @dirname that
# did not exist before the block ran. Read-only calls just run the block.
# Returns the block's value.
def sillyclean opts = {}
#{{{
  return yield if Util::getopt('read_only', opts)

  pattern = File::join @dirname,'.nfs*'
  before = Dir[pattern]
  result = yield
  # only entries that appeared while the block ran are removed
  (Dir[pattern] - before).each{|leftover| FileUtils::rm_rf leftover}
  result
#}}}
end
365
# Takes a posix lock on @lockfile (shared when the 'read_only' option is set,
# exclusive otherwise), runs the block while holding it, and releases it.
#
# Each round opens the lockfile, touches this process' "waiting" marker and
# polls the non-blocking lock up to 42 times with random sub-second sleeps.
# On success a Refresher is started on the lockfile, the "lock" marker is
# touched and the block runs. On failure the lockfile's mtime is compared to
# @aquire_lock_lockfile_stale_age: a stale lockfile is reported and, if
# enabled, lockd_recover is attempted; then the method sleeps for the next
# @aquire_lock_sc interval and starts another round.
#
# Returns the block's value.
def aquire_lock opts = {}
#{{{
  ro = Util::getopt 'read_only', opts
  ret = nil

  @aquire_lock_sc.reset

  waiting, ltype, lfile =
    if ro
      [@waiting_r, File::LOCK_SH | File::LOCK_NB, @lock_r]
    else
      [@waiting_w, File::LOCK_EX | File::LOCK_NB, @lock_w]
    end

  aquired = false

  until aquired
    debug{ "aquiring lock" }

    File::open(@lockfile, 'a+') do |lf|

      locked = false
      refresher = nil
      sc = nil

      begin
        FileUtils::touch waiting
        # poll
        42.times do
          locked = lf.posixlock(ltype | File::LOCK_NB)
          break if locked
          sleep rand
        end

        if locked
          aquired = true
          refresher = Refresher::new @lockfile, @aquire_lock_refresh_rate
          debug{ "refresher pid <#{ refresher.pid }> refresh_rate <#{ @aquire_lock_refresh_rate }>" }
          FileUtils::rm_f waiting rescue nil
          FileUtils::touch lfile rescue nil
          debug{ "aquired lock" }
          ret = yield
          debug{ "released lock" }
        else
          aquired = false
          stat = File::stat @lockfile
          mtime = stat.mtime
          stale = mtime < (Time::now - @aquire_lock_lockfile_stale_age)
          # only report staleness when the lockfile really is stale
          warn{ "detected stale lockfile of mtime <#{ mtime }>" } if stale
          lockd_recover if stale and @attempt_lockd_recovery
          sc = @aquire_lock_sc.next
          debug{ "failed to aquire lock - sleep(#{ sc })" }
          sleep sc
        end

      ensure
        if locked
          unlocked = false
          begin
            42.times do
              unlocked = lf.posixlock(File::LOCK_UN | File::LOCK_NB)
              break if unlocked
              sleep rand
            end
          ensure
            # last resort: blocking unlock if polling never succeeded
            lf.posixlock File::LOCK_UN unless unlocked
          end
        end
        refresher.kill if refresher
        FileUtils::rm_f waiting rescue nil
        FileUtils::rm_f lfile rescue nil
      end
    end
  end
  ret
#}}}
end
450
# Opens the sqlite database at @path, yields the handle to the block and
# closes it again, whatever happens.
#
# While the block runs the handle is also available as @db and as the global
# $db; both are reset to nil afterwards. Raises 'db has no schema' when the
# @schema file does not exist. Returns the block's value.
def connect
#{{{
  result = nil
  connected = nil
  begin
    raise 'db has no schema' unless File::exist?(@schema)
    debug{"connecting to db <#{ @path }>..."}
    $db = @db = SQLite::Database::new(@path, 0)
    debug{"connected."}
    connected = true
    @db.use_array = true
    result = yield(@db)
  ensure
    @db.close if connected
    $db = @db = nil
    debug{"disconnected from db <#{ @path }>"}
  end
  result
#}}}
end
470
# Executes sql on the current connection and returns the driver's result.
# Any block is handed on to the driver. Raises 'not in transaction' unless a
# transaction is in progress. With @sql_debug set, the statement and the
# first result row are appended to the logger.
def execute sql, &block
#{{{
  raise 'not in transaction' unless @in_transaction

  logger << "SQL:\n#{ sql }\n" if @sql_debug

  rows = @db.execute(sql, &block)

  if @sql_debug and rows and rows.first
    logger << "RESULT:\n#{ rows.first.inspect }\n...\n"
  end
  rows
#}}}
end
484
+ #
485
+ # TODO - add sleep cycle if this ends up getting used
486
+ #
487
# Runs the block, and re-runs it one second later for as long as it raises
# SQLite::BusyException. There is no upper bound on the number of attempts.
# Returns the block's value.
def retry_if_locked
#{{{
  yield
rescue SQLite::BusyException
  warn{ "database locked - waiting(1.0) and retrying" }
  sleep 1.0
  retry
#}}}
end
500
# Connects to the database and executes 'vacuum' on it. This goes through
# connect/execute directly and does not pass through aquire_lock. Raises
# 'nested transaction' when a transaction is in progress. Returns self.
def vacuum
#{{{
  raise 'nested transaction' if @in_transaction
  # execute insists on the flag being set, so it is raised for the duration
  @in_transaction = true
  begin
    connect{ execute 'vacuum' }
  ensure
    @in_transaction = false
  end
  self
#}}}
end
512
# Attempts to recover the queue directory after a stale lockfile was detected.
#
# Under the @lockd_recover_lockf lock this method:
#   1. skips the work (returning true) when the @lockd_recover marker was
#      modified after this call started, i.e. somebody else already recovered
#   2. otherwise touches the marker, logs and mails a report, and sleeps
#      @lockd_recover_wait seconds
#   3. moves the whole queue directory aside to "<dirname>.tmp", replaces the
#      database and lockfile with fresh copies of themselves, and runs an
#      integrity check on the copied database
#   4. moves the directory back in either case; @lockd_recovered records
#      whether the integrity check passed
#
# Returns nil when recovery is disabled, true when skipped, otherwise the
# value of @lockd_recovered.
def lockd_recover
#{{{
  return nil unless @attempt_lockd_recovery
  warn{ "attempting lockd recovery" }
  # reference point: a marker newer than this means another recovery happened
  time = Time::now
  ret = nil

  @lockd_recover_lockf.lock do
    Util::uncache @dirname rescue nil
    Util::uncache @path rescue nil
    Util::uncache @lockfile rescue nil
    Util::uncache @lockd_recover rescue nil
    # a missing marker falls back to time, so the comparison below is false
    mtime = File::stat(@lockd_recover).mtime rescue time

    if mtime > time
      warn{ "skipping lockd recovery (another node has already recovered)" }
      ret = true
    else
      moved = false
      begin
        FileUtils::touch @lockd_recover
        @lockd_recovered = false

        # reporting is best effort - any failure in here is ignored
        begin
          report = <<-msg
hostname : #{ Util::hostname }
pid : #{ Process.pid }
time : #{ Time::now }
q :
path : #{ @dirname }
stat : #{ File::stat(@dirname).inspect }
db :
path : #{ @path }
stat : #{ File::stat(@path).inspect }
lockfile :
path : #{ @lockfile }
stat : #{ File::stat(@lockfile).inspect }
          msg
          info{ "LOCKD RECOVERY REPORT" }
          logger << report
          # NOTE(review): the recipient address is hard-coded
          cmd = "mail -s LOCKD_RECOVERY ara.t.howard@noaa.gov <<eof\n#{ report }\neof"
          Util::system cmd
        rescue
          nil
        end

        warn{ "sleeping #{ @lockd_recover_wait }s before continuing..." }
        sleep @lockd_recover_wait

        # move the whole queue directory out of the way
        tmp = "#{ @dirname }.tmp"
        FileUtils::rm_rf tmp
        FileUtils::mv @dirname, tmp
        moved = true

        # replace the db and the lockfile with copies of themselves
        # NOTE(review): presumably to obtain fresh files that carry no locks -
        # confirm
        rfiles = [@path, @lockfile].map{|f| File::join(tmp,File::basename(f))}
        rfiles.each do |f|
          ftmp = "#{ f }.tmp"
          FileUtils::rm_rf ftmp
          FileUtils::cp f, ftmp
          FileUtils::rm f
          FileUtils::mv ftmp, f
        end

        dbtmp = File::join(tmp,File::basename(@path))

        if integrity_check(dbtmp)
          FileUtils::mv tmp, @dirname
          # rewriting the marker gives it a new mtime
          FileUtils::cp @lockd_recover_lockf.path, @lockd_recover
          @lockd_recovered = true
          Util::uncache @dirname rescue nil
          Util::uncache @path rescue nil
          Util::uncache @lockfile rescue nil
          Util::uncache @lockd_recover rescue nil
          warn{ "lockd recovery complete" }
        else
          FileUtils::mv tmp, @dirname
          @lockd_recovered = false
          error{ "lockd recovery failed" }
        end

        ret = @lockd_recovered
      ensure
        # if anything failed after the directory was moved aside, put it back
        if moved and not @lockd_recovered and tmp and test(?d, tmp)
          FileUtils::mv tmp, @dirname
        end
      end
    end
  end
  ret
#}}}
end
603
# Delegates to the class-level integrity_check after logging a debug line.
#
# path - database file to check; defaults to this instance's @path
# Returns whatever the class-level integrity_check returns.
def integrity_check(path = @path)
#{{{
  debug{ "running integrity_check on <#{ path }>" }
  klass.integrity_check path
#}}}
end
609
# Runs the block while holding the queue lock, without connecting to the
# database. The lock is a write lock unless opts carries 'read_only'.
# opts is passed to aquire_lock only; lockd_recover_wrap is called without
# options. Returns the block's value.
def lock opts = {}
#{{{
  result = nil
  lockd_recover_wrap{ aquire_lock(opts){ result = yield } }
  result
#}}}
end
alias write_lock lock
alias wlock write_lock
622
# Runs block while holding a read (shared) lock.
#
# opts - options forwarded to #lock; 'read_only' is forced to true on a copy,
#        so the caller's hash is left untouched
# Returns whatever #lock returns.
def read_lock(opts = {}, &block)
#{{{
  lock opts.merge('read_only' => true), &block
#}}}
end
alias rlock read_lock
629
+ #}}}
630
+ end # class QDB
631
+ #}}}
632
+ end # module RQ
633
+ $__rq_qdb__ = __FILE__
634
+ end