rq 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,634 @@
1
+ unless defined? $__rq_qdb__
2
+ module RQ
3
+ #{{{
4
+ LIBDIR = File::dirname(File::expand_path(__FILE__)) + File::SEPARATOR unless
5
+ defined? LIBDIR
6
+
7
+ require LIBDIR + 'util'
8
+ require LIBDIR + 'logging'
9
+ require LIBDIR + 'sleepcycle'
10
+ require LIBDIR + 'refresher'
11
+
12
+ class QDB
13
+ #{{{
14
+ include Util
15
+ include Logging
16
+
17
# names of the columns of the jobs table, in tuple order.  the first entry
# ('jid') is declared separately as the integer primary key by SCHEMA, the
# remaining entries are interpolated into the table definition as-is.
+ FIELDS =
18
+ #{{{
19
+ %w(
20
+ jid priority state
21
+ submitted started finished elapsed
22
+ submitter runner
23
+ pid exit_status
24
+ tag command
25
+ )
26
+ #}}}
27
+
28
# sqlite pragma statements.  QDB.create executes this text before SCHEMA and
# QDB.create_schema writes it to the schema file ahead of SCHEMA.
+ PRAGMAS =
29
+ #{{{
30
+ <<-sql
31
+ PRAGMA default_synchronous = FULL;
32
+ sql
33
+ #}}}
34
+
35
# sql that creates the two tables of a queue database:
#
#   jobs       - 'jid integer primary key' followed by every other entry of
#                FIELDS as an untyped column
#   attributes - key/value pairs with 'key' as the primary key
#
# executed by QDB.create and written to the schema file by QDB.create_schema.
+ SCHEMA =
36
+ #{{{
37
+ <<-sql
38
+ create table jobs
39
+ (
40
+ jid integer primary key,
41
+ #{ FIELDS[1..-1].join ",\n " }
42
+ );
43
+ create table attributes
44
+ (
45
+ key,
46
+ value,
47
+ primary key (key)
48
+ );
49
+ sql
50
+ #}}}
51
+
52
# last-resort values for the settings resolved in initialize: each one is used
# only when neither the opts hash nor the class-level accessor of the same
# name supplies a value.
+ DEFAULT_LOGGER = Logger::new(STDERR)
53
+ DEFAULT_SQL_DEBUG = false
54
# maximum number of retries transaction_wrap performs before re-raising
+ DEFAULT_TRANSACTION_RETRIES = 4
55
# NOTE(review): the meaning of the three SleepCycle arguments is defined in
# 'sleepcycle' and is not visible here - presumably min, max, step; confirm
+ DEFAULT_AQUIRE_LOCK_SC = SleepCycle::new(2, 16, 2)
56
+ DEFAULT_TRANSACTION_RETRIES_SC = SleepCycle::new(8, 24, 8)
57
+ DEFAULT_ATTEMPT_LOCKD_RECOVERY = true
58
# seconds lockd_recover sleeps before it starts moving files around
+ DEFAULT_LOCKD_RECOVER_WAIT = 1800
59
# a lockfile whose mtime is older than this many seconds counts as stale
+ DEFAULT_AQUIRE_LOCK_LOCKFILE_STALE_AGE = 1800
60
# handed to Refresher::new together with the lockfile path
# NOTE(review): unit is presumably seconds - confirm against 'refresher'
+ DEFAULT_AQUIRE_LOCK_REFRESH_RATE = 8
61
+
62
+ class << self
63
+ #{{{
64
#
# class-wide settings.  initialize consults these (via klass) after the
# per-instance opts hash and before the DEFAULT_* constants.
#
# declared with attr_accessor because the 'attr :name, true' form is obsolete
# and triggers a deprecation warning on current rubies; the generated readers
# and writers are the same.
#
attr_accessor :logger
attr_accessor :sql_debug
attr_accessor :transaction_retries
attr_accessor :aquire_lock_sc
attr_accessor :transaction_retries_sc
attr_accessor :attempt_lockd_recovery
attr_accessor :lockd_recover_wait
attr_accessor :aquire_lock_lockfile_stale_age
attr_accessor :aquire_lock_refresh_rate
73
+
74
#
# returns FIELDS - the ordered list of job column names
#
def fields
  FIELDS
end
79
#
# opens the sqlite database at +dbpath+, runs 'PRAGMA integrity_check;' and
# returns a truthy value (the match offset) only when the first result row
# reports 'ok'; otherwise nil/false.  the handle is closed before returning
# whenever it was successfully opened.
#
def integrity_check dbpath
#{{{
  verdict = false
  opened = false
  begin
    handle = SQLite::Database::new(dbpath, 0)
    opened = true
    handle.use_array = true
    rows = handle.execute('PRAGMA integrity_check;')
    verdict = (rows and rows.first and (rows.first["integrity_check"] =~ /^\s*ok\s*$/io))
  ensure
    handle.close if opened
  end
  verdict
#}}}
end
96
#
# converts a positional job tuple into a hash keyed by field name.  every
# entry of FIELDS becomes a key; positions missing from +tuple+ map to
# whatever tuple[i] returns for them.
#
def t2h tuple
#{{{
  hash = {}
  FIELDS.size.times do |idx|
    hash[FIELDS[idx]] = tuple[idx]
  end
  hash
#}}}
end
103
#
# converts a hash keyed by field name into a job tuple.  a fresh tuple is
# obtained from #tuple and every entry of FIELDS is copied across by name;
# keys of +h+ that are not in FIELDS are ignored.
#
def h2t h
#{{{
  row = tuple
  FIELDS.each do |name|
    row[name] = h[name]
  end
  row
#}}}
end
110
#
# builds an empty job tuple: an array with one nil slot per entry of FIELDS
# whose 'fields' attribute is set to FIELDS.
#
def tuple
#{{{
  blank = Array::new(FIELDS.size)
  blank.fields = FIELDS
  blank
#}}}
end
117
#
# quotes the values of +tuple+ for use in a sql statement.  nil/false values
# become the bare word NULL, everything else is passed through Util.escape
# (quote character "'", escaped with "'") and wrapped in single quotes.
#
def q tuple
#{{{
  tuple.map do |value|
    next 'NULL' unless value
    quoted = "'"
    quoted << Util.escape(value, "'", "'")
    quoted << "'"
  end
#}}}
end
128
#
# creates a new queue database at +path+: instantiates the QDB, touches its
# lockfile, writes the schema file and then executes PRAGMAS and SCHEMA
# inside a single transaction.  returns the new QDB.
#
def create path, opts = {}
#{{{
  qdb = new(path, opts)
  FileUtils::touch(qdb.lockfile)
  create_schema(qdb.schema)
  qdb.transaction do
    [PRAGMAS, SCHEMA].each{|sql| qdb.execute sql}
  end
  qdb
#}}}
end
140
#
# writes PRAGMAS followed by SCHEMA to the file at +path+.  the text is first
# written to "#{ path }.tmp" and then moved into place so +path+ never holds
# a partially written schema.
#
def create_schema path
#{{{
  tmp = "#{ path }.tmp"
  # File::open, not Kernel#open: the latter treats a path starting with '|'
  # as a command to spawn instead of a file to create
  File::open(tmp, 'w') do |f|
    f.puts PRAGMAS
    f.puts SCHEMA
  end
  FileUtils::mv tmp, path
#}}}
end
150
+ #}}}
151
+ end
152
+
153
#
# read-only state.  path, opts, dirname, schema, fields and lockfile are
# assigned in initialize.
# NOTE(review): @mutex is not assigned anywhere in the visible code, so the
# mutex reader presumably always returns nil - confirm before relying on it.
#
attr_reader :path
attr_reader :opts
attr_reader :dirname
attr_reader :schema
attr_reader :fields
attr_reader :mutex
attr_reader :lockfile

#
# tunables resolved in initialize.  declared with attr_accessor because the
# 'attr :name, true' form is obsolete and triggers a deprecation warning on
# current rubies; the generated readers and writers are the same.
#
attr_accessor :sql_debug
attr_accessor :transaction_retries
attr_accessor :aquire_lock_sc
attr_accessor :transaction_retries_sc
attr_accessor :attempt_lockd_recovery
attr_accessor :lockd_recover_wait
attr_accessor :aquire_lock_lockfile_stale_age
attr_accessor :aquire_lock_refresh_rate
168
+
169
+
170
#
# builds a handle on the queue database file at +path+.  nothing is opened
# here - only settings and derived paths are computed.
#
# each setting is resolved from, in order: the +opts+ hash (keys 'logger',
# 'sql_debug', 'transaction_retries', 'aquire_lock_sc',
# 'transaction_retries_sc', 'attempt_lockd_recovery', 'lockd_recover_wait',
# 'aquire_lock_lockfile_stale_age', 'aquire_lock_refresh_rate'), the
# class-level accessor of the same name, and finally the DEFAULT_* constant.
# 'sql_debug' additionally consults ENV['RQ_SQL_DEBUG'] before its default.
#
# the first candidate that is not nil wins.  the previous '||' chains skipped
# false as well, which made it impossible to switch a setting off - e.g.
# 'attempt_lockd_recovery' => false always ended up true.
# NOTE(review): this assumes Util::getopt returns nil when the option is
# absent - confirm against 'util'.
#
def initialize path, opts = {}
#{{{
  @path = path
  @opts = opts

  pick = lambda{|*candidates| candidates.find{|candidate| not candidate.nil?}}

  @logger =
    pick.call(
      Util::getopt('logger', @opts),
      klass.logger,
      DEFAULT_LOGGER
    )

  @sql_debug =
    pick.call(
      Util::getopt('sql_debug', @opts),
      klass.sql_debug,
      ENV['RQ_SQL_DEBUG'],
      DEFAULT_SQL_DEBUG
    )

  @transaction_retries =
    pick.call(
      Util::getopt('transaction_retries', @opts),
      klass.transaction_retries,
      DEFAULT_TRANSACTION_RETRIES
    )

  @aquire_lock_sc =
    pick.call(
      Util::getopt('aquire_lock_sc', @opts),
      klass.aquire_lock_sc,
      DEFAULT_AQUIRE_LOCK_SC
    )

  @transaction_retries_sc =
    pick.call(
      Util::getopt('transaction_retries_sc', @opts),
      klass.transaction_retries_sc,
      DEFAULT_TRANSACTION_RETRIES_SC
    )

  @attempt_lockd_recovery =
    pick.call(
      Util::getopt('attempt_lockd_recovery', @opts),
      klass.attempt_lockd_recovery,
      DEFAULT_ATTEMPT_LOCKD_RECOVERY
    )

  @lockd_recover_wait =
    pick.call(
      Util::getopt('lockd_recover_wait', @opts),
      klass.lockd_recover_wait,
      DEFAULT_LOCKD_RECOVER_WAIT
    )

  @aquire_lock_lockfile_stale_age =
    pick.call(
      Util::getopt('aquire_lock_lockfile_stale_age', @opts),
      klass.aquire_lock_lockfile_stale_age,
      DEFAULT_AQUIRE_LOCK_LOCKFILE_STALE_AGE
    )

  @aquire_lock_refresh_rate =
    pick.call(
      Util::getopt('aquire_lock_refresh_rate', @opts),
      klass.aquire_lock_refresh_rate,
      DEFAULT_AQUIRE_LOCK_REFRESH_RATE
    )

  # paths derived from the db path; the per-process marker files carry the
  # hostname and pid so every process gets its own
  @schema = "#{ @path }.schema"
  @dirname = File::dirname(path).gsub(%r|/+\s*$|,'')
  @basename = File::basename(path)
  @waiting_w = File::join(@dirname, "#{ Util::hostname }.#{ $$ }.waiting.w")
  @waiting_r = File::join(@dirname, "#{ Util::hostname }.#{ $$ }.waiting.r")
  @lock_w = File::join(@dirname, "#{ Util::hostname }.#{ $$ }.lock.w")
  @lock_r = File::join(@dirname, "#{ Util::hostname }.#{ $$ }.lock.r")
  @lockfile = File::join(@dirname, 'lock')
  @lockf = Lockfile::new("#{ @path }.lock")
  @fields = FIELDS
  @in_transaction = false
  @db = nil

  # lockd recovery state: a marker file next to the queue directory and the
  # lockfile that serialises recovery attempts
  @lockd_recover = "#{ @dirname }.lockd_recover"
  @lockd_recover_lockf = Lockfile::new "#{ @lockd_recover }.lock"
  @lockd_recovered = false
#}}}
end
240
#
# runs the block inside a read-only transaction: identical to
# transaction(opts) with 'read_only' => true added.
#
# the option is added to a copy of +opts+ - the caller's hash is left
# untouched, so a shared or frozen options hash can be passed safely.
#
def ro_transaction(opts = {}, &block)
#{{{
  transaction(opts.merge('read_only' => true), &block)
#}}}
end
246
#
# runs the block inside a database transaction and returns the block's value.
#
# the block is wrapped, from the outside in, by lockd_recover_wrap,
# transaction_wrap, aquire_lock and connect.  unless 'read_only' is set in
# +opts+ the block is bracketed by 'begin' and 'commit'.  raises
# 'nested transaction' when called while another transaction is in progress;
# the in-transaction flag is always cleared on the way out.
#
def transaction opts = {}
#{{{
  raise 'nested transaction' if @in_transaction
  read_only = Util::getopt('read_only', opts)
  result = nil
  @in_transaction = true
  begin
    lockd_recover_wrap(opts) do
      transaction_wrap(opts) do
        aquire_lock(opts) do
          connect do
            execute('begin') unless read_only
            result = yield
            execute('commit') unless read_only
          end
        end
      end
    end
  ensure
    @in_transaction = false
  end
  result
#}}}
end
272
#
# runs the block and returns its value.  when the block fails, the mtime of
# the lockd recovery marker (@lockd_recover) is compared before and after the
# attempt: if it advanced and this process was not the one that recovered
# (@lockd_recovered is false), another node's recovery is taken to have
# invalidated the attempt, so the block is retried after a 120s sleep.  any
# other failure is re-raised.
#
# the retry decision is recomputed for every attempt.  previously the flag
# was only ever set, so after one legitimate retry every later, unrelated
# failure was retried forever.
#
def lockd_recover_wrap opts = {}
#{{{
  ret = nil
  try_again = false
  begin
    # 'retry' re-enters here - forget the verdict of the previous attempt
    try_again = false
    begin
      @lockd_recovered = false
      old_mtime =
        begin
          Util::uncache @lockd_recover rescue nil
          File::stat(@lockd_recover).mtime
        rescue
          # no marker yet: anything written from now on counts as newer
          Time::now
        end
      ret = yield
    ensure
      new_mtime =
        begin
          Util::uncache @lockd_recover rescue nil
          File::stat(@lockd_recover).mtime
        rescue
          old_mtime
        end

      if new_mtime and old_mtime and new_mtime > old_mtime and not @lockd_recovered
        try_again = true
      end
    end
  rescue
    if try_again
      warn{ "a remote lockd recovery has invalidated this transaction!" }
      warn{ "retrying..."}
      sleep 120
      retry
    else
      raise
    end
  end
  ret
#}}}
end
313
+ #
314
+ # TODO - perhaps should not retry on SQLException?? yet errors seem to map to
315
+ # this exception even when the sql is fine... safest (and most annoying) is to
316
+ # simply retry.
317
+ #
318
#
# runs the block, retrying it when it raises.  read-only calls ('read_only'
# set in +opts+) are passed straight through without any retry handling.
#
# otherwise a failing block is retried up to @transaction_retries times,
# sleeping for the next value of @transaction_retries_sc between attempts.
# an error is logged as a warning only when it differs (per Util::erreq) from
# the previous one.  once the retries are used up - or immediately when
# @transaction_retries is 0 - the last error is re-raised.
#
def transaction_wrap opts = {}
#{{{
  return yield if Util::getopt('read_only', opts)

  failures = []
  @transaction_retries_sc.reset
  begin
    yield
  rescue => e
    #rescue SQLite::DatabaseException, SQLite::SQLException, SystemCallError => e
    raise if @transaction_retries == 0
    if failures.size >= @transaction_retries
      error{ "MAXIMUM TRANSACTION RETRIES SURPASSED" }
      raise
    end
    warn{ e } if(failures.empty? or not Util::erreq(failures[-1], e))
    failures << e
    warn{ "retry <#{ failures.size }>..." }
    sleep @transaction_retries_sc.next
    retry
  end
#}}}
end
348
#
# runs the block and afterwards removes every '.nfs*' entry in @dirname that
# did not exist before the block ran.  read-only calls ('read_only' set in
# +opts+) just run the block.  returns the block's value.
#
def sillyclean opts = {}
#{{{
  return yield if Util::getopt('read_only', opts)

  pattern = File::join @dirname,'.nfs*'
  before = Dir[pattern]
  result = yield
  (Dir[pattern] - before).each{|leftover| FileUtils::rm_rf leftover}
  result
#}}}
end
365
#
# takes the queue lock, runs the block while holding it and returns the
# block's value.  a shared lock is requested when 'read_only' is set in
# +opts+, an exclusive one otherwise.
#
# each round opens @lockfile and polls up to 42 times (random sub-second
# sleeps in between) for a non-blocking lock via posixlock.  while polling a
# per-process 'waiting' marker exists; while the lock is held a per-process
# 'lock' marker exists and a Refresher is running on the lockfile.  both
# markers are removed and the lock released on the way out, whatever happens.
#
# when a round fails the lockfile's age is checked: only if it is older than
# @aquire_lock_lockfile_stale_age seconds is it reported as stale and, if
# enabled, lockd_recover attempted.  (the stale warning used to be logged
# after every failed round, stale or not.)  the method then sleeps for the
# next value of @aquire_lock_sc and starts another round.
#
# NOTE(review): posixlock is not a core File method - presumably provided by
# an extension loaded elsewhere; confirm.
#
def aquire_lock opts = {}
#{{{
  ro = Util::getopt 'read_only', opts
  ret = nil

  @aquire_lock_sc.reset

  # LOCK_NB is part of every request so that a poll never blocks
  waiting, ltype, lfile =
    if ro
      [@waiting_r, File::LOCK_SH | File::LOCK_NB, @lock_r]
    else
      [@waiting_w, File::LOCK_EX | File::LOCK_NB, @lock_w]
    end

  aquired = false

  until aquired
    debug{ "aquiring lock" }

    # File::open, not Kernel#open, so the path is never treated as a command
    File::open(@lockfile, 'a+') do |lf|
      locked = false
      refresher = nil

      begin
        FileUtils::touch waiting
        # poll
        42.times do
          locked = lf.posixlock(ltype)
          break if locked
          sleep rand
        end

        if locked
          aquired = true
          refresher = Refresher::new @lockfile, @aquire_lock_refresh_rate
          debug{ "refresher pid <#{ refresher.pid }> refresh_rate <#{ @aquire_lock_refresh_rate }>" }
          FileUtils::rm_f waiting rescue nil
          FileUtils::touch lfile rescue nil
          debug{ "aquired lock" }
          ret = yield
          debug{ "released lock" }
        else
          mtime = File::stat(@lockfile).mtime
          stale = mtime < (Time::now - @aquire_lock_lockfile_stale_age)
          if stale
            warn{ "detected stale lockfile of mtime <#{ mtime }>" }
            lockd_recover if @attempt_lockd_recovery
          end
          sc = @aquire_lock_sc.next
          debug{ "failed to aquire lock - sleep(#{ sc })" }
          sleep sc
        end

      ensure
        if locked
          unlocked = false
          begin
            42.times do
              unlocked = lf.posixlock(File::LOCK_UN | File::LOCK_NB)
              break if unlocked
              sleep rand
            end
          ensure
            # last resort: a blocking unlock
            lf.posixlock File::LOCK_UN unless unlocked
          end
        end
        refresher.kill if refresher
        FileUtils::rm_f waiting rescue nil
        FileUtils::rm_f lfile rescue nil
      end
    end
  end
  ret
#}}}
end
450
#
# opens the sqlite database at @path, yields the handle and returns the
# block's value.  raises 'db has no schema' when the schema file (@schema)
# does not exist.  while the block runs the handle is available as @db and as
# the global $db; afterwards the handle is closed (if it was opened) and both
# are reset to nil.
#
def connect
#{{{
  result = nil
  opened = false
  begin
    raise 'db has no schema' unless File::exist?(@schema)
    debug{"connecting to db <#{ @path }>..."}
    $db = @db = SQLite::Database::new(@path, 0)
    debug{"connected."}
    opened = true
    @db.use_array = true
    result = yield(@db)
  ensure
    @db.close if opened
    $db = @db = nil
    debug{"disconnected from db <#{ @path }>"}
  end
  result
#}}}
end
470
#
# executes +sql+ on the current connection (@db), passing the block through,
# and returns the result.  raises 'not in transaction' unless a transaction
# is in progress.  with @sql_debug set, the statement and the first result
# row are appended to the logger.
#
def execute sql, &block
#{{{
  raise 'not in transaction' unless @in_transaction
  logger << "SQL:\n#{ sql }\n" if @sql_debug
  #rows = retry_if_locked{ @db.execute sql, &block }
  rows = @db.execute(sql, &block)
  if @sql_debug and rows and rows.first
    logger << "RESULT:\n#{ rows.first.inspect }\n...\n"
  end
  rows
#}}}
end
484
+ #
485
+ # TODO - add sleep cycle if this ends up getting used
486
+ #
487
#
# runs the block and returns its value; whenever it raises
# SQLite::BusyException a warning is logged and the block is run again after
# one second.  there is no upper bound on the number of attempts.
#
def retry_if_locked
#{{{
  yield
rescue SQLite::BusyException
  warn{ "database locked - waiting(1.0) and retrying" }
  sleep 1.0
  retry
#}}}
end
500
#
# connects to the database and executes 'vacuum'.  raises
# 'nested transaction' when a transaction is already in progress; the
# in-transaction flag is held for the duration and always cleared afterwards.
# returns self.
#
def vacuum
#{{{
  raise 'nested transaction' if @in_transaction
  @in_transaction = true
  begin
    connect do
      execute 'vacuum'
    end
  ensure
    @in_transaction = false
  end
  self
#}}}
end
512
#
# attempts to recover from a wedged lock by replacing the queue directory's
# db and lockfile with fresh copies of themselves.
#
# returns nil immediately unless @attempt_lockd_recovery is set.  otherwise
# the whole procedure runs while holding @lockd_recover_lockf:
#
#   - if the recovery marker (@lockd_recover) has an mtime later than the
#     moment this call started, another node is taken to have recovered
#     already: nothing is done and true is returned
#   - else the marker is touched, a report is logged and mailed (best effort,
#     errors are swallowed), the method sleeps @lockd_recover_wait seconds,
#     moves the queue directory aside, re-creates the db and the lockfile by
#     copy / remove / rename, runs an integrity check on the db copy and
#     moves the directory back
#
# @lockd_recovered - and the return value - is true only when the integrity
# check passed.  the ensure clause moves the directory back if it is still
# parked aside after a failure.
#
# NOTE(review): the copy/remove/rename presumably exists to give the files
# new inodes so stale nfs locks no longer apply - confirm.
# NOTE(review): the report recipient is a hard-coded address.
#
+ def lockd_recover
513
+ #{{{
514
+ return nil unless @attempt_lockd_recovery
515
+ warn{ "attempting lockd recovery" }
516
# reference point for the 'has somebody else recovered meanwhile' check below
+ time = Time::now
517
+ ret = nil
518
+
519
+ @lockd_recover_lockf.lock do
520
+ Util::uncache @dirname rescue nil
521
+ Util::uncache @path rescue nil
522
+ Util::uncache @lockfile rescue nil
523
+ Util::uncache @lockd_recover rescue nil
524
# a missing marker falls back to 'time', which makes the comparison false
+ mtime = File::stat(@lockd_recover).mtime rescue time
525
+
526
+ if mtime > time
527
+ warn{ "skipping lockd recovery (another node has already recovered)" }
528
+ ret = true
529
+ else
530
+ moved = false
531
+ begin
532
+ FileUtils::touch @lockd_recover
533
+ @lockd_recovered = false
534
+
535
# reporting is best effort: any failure in here is ignored
+ begin
536
+ report = <<-msg
537
+ hostname : #{ Util::hostname }
538
+ pid : #{ Process.pid }
539
+ time : #{ Time::now }
540
+ q :
541
+ path : #{ @dirname }
542
+ stat : #{ File::stat(@dirname).inspect }
543
+ db :
544
+ path : #{ @path }
545
+ stat : #{ File::stat(@path).inspect }
546
+ lockfile :
547
+ path : #{ @lockfile }
548
+ stat : #{ File::stat(@lockfile).inspect }
549
+ msg
550
+ info{ "LOCKD RECOVERY REPORT" }
551
+ logger << report
552
+ cmd = "mail -s LOCKD_RECOVERY ara.t.howard@noaa.gov <<eof\n#{ report }\neof"
553
+ Util::system cmd
554
+ rescue
555
+ nil
556
+ end
557
+
558
+ warn{ "sleeping #{ @lockd_recover_wait }s before continuing..." }
559
+ sleep @lockd_recover_wait
560
+
561
# park the whole queue directory under a temporary name
+ tmp = "#{ @dirname }.tmp"
562
+ FileUtils::rm_rf tmp
563
+ FileUtils::mv @dirname, tmp
564
+ moved = true
565
+
566
# replace the db and the lockfile inside the parked directory by copies
+ rfiles = [@path, @lockfile].map{|f| File::join(tmp,File::basename(f))}
567
+ rfiles.each do |f|
568
+ ftmp = "#{ f }.tmp"
569
+ FileUtils::rm_rf ftmp
570
+ FileUtils::cp f, ftmp
571
+ FileUtils::rm f
572
+ FileUtils::mv ftmp, f
573
+ end
574
+
575
+ dbtmp = File::join(tmp,File::basename(@path))
576
+
577
# the directory is moved back in both branches; only the flag differs
+ if integrity_check(dbtmp)
578
+ FileUtils::mv tmp, @dirname
579
+ FileUtils::cp @lockd_recover_lockf.path, @lockd_recover
580
+ @lockd_recovered = true
581
+ Util::uncache @dirname rescue nil
582
+ Util::uncache @path rescue nil
583
+ Util::uncache @lockfile rescue nil
584
+ Util::uncache @lockd_recover rescue nil
585
+ warn{ "lockd recovery complete" }
586
+ else
587
+ FileUtils::mv tmp, @dirname
588
+ @lockd_recovered = false
589
+ error{ "lockd recovery failed" }
590
+ end
591
+
592
+ ret = @lockd_recovered
593
+ ensure
594
# safety net: put the directory back if a failure left it parked
+ if moved and not @lockd_recovered and tmp and test(?d, tmp)
595
+ FileUtils::mv tmp, @dirname
596
+ end
597
+ end
598
+ end
599
+ end
600
+ ret
601
+ #}}}
602
+ end
603
#
# runs the class-level integrity check on +path+ (defaults to this queue's
# db file) and returns its verdict.
#
def integrity_check path = @path
#{{{
  debug{ "running integrity_check on <#{ path }>" }
  verdict = klass.integrity_check(path)
  verdict
#}}}
end
609
#
# runs the block while holding the queue lock (see aquire_lock for the
# meaning of +opts+), wrapped in lockd_recover_wrap, and returns the block's
# value.  also available as write_lock and wlock.
#
def lock opts = {}
#{{{
  held = nil
  lockd_recover_wrap do
    aquire_lock(opts){ held = yield }
  end
  held
#}}}
end
alias write_lock lock
alias wlock write_lock
622
#
# runs the block while holding a read (shared) lock: identical to lock(opts)
# with 'read_only' => true added.  also available as rlock.
#
# the option is added to a copy of +opts+ - the caller's hash is left
# untouched, so a shared or frozen options hash can be passed safely.
#
def read_lock(opts = {}, &block)
#{{{
  lock(opts.merge('read_only' => true), &block)
#}}}
end
alias rlock read_lock
629
+ #}}}
630
+ end # class QDB
631
+ #}}}
632
+ end # module RQ
633
+ $__rq_qdb__ = __FILE__
634
+ end