dbmlite3 1.0.0 → 2.0.0.pre.alpha.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,542 @@
1
+
2
+ module Lite3
3
+
4
+ # Lite3::DBM encapsulates a single table in a single SQLite3
5
+ # database file and lets you access it as easily as a Hash.
6
+ # Multiple instances may be opened on different tables in the same
7
+ # database.
8
+ #
9
+ # Note that instances do not explicitly own their database
10
+ # connection; instead, they are managed internally and shared across
11
+ # `DBM` instances.
12
+ class Lite3::DBM
13
include Enumerable
include ErrUtil

# Prefix joined to the caller-supplied table name to form the name of
# the table actually used in the database (see `actual_tbl`).
PREFIX = "dbmlite3_tbl_"

# Key/value table in which each table's serializer name is recorded.
META = :dbmlite3_meta

private_constant :PREFIX, :META
19
+
20
+ #
21
+ # Construction and setup
22
+ #
23
+
24
+
25
+ # Create a new `Lite3::DBM` object that opens database file
26
+ # `filename` and performs subsequent operations on `table`. Both
27
+ # the database file and the table will be created if they do not
28
+ # yet exist. The table name must be a valid name identifier
29
+ # (i.e. matches `/^[a-zA-Z_]\w*$/`).
30
+ #
31
+ # The optional third argument `serializer` is used to choose the
32
+ # serialization method for converting Ruby values into storable
33
+ # strings. There are three options:
34
+ #
35
+ # * `:yaml` uses the `Psych` module.
36
+ # * `:marshal` uses the `Marshal` module.
37
+ # * `:string` simply uses the default `to_s` method, just like the
38
+ # stock `DBM`.
39
+ #
40
+ # Each of these will have their pros and cons. The default is
41
+ # `:yaml` because that is the most portable. `:marshal` tends to
42
+ # be faster but is incompatible across minor Ruby versions.
43
+ #
44
+ # Your serializer choice is registered in a metadata table when
45
+ # `tablename` is created in the SQLite3 file. Afterward, it is an
46
+ # error to attempt to open the table with a different serializer
47
+ # and will result in a Lite3::Error exception.
48
+ #
49
+ # ## Caveats:
50
+ #
51
+ # 1. Both YAML and Marshal serialization have the usual security
52
+ # issues as described in the documentation for `Marshal` and
53
+ # `Psych`. If you are going to let an untrusted entity modify
54
+ # the database, you should not use these methods and instead
55
+ # stick to string conversion.
56
+ #
57
+ # 2. `DBM` does not check your Marshal version; a mismatch will
58
+ # fail dramatically at exactly the wrong time.
59
+ #
60
+ # 3. `filename` is normalized using `File.realpath` and this path
61
+ # is used to look up an existing database handle if one exists.
62
+ # Using hard links or other trickery to defeat this mechanism
63
+ # and open a second handle to the same database is **probably**
64
+ # still harmless but is not something this API guarantees will
65
+ # work correctly.
66
+ #
67
# Open (creating if necessary) table `tablename` in database file
# `filename`, using `serializer` to encode values.
#
# @param filename [String] path of the database file
# @param tablename [String, Symbol] must be a valid identifier
# @param serializer [Symbol] one of `:yaml`, `:marshal`, `:string`
# @raise [Lite3::Error] if the table name or serializer is invalid or
#   the serializer differs from the one registered for the table
def initialize(filename, tablename, serializer = :yaml)
  @filename = filename
  @tablename = tablename
  @valenc, @valdec = value_encoders(serializer)

  # Validate the name before acquiring a handle so a bad name never
  # touches the database.  `\A`/`\z` anchor the whole string; `^`/`$`
  # only anchor a line, which would let "ok\n<anything>" through.
  check("Malformed table name '#{tablename}'; must be a valid identifier") {
    tablename =~ /\A[a-zA-Z_]\w*\z/
  }

  @handle = HandlePool.get(filename)
  @handle.addref(self)

  transaction {
    register_serialization_scheme(serializer)
    @handle.create_key_value_table(actual_tbl)
  }
rescue Error
  # Release our reference (if we got as far as taking one) and re-raise
  # the original exception unchanged.
  close if @handle
  raise
end
88
+
89
+
90
+ # Identical to `initialize` except that if a block is provided, it
91
+ # is evaluated with a new Lite3::DBM which is then closed afterward.
92
+ # This is analogous to `File.open`.
93
# Build a new instance; with a block, pass the instance to it, close
# the instance afterward (even on error) and return the block's value.
# Without a block, return the open instance.
def self.open(filename, tablename, serializer = :yaml, &block)
  db = new(filename, tablename, serializer)

  if block
    begin
      return block.call(db)
    ensure
      db.close
    end
  end

  db
end
103
+
104
+ private
105
+
106
+ # Return encode and decode procs for the requested serialization
107
+ # scheme.
108
# Return a two-element array `[encoder, decoder]` of procs for the
# requested serialization scheme; raises InternalError for an unknown
# scheme.
def value_encoders(serializer)
  case serializer
  when :yaml
    # Psych has gradually moved from unsafe loading by default to safe
    # loading by default.  Old versions have no `unsafe_load`, while
    # new versions make `load` behave like `safe_load`, so use
    # `unsafe_load` when it exists and fall back to `load` otherwise.
    decoder =
      if Psych.respond_to?(:unsafe_load)
        proc { |text| Psych.unsafe_load(text) }
      else
        proc { |text| Psych.load(text) }
      end
    [proc { |obj| Psych.dump(obj) }, decoder]

  when :marshal
    [proc { |obj| Marshal.dump(obj) }, proc { |blob| Marshal.load(blob) }]

  when :string
    # The decoder also calls `to_s` because sqlite preserves some types.
    [proc { |obj| obj.to_s }, proc { |stored| stored.to_s }]

  else
    raise InternalError.new("Invalid serializer selected: '#{serializer}'")
  end
end
139
+
140
+ # Add the serialization scheme for this table to META
141
# Record `req_ser` as the serializer for this table in META, or, if
# the table already has one recorded, verify that it matches.
#
# @param req_ser [Symbol] the serializer requested by the caller
# @raise [Lite3::Error] (via `check`) if a different serializer is
#   already registered for this table
def register_serialization_scheme(req_ser)
  @handle.create_key_value_table(META)

  transaction {
    srlzr = @handle.lookup(META, @tablename)
    if srlzr
      # The table name is now quoted on both sides in the message.
      check("Serializer mismatch for '#{@tablename}'; specified " \
            "#{req_ser} but table uses #{srlzr}") {
        req_ser.to_s == srlzr
      }
    else
      @handle.upsert(META, @tablename, req_ser.to_s)
    end
  }
end
156
+
157
+
158
+
159
+ #
160
+ # Helpers
161
+ #
162
+
163
+
164
+ # Return the actual table name we are using.
165
# Symbol naming the table really used in the database: PREFIX followed
# by the caller-supplied table name.
def actual_tbl
  :"#{PREFIX}#{@tablename}"
end
166
+
167
+
168
+ public
169
+
170
# Human-readable summary: class, object id (hex), file, table name and
# whether the instance is open or closed.  Also used for `inspect`.
def to_s
  state =
    if closed?
      'CLOSED'
    else
      'OPEN'
    end

  "<#{self.class}:0x#{object_id.to_s(16)} file='#{@filename}' tablename='#{@tablename}' #{state}>"
end
alias inspect to_s
176
+
177
+
178
+ # Disassociate `self` from the underlying database. If this is
179
+ # the last `DBM` using it, the handle will (probably) also be
180
+ # closed.
181
+ #
182
+ # Subsequent attempts to use `self` will fail with an error; the
183
+ # only exception to this is the method `closed?` which will return
184
+ # true.
185
# Drop this object's reference on the shared handle, then swap in a
# ClosedHandle placeholder carrying the file and table names.
def close
  live = @handle
  live.delref(self)
  @handle = ClosedHandle.new(@filename, @tablename)
end
189
+
190
+ # Test if this object has been closed. This is safe to call on a
191
+ # closed `DBM`.
192
# True once the handle has been replaced by a ClosedHandle.
def closed?
  @handle.kind_of?(ClosedHandle)
end
195
+
196
+
197
+ #
198
+ # Transactions
199
+ #
200
+
201
+
202
+ # Begins a transaction, evaluates the given block and then ends
203
+ # the transaction. If no error occurred (i.e. no exception was
204
+ # thrown), the transaction is committed; otherwise, it is rolled
205
+ # back.
206
+ #
207
+ # It is safe to call `DBM.transaction` within another
208
+ # `DBM.transaction` block's call chain because `DBM` will not
209
+ # start a new transaction on a database handle that already has
210
+ # one in progress. (It may be possible to trick `DBM` into trying
211
+ # via fibers or other flow control trickery; don't do that.)
212
+ #
213
+ # Note that it's probably not a good idea to assume too much about
214
+ # the precise semantics; I can't guarantee that the underlying
215
+ # library(es) won't change or be replaced outright.
216
+ #
217
+ # That being said, at present, this is simply a wrapper around
218
+ # `Sequel::Database.transaction` with the default options and so
219
+ # is subject to the quirks therein. In version 1.0.0,
220
+ # transactions were always executed in `:deferred` mode via the
221
+ # `sqlite3` gem.
222
+ #
223
+ # @yield [db] The block takes a reference to the receiver as an
224
+ # argument.
225
+ #
226
# Run the block inside the handle's transaction, passing the receiver
# to it; returns whatever the handle's `transaction` returns.
def transaction(&block)
  @handle.transaction do
    block.call(self)
  end
end
229
+
230
+ # Test if there is currently a transaction in progress
231
# Delegates to the handle's `transaction_active?`.
def transaction_active?
  @handle.transaction_active?
end
234
+
235
+
236
+ #
237
+ # Basic hash-like access
238
+ #
239
+
240
+
241
+ # Store `value` at `key` in the database.
242
+ #
243
+ # `key` **must** be a String or a Symbol; Symbols are
244
+ # transparently converted to Strings.
245
+ #
246
+ # `value` **must** be convertible to string by whichever
247
+ # serialization method you have chosen.
248
# Validate/normalize the key, encode the value with the configured
# serializer and upsert the pair; returns the unencoded `value`.
def []=(key, value)
  normalized = check_key(key)
  encoded = @valenc.call(value)
  @handle.upsert(actual_tbl, normalized, encoded)
  value
end
alias store []=
257
+
258
+ # Retrieve the value associated with `key` from the database or
259
+ # nil if it is not present.
260
# Look up `key` via `fetch`, with nil as the fallback value.
def [](key)
  fetch(key, nil)
end
263
+
264
+ # Retrieve the value associated with `key`.
265
+ #
266
+ # `key` **must** be a String or a Symbol; Symbols are
267
+ # transparently converted to Strings.
268
+ #
269
+ # If it is not present and a block is given, evaluate the block
270
+ # with the key as its argument and return that.
271
+ #
272
+ # If no block was given either but one extra parameter was given,
273
+ # that value is returned instead.
274
+ #
275
+ # Finally, if none of these was given, it throws an `IndexError`
276
+ # exception.
277
+ #
278
+ # It is an error if `fetch` is called with more than two arguments.
279
+ #
280
+ # @yield [key] The fallback block.
281
# Return the decoded value stored under `key`.  On a miss, fall back
# in order to: the block's result, the single extra argument, and
# finally an IndexError.
def fetch(key, *args, &default_block)
  # Reject calls with more than one fallback argument.
  supplied = 1 + args.length
  check("Too many arguments for 'fetch'; expected 1 or 2; got #{supplied}") {
    supplied <= 2
  }

  key = check_key(key)

  # A nil lookup result is treated as "no such entry".
  stored = @handle.lookup(actual_tbl, key)

  if stored
    @valdec.call(stored)
  elsif default_block
    default_block.call(key)
  elsif !args.empty?
    args.first
  else
    raise IndexError.new("key '#{key}' not found.")
  end
end
307
+
308
+ # Return a new `Array` containing the values corresponding to the
309
+ # given keys.
310
# Look up each given key with `[]` and collect the results in order.
def values_at(*keys)
  keys.each_with_object([]) { |wanted, found| found << self[wanted] }
end
313
+
314
+ # Return an `Array` of all of the keys in the table.
315
+ #
316
+ # **WARNING:** since this list is being read from disk, it is possible
317
+ # that the result could exceed available memory.
318
# Collect every key yielded by `fast_each` into a new Array.
def keys
  [].tap do |collected|
    fast_each { |name, _value| collected << name }
  end
end
323
+
324
+ # Return an array of all values in the table.
325
+ #
326
+ # **WARNING:** since this list is being read from disk, it is possible
327
+ # that the result could exceed available memory.
328
# Collect every value yielded by `fast_each` into a new Array.
def values
  [].tap do |collected|
    fast_each { |_name, item| collected << item }
  end
end
333
+
334
+ # Return `true` if the table contains `key`; otherwise, return
335
+ # `false`.
336
# Test whether `key` is present in the table.
#
# Symbols are converted to Strings.  Instances of String subclasses
# are converted to plain Strings (previously they were always
# reported as absent, even when an equal plain String was stored).
# Any other kind of object cannot be a key, so the answer is `false`.
#
# @param key [Object] candidate key
# @return [Boolean]
def has_key?(key)
  case key
  when Symbol
    key = key.to_s
  when String
    key = String.new(key) unless key.instance_of?(String)
  else
    return false
  end

  # The fallback block only runs on a miss; it returns from has_key?.
  fetch(key) { return false }
  true
end
alias include? has_key?
alias member? has_key?
alias key? has_key?
344
+
345
+ # Delete all entries from the table.
346
# Ask the handle to clear this object's table.
def clear
  table = actual_tbl
  @handle.clear_table(table)
end
349
+
350
+
351
+ # Behaves like 'each' with a block--that is, call it for each
352
+ # key/value pair--but (probably) executes faster.
353
+ #
354
+ # The downside is that there is no guarantee of reentrance or
355
+ # safety. The block *MUST NOT* access the database in any way.
356
+ # In addition, no guarantee is made about element order.
357
+ #
358
+ # (You might be able to infer some ways to safely bend the rules
359
+ # by seeing what the underlying database libraries allow, but your
360
+ # code won't be future-proof if you do that.)
361
+ #
362
+ # @yield [key, value] The block to evaluate
363
# Inside a transaction, walk the rows supplied by the handle's
# `tbl_each_fast` and call the block with each row's key and decoded
# value.
def fast_each(&block)
  transaction do
    @handle.tbl_each_fast(actual_tbl) do |row|
      decoded = @valdec.call(row[:value])
      block.call(row[:key], decoded)
    end
  end
end
370
+
371
+
372
+ # Calls the given block with each key-value pair in the usual
373
+ # order, then return self. The entire call takes place in its own
374
+ # transaction.
375
+ #
376
+ # It is safe to modify `self` inside the block.
377
+ #
378
+ # If no block is given, returns an Enumerator instead. The
379
+ # Enumerator does *not* start a transaction but individual
380
+ # accesses of it (e.g. calling `next`) each take place in their
381
+ # own transaction.
382
+ #
383
+ # @yield [key, value] The block to evaluate
384
# With a block: iterate all pairs inside one transaction and return
# self.  Without one: return an Enumerator over `nt_each`, which does
# not itself open a transaction.
def each(&block)
  return to_enum(:nt_each) if block.nil?

  transaction { nt_each(&block) }
  self
end
alias each_pair each
390
+
391
+ private
392
+
393
+ # Back-end for `each`; does not explicitly start a transaction.
394
# Iterate the table through the handle's `tbl_each` without opening a
# transaction here, decoding each value before passing it to the
# block; returns self.
def nt_each(&block)
  @handle.tbl_each(actual_tbl) { |key, raw| block.call(key, @valdec.call(raw)) }
  self
end
400
+
401
+ public
402
+
403
+ # Calls the given block with each key; returns self. Exactly like
404
+ # `each` except for the block argument.
405
+ #
406
+ # @yield [key] The block to evaluate
407
# With a block: call it with every key via `each` and return what
# `each` returns.  Without one: return an Enumerator of the keys built
# on `nt_each`.
def each_key(&block)
  if block
    each { |key, _value| block.call(key) }
  else
    Enumerator.new { |yielder| nt_each { |key, _value| yielder << key } }
  end
end
411
+
412
+ # Calls the given block with each value; returns self. Exactly like
413
+ # `each` except for the block argument.
414
+ #
415
+ # @yield [value] The block to evaluate
416
# With a block: call it with every value via `each` and return what
# `each` returns.  Without one: return an Enumerator of the values
# built on `nt_each`.
def each_value(&block)
  if block
    each { |_key, value| block.call(value) }
  else
    Enumerator.new { |yielder| nt_each { |_key, value| yielder << value } }
  end
end
420
+
421
+ # Updates the database with multiple values from the specified
422
+ # object. Takes any object which implements the each_pair method,
423
+ # including `Hash` and `DBM` objects.
424
# Store every key/value pair of `hash` in the table inside a single
# transaction.
#
# `each_pair` is used when the source implements it, which is the
# documented contract; sources that only implement `each` (for
# example an Array of pairs) keep working through the fallback.
#
# @param hash [#each_pair, #each] source of key/value pairs
# @return whatever `transaction` returns
def update(hash)
  iterator = hash.respond_to?(:each_pair) ? :each_pair : :each

  transaction {
    hash.public_send(iterator) { |k, v| self[k] = v }
  }
end
429
+
430
+ # Remove `key` and its associated value from `self`. If `key` is
431
+ # not present, does nothing.
432
# Remove `key` and its value from the table.
#
# Symbol keys are converted to Strings first, matching what `[]=` and
# `fetch` do through `check_key`; without this, `delete(:k)` handed
# the handle a Symbol although the entry was stored under "k".
# Other key types are passed to the handle unchanged, as before.
#
# @param key [String, Symbol] key to remove
# @return whatever the handle's `delete` returns
def delete(key)
  key = key.to_s if key.is_a?(Symbol)
  @handle.delete(actual_tbl, key)
end
435
+
436
+ # Evaluate the block on each key-value pair in `self` and delete
437
+ # each entry for which the block returns true.
438
+ #
439
+ # @yield [key, value] The block to evaluate
440
# Inside one transaction, call the block with every key/value pair and
# delete each entry for which it returns a truthy value.
def delete_if(&block)
  transaction do
    each do |key, value|
      delete(key) if block.call(key, value)
    end
  end
end
alias reject! delete_if
446
+
447
+ # Return the number of entries (key-value pairs) in `self`.
448
# Ask the handle for the size of this object's table.
def size
  table = actual_tbl
  @handle.get_size(table)
end
alias length size
452
+
453
+ # Test if `self` is empty.
454
# True when `size` reports zero entries.
def empty?
  entry_count = size
  entry_count == 0
end
457
+
458
+
459
+ #
460
+ # Conversion to internal types
461
+ #
462
+
463
+
464
+ # Copies the table into a `Hash` and returns it.
465
+ #
466
+ # **WARNING:** it is possible for tables to be significantly larger
467
+ # than available RAM; in that case, this will likely crash your
468
+ # program.
469
# Build a new Hash from every pair yielded by `fast_each`.
def to_hash
  {}.tap do |copy|
    fast_each { |key, value| copy[key] = value }
  end
end
474
+
475
+
476
+ # Returns an `Array` of 2-element `Array` objects each containing a
477
+ # key-value pair from `self`.
478
+ #
479
+ # **WARNING:** it is possible for tables to be significantly larger
480
+ # than available RAM; in that case, this will likely crash your
481
+ # program.
482
# Build a new Array of `[key, value]` pairs from `fast_each`.
def to_a
  [].tap do |pairs|
    fast_each { |key, value| pairs << [key, value] }
  end
end
487
+
488
+
489
+ #
490
+ # Hacky odds and ends
491
+ #
492
+
493
+
494
+ # Test if `val` is one of the values in this table.
495
+ #
496
+ # Potentially very slow, especially on large tables.
497
# Scan the table with `fast_each`, returning true at the first stored
# value equal to `val`; false if none matches.
def has_value?(val)
  fast_each do |_key, stored|
    return true if stored == val
  end

  false
end
alias value? has_value?
502
+
503
+ # Return a `Hash` whose keys are the table's values and whose values
504
+ # are the table's keys.
505
+ #
506
+ # **WARNING:** it is possible for tables to be significantly larger
507
+ # than available RAM; in that case, this will likely crash your
508
+ # program.
509
# Build a new Hash mapping each stored value to its key, using
# `fast_each`; later pairs overwrite earlier ones with an equal value.
def invert
  {}.tap do |flipped|
    fast_each { |key, value| flipped[value] = key }
  end
end
514
+
515
+ # Remove the first key/value pair from `self` and return it. "First"
516
+ # is defined by `self`'s row order, which is the order of insertion
517
+ # as determined by SQLite3.
518
# Inside a transaction: return nil if the table is empty; otherwise
# take the first pair produced by `each`, delete its key and return
# the pair as a two-element Array.
def shift
  transaction do
    return nil if empty?

    first_key, first_value = each.first
    delete(first_key)

    return [first_key, first_value]
  end
end
528
+
529
+ private
530
+
531
+ # Attempt to turn 'key' to a valid key and raise an exception if
532
+ # that isn't possible.
533
# Normalize `key` to a plain String or raise.
#
# Symbols are converted with `to_s`; plain Strings are returned as-is
# (same object).  Instances of String subclasses, which the previous
# exact-class comparison rejected, are accepted and copied into a
# plain String.
#
# @param key [String, Symbol]
# @return [String]
# @raise [TypeError] if `key` is neither a String nor a Symbol
def check_key(key)
  case key
  when Symbol
    key.to_s
  when String
    key.instance_of?(String) ? key : String.new(key)
  else
    raise TypeError, "Key '#{key}' is not a string or symbol!"
  end
end
540
+ end
541
+
542
+ end
@@ -0,0 +1,27 @@
1
+
2
+
3
module Lite3
  # Exception class for errors specific to `Lite3::DBM`.
  class Error < StandardError; end

  # Exception class for internal errors; if you get one of these,
  # either the library's code or the caller's code is incorrect.
  class InternalError < Error; end

  # Mixin supplying a small assertion helper.
  module ErrUtil
    # Error check: raise a Lite3::InternalError carrying `message`
    # unless a block was given and it evaluates to a truthy value.
    # Returns nil when the check passes.
    def check(message, &block)
      raise InternalError, message unless block&.call
    end
  end

  private_constant :ErrUtil
end