dbmlite3 1.0.0 → 2.0.0.pre.alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,542 @@
1
+
2
module Lite3

  # Lite3::DBM encapsulates a single table in a single SQLite3
  # database file and lets you access it as easily as a Hash.
  # Multiple instances may be opened on different tables in the same
  # database.
  #
  # Note that instances do not explicitly own their database
  # connection; instead, they are managed internally and shared across
  # `DBM` instances.
  class Lite3::DBM
    include Enumerable
    include ErrUtil

    PREFIX = "dbmlite3_tbl_".freeze
    META = :dbmlite3_meta
    private_constant(:PREFIX, :META)

    #
    # Construction and setup
    #


    # Create a new `Lite3::DBM` object that opens database file
    # `filename` and performs subsequent operations on `table`.  Both
    # the database file and the table will be created if they do not
    # yet exist.  The table name must be a valid name identifier
    # (i.e. the whole name matches `/\A[a-zA-Z_]\w*\z/`).
    #
    # The optional third argument `serializer` is used to choose the
    # serialization method for converting Ruby values into storable
    # strings.  There are three options:
    #
    # * `:yaml` uses the `Psych` module.
    # * `:marshal` uses the `Marshal` module.
    # * `:string` simply uses the default `to_s` method, just like the
    #   stock `DBM`.
    #
    # Each of these will have their pros and cons.  The default is
    # `:yaml` because that is the most portable.  `:marshal` tends to
    # be faster but is incompatible across minor Ruby versions.
    #
    # Your serializer choice is registered in a metadata table when
    # `tablename` is created in the SQLite3 file.  Afterward, it is an
    # error to attempt to open the table with a different serializer
    # and will result in a Lite3::Error exception.
    #
    # ## Caveats:
    #
    # 1. Both YAML and Marshal serialization have the usual security
    #    issues as described in the documentation for `Marshal` and
    #    `Psych`.  If you are going to let an untrusted entity modify
    #    the database, you should not use these methods and instead
    #    stick to string conversion.
    #
    # 2. `DBM` does not check your Marshal version; a mismatch will
    #    fail dramatically at exactly the wrong time.
    #
    # 3. `filename` is normalized using `File.realpath` and this path
    #    is used to look up an existing database handle if one exists.
    #    Using hard links or other trickery to defeat this mechanism
    #    and open a second handle to the same database is **probably**
    #    still harmless but is not something this API guarantees will
    #    work correctly.
    #
    def initialize(filename, tablename, serializer = :yaml)
      @filename = filename
      @tablename = tablename
      @valenc,
      @valdec = value_encoders(serializer)
      @handle = HandlePool.get(filename)

      @handle.addref(self)

      # \A and \z anchor the *whole* string.  (^ and $ match at line
      # boundaries, which would let a multi-line name slip through.)
      # `to_s` lets Symbols through and turns any other type into a
      # clean validation failure.
      check("Malformed table name '#{tablename}'; must be a valid identifier") {
        tablename.to_s =~ /\A[a-zA-Z_]\w*\z/
      }

      transaction {
        register_serialization_scheme(serializer)
        @handle.create_key_value_table( actual_tbl() )
      }
    rescue Error
      # Release our reference to the handle (if we got that far)
      # before propagating the failure.
      self.close if @handle
      raise
    end


    # Identical to `initialize` except that if a block is provided, it
    # is evaluated with a new Lite3::DBM which is then closed afterward.
    # This is analogous to `File.open`.
    def self.open(filename, tablename, serializer = :yaml, &block)
      instance = self.new(filename, tablename, serializer)
      return instance unless block

      begin
        return block.call(instance)
      ensure
        instance.close
      end
    end

    private

    # Return encode and decode procs for the requested serialization
    # scheme.  Raises InternalError if `serializer` is not one of
    # `:yaml`, `:marshal` or `:string`.
    def value_encoders(serializer)
      case serializer
      when :yaml
        enc = proc{ |val| Psych.dump(val) }

        # Psych (and module YAML) has gradually moved from defaulting
        # from unsafe loading to safe loading.  This is a pain for us
        # because old versions don't provide `unsafe_load` as an alias
        # to `load` and new versions default `load` to `safe_load`.
        # So we have to do this thing to pick `unsafe_load` if it's
        # available and `load` otherwise.
        #
        # NOTE: unsafe loading is deliberate here; see the security
        # caveat in the documentation for `initialize`.
        if Psych.respond_to? :unsafe_load
          dec = proc{ |val| Psych.unsafe_load(val) }
        else
          dec = proc{ |val| Psych.load(val) }
        end

      when :marshal
        # NOTE: Marshal.load is unsafe on untrusted data; see the
        # security caveat in the documentation for `initialize`.
        enc = proc { |val| Marshal.dump(val) }
        dec = proc { |val| Marshal.load(val) }

      when :string
        enc = proc { |val| val.to_s }
        dec = proc { |val| val.to_s }   # sqlite preserves some types

      else
        raise InternalError.new("Invalid serializer selected: '#{serializer}'")
      end

      return enc, dec
    end

    # Add the serialization scheme for this table to META, or (if
    # one is already registered) verify that it matches `req_ser`.
    def register_serialization_scheme(req_ser)
      @handle.create_key_value_table(META)

      transaction {
        srlzr = @handle.lookup(META, @tablename)
        if srlzr
          check("Serializer mismatch for '#{@tablename}'; specified " +
                "#{req_ser} but table uses #{srlzr}") {
            req_ser.to_s == srlzr
          }
        else
          @handle.upsert(META, @tablename, req_ser.to_s)
        end
      }
    end



    #
    # Helpers
    #


    # Return the actual table name we are using.
    def actual_tbl() return "#{PREFIX}#{@tablename}".intern; end


    public

    # Return a short human-readable description of `self` including
    # the file, table name and open/closed state.
    def to_s
      openstr = closed? ? 'CLOSED' : 'OPEN'
      return "<#{self.class}:0x#{object_id.to_s(16)} file='#{@filename}'" +
             " tablename='#{@tablename}' #{openstr}>"
    end
    alias inspect to_s


    # Disassociate `self` from the underlying database.  If this is
    # the last `DBM` using it, the handle will (probably) also be
    # closed.
    #
    # Subsequent attempts to use `self` will fail with an error; the
    # only exception to this is the method `closed?` which will return
    # true.
    def close
      @handle.delref(self)
      @handle = ClosedHandle.new(@filename, @tablename)
    end

    # Test if this object has been closed.  This is safe to call on a
    # closed `DBM`.
    def closed?
      return @handle.is_a? ClosedHandle
    end


    #
    # Transactions
    #


    # Begins a transaction, evaluates the given block and then ends
    # the transaction.  If no error occurred (i.e. no exception was
    # thrown), the transaction is committed; otherwise, it is rolled
    # back.
    #
    # It is safe to call `DBM.transaction` within another
    # `DBM.transaction` block's call chain because `DBM` will not
    # start a new transaction on a database handle that already has
    # one in progress.  (It may be possible to trick `DBM` into trying
    # via fibers or other flow control trickery; don't do that.)
    #
    # Note that it's probably not a good idea to assume too much about
    # the precise semantics; I can't guarantee that the underlying
    # library(es) won't change or be replaced outright.
    #
    # That being said, at present, this is simply a wrapper around
    # `Sequel::Database.transaction` with the default options and so
    # is subject to the quirks therein.  In version 1.0.0,
    # transactions were always executed in `:deferred` mode via the
    # `sqlite3` gem.
    #
    # @yield [db] The block takes a reference to the receiver as an
    #             argument.
    #
    def transaction(&block)
      return @handle.transaction { block.call(self) }
    end

    # Test if there is currently a transaction in progress
    def transaction_active?
      return @handle.transaction_active?
    end


    #
    # Basic hash-like access
    #


    # Store `value` at `key` in the database.
    #
    # `key` **must** be a String or a Symbol; Symbols are
    # transparently converted to Strings.
    #
    # `value` **must** be convertible to string by whichever
    # serialization method you have chosen.
    def []=(key, value)
      key = check_key(key)
      valstr = @valenc.call(value)

      @handle.upsert(actual_tbl(), key, valstr)

      return value
    end
    alias store :'[]='

    # Retrieve the value associated with `key` from the database or
    # nil if it is not present.
    def [](key)
      return fetch(key, nil)
    end

    # Retrieve the value associated with `key`.
    #
    # `key` **must** be a String or a Symbol; Symbols are
    # transparently converted to Strings.
    #
    # If it is not present and a block is given, evaluate the block
    # with the key as its argument and return that.
    #
    # If no block was given either but one extra parameter was given,
    # that value is returned instead.
    #
    # Finally, if none of these was given, it throws an `IndexError`
    # exception.
    #
    # It is an error if `fetch` is called with more than two arguments.
    #
    # @yield [key] The fallback block.
    def fetch(key, *args, &default_block)

      # Ensure there are no extra arguments
      nargs = args.size + 1
      check("Too many arguments for 'fetch'; expected 1 or 2; got #{nargs}") {
        nargs <= 2
      }

      # Retrieve the value
      key = check_key(key)

      # Return the value if found.  (nil will always mean the entry
      # isn't present because values are encoded in strings.)
      value = @handle.lookup(actual_tbl(), key)
      return @valdec.call(value) if value

      # Not found.  If a block was given, evaluate it and return its
      # result.
      return default_block.call(key) if default_block

      # Next, see if we have a default value we can return
      return args[0] if args.size > 0

      # And if all else fails, raise an IndexError.
      raise IndexError.new("key '#{key}' not found.")
    end

    # Return a new `Array` containing the values corresponding to the
    # given keys.
    def values_at(*keys)
      return keys.map{|k| self[k]}
    end

    # Return an `Array` of all of the keys in the table.
    #
    # **WARNING:** since this list is being read from disk, it is possible
    # that the result could exceed available memory.
    def keys
      keys = []
      fast_each { |k, _v| keys.push k }
      return keys
    end

    # Return an array of all values in the table.
    #
    # **WARNING:** since this list is being read from disk, it is possible
    # that the result could exceed available memory.
    def values
      values = []
      fast_each { |_k, v| values.push v }
      return values
    end

    # Return `true` if the table contains `key`; otherwise, return
    # `false`.  Keys that are neither a String nor a Symbol are never
    # present, so those yield `false` rather than an error.
    def has_key?(key)
      return false unless key.class == String || key.class == Symbol
      fetch( key ) { return false }
      return true
    end
    alias include? has_key?
    alias member? has_key?
    alias key? has_key?

    # Delete all entries from the table.
    def clear
      @handle.clear_table(actual_tbl())
    end


    # Behaves like 'each' with a block--that is, call it for each
    # key/value pair--but (probably) executes faster.
    #
    # The downside is that there is no guarantee of reentrance or
    # safety.  The block *MUST NOT* access the database in any way.
    # In addition, no guarantee is made about element order.
    #
    # (You might be able to infer some ways to safely bend the rules
    # by seeing what the underlying database libraries allow, but your
    # code won't be future-proof if you do that.)
    #
    # @yield [key, value] The block to evaluate
    def fast_each(&block)
      transaction {
        @handle.tbl_each_fast( actual_tbl() ) { |row|
          block.call(row[:key], @valdec.call(row[:value]))
        }
      }
    end


    # Calls the given block with each key-value pair in the usual
    # order, then return self.  The entire call takes place in its own
    # transaction.
    #
    # It is safe to modify `self` inside the block.
    #
    # If no block is given, returns an Enumerator instead.  The
    # Enumerator does *not* start a transaction but individual
    # accesses of it (e.g. calling `next`) each take place in their
    # own transaction.
    #
    # @yield [key, value] The block to evaluate
    def each(&block)
      return self.to_enum(:nt_each) unless block
      transaction { nt_each(&block) }
      return self
    end
    alias each_pair each

    private

    # Back-end for `each`; does not explicitly start a transaction.
    def nt_each(&block)
      @handle.tbl_each(actual_tbl()) do |k, v|
        block.call(k, @valdec.call(v))
      end
      return self
    end

    public

    # Calls the given block with each key; returns self.  Exactly like
    # `each` except for the block argument.
    #
    # @yield [key] The block to evaluate
    def each_key(&block)
      return Enumerator.new{|y| nt_each{ |k, _v| y << k } } unless block
      return each{ |k, _v| block.call(k) }
    end

    # Calls the given block with each value; returns self.  Exactly like
    # `each` except for the block argument.
    #
    # @yield [value] The block to evaluate
    def each_value(&block)
      return Enumerator.new{|y| nt_each{ |_k, v| y << v } } unless block
      return each{ |_k, v| block.call(v) }
    end

    # Updates the database with multiple values from the specified
    # object.  Takes any object which implements the each_pair method,
    # including `Hash` and `DBM` objects.  (Objects that only provide
    # an `each` yielding key/value pairs are still accepted.)
    def update(hash)
      # Prefer each_pair as documented; `each` on some pair-like
      # objects (e.g. Struct) does not yield keys.
      iterator = hash.respond_to?(:each_pair) ? :each_pair : :each
      transaction {
        hash.public_send(iterator) { |k, v| self[k] = v }
      }
    end

    # Remove `key` and its associated value from `self`.  If `key` is
    # not present, does nothing.
    #
    # `key` **must** be a String or a Symbol; Symbols are
    # transparently converted to Strings, exactly as `[]=` and `fetch`
    # do.  Any other type raises a `TypeError`.
    def delete(key)
      key = check_key(key)
      @handle.delete(actual_tbl(), key)
    end

    # Evaluate the block on each key-value pair in `self` and delete
    # each entry for which the block returns true.
    #
    # @yield [key, value] The block to evaluate
    def delete_if(&block)
      transaction {
        self.each{ |k, v| delete(k) if block.call(k, v) }
      }
    end
    alias reject! delete_if

    # Return the number of entries (key-value pairs) in `self`.
    def size
      return @handle.get_size(actual_tbl())
    end
    alias length size

    # Test if `self` is empty.
    def empty?
      return size == 0
    end


    #
    # Conversion to internal types
    #


    # Copies the table into a `Hash` and returns it.
    #
    # **WARNING:** it is possible for tables to be significantly larger
    # than available RAM; in that case, this will likely crash your
    # program.
    def to_hash
      result = {}
      fast_each{|k,v| result[k] = v}
      return result
    end


    # Returns an `Array` of 2-element `Array` objects each containing a
    # key-value pair from `self`.
    #
    # **WARNING:** it is possible for tables to be significantly larger
    # than available RAM; in that case, this will likely crash your
    # program.
    def to_a
      result = []
      fast_each { |k,v| result.push [k,v] }
      return result
    end


    #
    # Hacky odds and ends
    #


    # Test if `val` is one of the values in this table.
    #
    # Potentially very slow, especially on large tables.
    def has_value?(val)
      # The `return` exits the scan (and this method) at the first
      # match instead of reading the rest of the table.
      fast_each{|_k,v| return true if v == val }
      return false
    end
    alias value? has_value?

    # Return a `Hash` whose keys are the table's values and whose values
    # are the table's keys.
    #
    # **WARNING:** it is possible for tables to be significantly larger
    # than available RAM; in that case, this will likely crash your
    # program.
    def invert
      result = {}
      fast_each{|k,v| result[v] = k}
      return result
    end

    # Remove the first key/value pair from `self` and return it.  "First"
    # is defined by `self`'s row order, which is the order of insertion
    # as determined by SQLite3.  Returns nil if `self` is empty.
    def shift
      pair = nil

      # The block is allowed to finish normally (no `return` from
      # inside it) so the delete does not depend on how the
      # transaction wrapper treats non-local exits.
      transaction {
        unless empty?
          key, value = self.each.first
          delete(key)
          pair = [key, value]
        end
      }

      return pair
    end

    private

    # Attempt to turn 'key' to a valid key and raise an exception if
    # that isn't possible.  Symbols are converted to Strings; anything
    # that is not then exactly a String raises a TypeError.
    def check_key(key)
      key = key.to_s if key.class == Symbol
      raise TypeError.new("Key '#{key}' is not a string or symbol!") unless
        key.class == String

      return key
    end
  end

end
@@ -0,0 +1,27 @@
1
+
2
+
3
module Lite3
  # Exception class for errors specific to `Lite3::DBM`.
  class Error < StandardError; end

  # Exception class for internal errors; if you get one of these,
  # either my code or your code is incorrect.
  class InternalError < Error; end


  # Mixin with small error-checking helpers.  It is hidden from code
  # outside of `Lite3` by the `private_constant` declaration below.
  module ErrUtil

    # Error check: if the block evaluates to false (or no block is
    # given), raise a Lite3::InternalError with the given message.
    # Returns nil when the check passes.
    def check(message, &block)
      return if block && block.call
      raise InternalError, message
    end

  end


  private_constant :ErrUtil
end