dbmlite3 1.0.0 → 2.0.0.pre.alpha.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,284 @@
1
+
2
+ require 'sequel'
3
+
4
+ require 'weakref'
5
+
6
+ module Lite3
7
+
8
+ # Wrapper around a Sequel::Database object.
9
+ #
10
+ # We do this instead of using them directly because transactions
11
+ # happen at the handle level rather than the file level and this
12
+ # lets us share the transaction across multiple tables in the same
13
+ # file.
14
+ #
15
+ # In addition, we can use this to transparently close and reopen the
16
+ # underlying database file when (e.g.) forking the process.
17
+ #
18
+ # Instances contain references to DBM objects using them. When the
19
+ # set becomes empty, the handle is closed; adding a reference will
20
+ # ensure the handle is open.
21
+ class Handle
22
+ attr_reader :path
23
+ def initialize(path)
24
+ @path = path
25
+ @db = open_db(path)
26
+ @refs = {}
27
+ end
28
+
29
+ private
30
+
31
+ def open_db(path)
32
+ return IS_JRUBY ?
33
+ Sequel.connect("jdbc:sqlite:#{path}") :
34
+ Sequel.sqlite(@path)
35
+ end
36
+
37
+ public
38
+
39
+ def to_s
40
+ "<#{self.class}:0x#{object_id.to_s(16)} path=#{@path}>"
41
+ end
42
+ alias inspect to_s
43
+
44
+
45
+ #
46
+ # References to the DBM object(s) using this handle.
47
+ #
48
+ # References are weak. scrub_refs! will remove all reclaimed refs
49
+ # and close the handle if there are none left. (Note that this
50
+ # doesn't preclude us from reopening the handle later, though. We
51
+ # could keep Handles around longer if we want and reuse them, but we
52
+ # don't.)
53
+ #
54
+
55
+ def addref(parent)
56
+ @refs[parent.object_id] = WeakRef.new(parent)
57
+ end
58
+
59
+ def delref(parent)
60
+ @refs.delete(parent.object_id)
61
+ scrub_refs!
62
+ end
63
+
64
+ def scrub_refs!
65
+ @refs.delete_if{|k,v| ! v.weakref_alive? }
66
+ disconnect! if @refs.empty?
67
+ end
68
+
69
+ def live_refs
70
+ scrub_refs!
71
+ return @refs.size
72
+ end
73
+
74
+
75
+ #
76
+ # Opening and closing
77
+ #
78
+
79
+ # Disconnect the underlying database handle.
80
+ def disconnect!
81
+ @db.disconnect
82
+ end
83
+
84
+
85
+ #
86
+ # Transactions
87
+ #
88
+
89
+ # Perform &block in a transaction. See DBM.transaction.
90
+ def transaction(&block)
91
+ @db.transaction({}, &block)
92
+ end
93
+
94
+ # Test if there is currently a transaction in progress
95
+ def transaction_active?
96
+ return @db.in_transaction?
97
+ end
98
+
99
+
100
+
101
+ #
102
+ # Table access; the common SQL idioms we care about. These all
103
+ # deal with tables of key/value pairs.
104
+ #
105
+
106
+ # Create a table of key-value pairs if it does not already exist.
107
+ def create_key_value_table(name)
108
+ @db.create_table?(name) do
109
+ String :key, primary_key: true
110
+ String :value
111
+ end
112
+ end
113
+
114
+ # Perform an upsert for the row with field 'key'
115
+ def upsert(table, key, value)
116
+ transaction {
117
+ recs = @db[table].where(key: key)
118
+ if recs.count == 0
119
+ @db[table].insert(key: key, value: value)
120
+ elsif recs.count == 1
121
+ recs.update(value: value)
122
+ else
123
+ raise InternalError.new("Duplicate entry for key '#{key}'")
124
+ end
125
+ }
126
+
127
+ return value
128
+ end
129
+
130
+ # Retrieve the 'value' field of the row whose 'key' field equals `key` in the given table.
131
+ def lookup(table, key)
132
+ row = @db[table].where(key:key).first
133
+ return nil unless row
134
+
135
+ return row[:value]
136
+ end
137
+
138
+ def clear_table(table)
139
+ @db[table].delete
140
+ end
141
+
142
+ def delete(table, key)
143
+ @db[table].where(key: key).delete
144
+ end
145
+
146
+ def get_size(table)
147
+ return @db[table].count
148
+ end
149
+
150
+
151
+ # Backend for `each`; evaluates `block` on each row in `table`
152
+ # with the undecoded key and value as arguments. It is *not* a
153
+ # single transaction.
154
+ #
155
+ # We do this instead of using `Dataset.each` because the latter is
156
+ # not guaranteed to be re-entrant.
157
+ #
158
+ # Each key/value pair is retrieved via a separate query so that it
159
+ # is safe to access the database from inside the block. Items are
160
+ # retrieved by rowid in increasing order. Since we preserve those,
161
+ # modifications done in the block (probably) won't break things.
162
+ #
163
+ # This is (probably) not very fast but it's (probably) good enough
164
+ # for most things.
165
+ def tbl_each(table, &block)
166
+ return if @db[table].count == 0
167
+
168
+ curr = -1
169
+ while true
170
+ row = @db[table].where{rowid > curr}
171
+ .limit(1)
172
+ .select(:rowid, :key, :value)
173
+ .first
174
+
175
+ return unless row
176
+ curr, key, value = *row.values
177
+
178
+ block.call(key, value)
179
+ end
180
+ end
181
+
182
+ # Wrapper around Dataset.each, with all the ensuing limitations.
183
+ def tbl_each_fast(table, &block)
184
+ @db[table].each(&block)
185
+ end
186
+
187
+
188
+ end
189
+
190
+
191
+
192
+ #
193
+ # Private classes
194
+ #
195
+
196
+ # Dummy `Handle` that throws an `Error` exception whenever something
197
+ # tries to treat it as an open handle. This replaces a `DBM`'s
198
+ # `Handle` object when `DBM.close` is called so that the error
199
+ # message will be useful if something tries to access a closed
200
+ # handle.
201
+ class ClosedHandle
202
+ def initialize(filename, table)
203
+ @filename, @table = [filename, table]
204
+ end
205
+
206
+ # We clone the rest of Handle's interface with methods that throw
207
+ # an Error.
208
+ Handle.instance_methods(false).each { |name|
209
+ next if method_defined? name
210
+ define_method(name) { |*args|
211
+ raise Error.new("Use of closed database at #{@filename}/#{@table}")
212
+ }
213
+ }
214
+ end
215
+
216
+
217
+ # Module to manage the collection of active Handle objects. See the
218
+ # docs for `Lite3::SQL` for an overview; this module holds the actual
219
+ # code and data.
220
+ module HandlePool
221
+ @@handles = {} # The hash of `Handle` objects keyed by filename
222
+
223
+ # Retrieve the `Handle` associated with `filename`, creating it
224
+ # first if necessary. `filename` is normalized with
225
+ # `File.realpath` before using as a key and so is as good or bad
226
+ # as that for detecting an existing file.
227
+ def self.get(filename)
228
+
229
+ # Scrub @@handles of all inactive Handles
230
+ self.gc
231
+
232
+ # We need to convert the filename to a canonical
233
+ # form. `File.realpath` does this for us but only if the file
234
+ # exists. If not, we use it on the parent directory instead and
235
+ # use `File.join` to create the full path.
236
+ if File.exist?(filename)
237
+ File.file?(filename) or
238
+ raise Error.new("Filename '#{filename}' exists but is not a file.")
239
+
240
+ filename = File.realpath(filename)
241
+ else
242
+ dn = File.dirname(filename)
243
+ File.directory?(dn) or
244
+ raise Error.new("Parent directory '#{dn}' nonexistant or " +
245
+ "not a directory.")
246
+
247
+ filename = File.join(File.realpath(dn), File.basename(filename))
248
+ end
249
+
250
+ @@handles[filename] = Handle.new(filename) unless
251
+ @@handles.has_key?(filename)
252
+
253
+ return @@handles[filename]
254
+ end
255
+
256
+ # Close all underlying database connections.
257
+ def self.close_all
258
+ Sequel::DATABASES.each(&:disconnect)
259
+ end
260
+
261
+ # Close and remove all Handle objects with no refs and return a
262
+ # hash mapping the filename for each live Handle to the number of
263
+ # DBM objects that currently reference it. Does **NOT** perform a
264
+ # Ruby GC.
265
+ def self.gc
266
+ results = {}
267
+ @@handles.select!{|path, handle|
268
+ handle.scrub_refs!
269
+
270
+ if handle.live_refs == 0
271
+ @@handles.delete(path)
272
+ next false
273
+ end
274
+
275
+ results[path] = handle.live_refs
276
+ true
277
+ }
278
+
279
+ return results
280
+ end
281
+ end
282
+
283
+ private_constant :Handle, :ClosedHandle, :HandlePool
284
+ end
@@ -0,0 +1,87 @@
1
+
2
+ module Lite3
3
+
4
+ # This module provides some basic access to the underlying
5
+ # `Sequel::Database` objects used by `Lite3::DBM` to actually store
6
+ # and retrieve data.
7
+ #
8
+ # The only thing you need to care about is that if your process
9
+ # forks, you *must* invoke `Lite3::SQL.close_all` before forking the
10
+ # process. Otherwise, it will clone the connection and could lead
11
+ # to database corruption.
12
+ #
13
+ # More details:
14
+ #
15
+ # `Lite3` maintains a pool of private handle objects (private class
16
+ # `Lite3::Handle`) which in turn manage the `Sequel::Database`
17
+ # objects that actually do the work. There is one handle per
18
+ # SQLite3 database file; since each `DBM` represents one table in a
19
+ # SQLite3 file, multiple `DBM` objects will use the same handle.
20
+ #
21
+ # Handle objects can themselves close and replace their
22
+ # `Sequel::Database` objects transparently.
23
+ #
24
+ # The underlying system keeps track of which `DBM` objects reference
25
+ # which files and will close a file's `Sequel::Database` when all
26
+ # of the `DBM`s using it have been closed. (It does **not** handle
27
+ # the case where a `DBM` object remains open and goes out of scope;
28
+ # that object will be kept around for the life of the process.)
29
+ #
30
+ # Mostly, you don't need to care about this. However, it affects
31
+ # you in two ways:
32
+ #
33
+ # 1. Transactions are done at the file level and not the table level.
34
+ # This means that you can access separate tables in the same
35
+ # transaction, which is a Very Good Thing.
36
+ #
37
+ # 2. You can safely fork the current process and keep using existing
38
+ # `DBM` objects in both processes, provided you've invoked
39
+ # `close_all` before the fork. This will have closed the actual
40
+ # database handles (which can't tolerate being carried across a
41
+ # fork), and new ones will be opened the next time they're needed.
42
+ #
43
+ # If you find yourself needing to be sure that you don't have any
44
+ # unexpected open file handles (e.g. before forking or if you need
45
+ # Windows to unlock the file), you should call `close_all`.
46
+ #
47
+ # Otherwise, it's safe to ignore this stuff.
48
+
49
+
50
+ # This module provides some basic, consistent access to the
51
+ # underlying database library(es) (currently `sequel`).
52
+ module SQL
53
+
54
+ # Tests if the underlying database libraries are threadsafe.
55
+ #
56
+ # (Currently, it always returns true, since Sequel does that for
57
+ # us.)
58
+ def self.threadsafe?
59
+ return true
60
+ end
61
+
62
+ # Disconnect and delete all database handles and associated
63
+ # metadata that are no longer needed (i.e. because their
64
+ # corresponding `DBM`s have been closed or reclaimed).
65
+ #
66
+ # Returns a hash mapping the path to each open database file to
67
+ # the number of live DBM objects referencing it.
68
+ #
69
+ # You normally won't need to explicitly call this, but it's
70
+ # useful for testing and debugging.
71
+ def self.gc() return HandlePool.gc; end
72
+
73
+ # Close and remove the underlying database connections. This does
74
+ # not invalidate existing `Lite3::DBM` objects; they will recreate
75
+ # the connections when needed.
76
+ #
77
+ # The main use for this is for safely forking the current process.
78
+ # You should call this just before each `fork` to avoid potential
79
+ # corruption from duplicated database handles.
80
+ #
81
+ # This **should not** be called while a database operation is in
82
+ # progress. (E.g. do **not** call this from the block of
83
+ # `DBM.each`.)
84
+ def self.close_all() return HandlePool.close_all end
85
+ end
86
+
87
+ end
@@ -212,9 +212,11 @@ Serializations = Set.new
212
212
  it "implements each_*" do
213
213
  db = newdb.call(Tmp.file, "floop")
214
214
 
215
+ # Empty DBMs don't evaluate their block
215
216
  count = 0
216
217
  db.each {|k,v| count += 1}
217
218
  db.each_pair {|k,v| count += 1}
219
+ expect( count ) .to eq 0
218
220
 
219
221
 
220
222
  db["foo"] = 42
@@ -236,6 +238,64 @@ Serializations = Set.new
236
238
  db.close
237
239
  end
238
240
 
241
+ it "allows database modification in `each`" do
242
+ db = newdb.call(Tmp.file, "floop")
243
+
244
+ db["foo"] = 42
245
+ db["bar"] = 99
246
+ db["quux"] = 123
247
+ db["baz"] = 999
248
+
249
+ count = 0
250
+ db.each do|key, value|
251
+ count += 1
252
+
253
+ case count
254
+ when 1
255
+ expect(key) .to eq "foo"
256
+ expect(value) .to eq 42
257
+ db['foo'] = 'new_foo'
258
+
259
+ when 2
260
+ expect(key) .to eq "bar"
261
+ expect(value) .to eq 99
262
+
263
+ db['baz'] = "new_baz"
264
+
265
+ expect(db['foo']) .to eq "new_foo"
266
+
267
+ db.delete("quux")
268
+
269
+ when 3
270
+ expect(key) .to eq "baz"
271
+ expect(value) .to eq "new_baz"
272
+
273
+ when 4
274
+ fail "there should not be 4 items!"
275
+ end
276
+ end
277
+
278
+ expect(count) .to eq 3
279
+
280
+ db.close
281
+ end
282
+
283
+ it "implements fast_each" do
284
+ db = newdb.call(Tmp.file, "floop")
285
+
286
+ db["foo"] = 42
287
+ db["bar"] = 99
288
+ db["quux"] = 123
289
+
290
+ expected = []
291
+ db.fast_each{|key, value| expected.push [key, value]}
292
+
293
+ expect(expected) .to eq [ ["foo", 42], ["bar", 99], ["quux", 123]]
294
+
295
+ db.close
296
+ end
297
+
298
+
239
299
  it "deletes items from the table" do
240
300
  db = newdb.call(Tmp.file, "floop")
241
301
 
@@ -792,7 +852,7 @@ describe Lite3::DBM do
792
852
  end
793
853
 
794
854
  it "keeps most of its names private" do
795
- expect( Lite3.constants.to_set ) .to eq %i{SQL DBM Error}.to_set
855
+ expect( Lite3.constants.to_set ) .to eq %i{SQL DBM Error InternalError}.to_set
796
856
  end
797
857
  end
798
858
 
@@ -816,15 +876,6 @@ describe Lite3::SQL do
816
876
  db
817
877
  }
818
878
 
819
- # it "manages a pool of DB handles that should now all be closed." do
820
- # # If this fails, it (probably) means the previous tests didn't
821
- # # clean up after themselves.
822
- # GC.start
823
- # expect( Lite3::SQL.gc.empty? ) .to be true
824
-
825
- # Lite3::SQL.close_all # smoketest
826
- # end
827
-
828
879
  it "lets you close the actual handle without impeding database use" do
829
880
  expect( Lite3::SQL.gc.size ) .to eq 0
830
881
 
@@ -840,72 +891,19 @@ describe Lite3::SQL do
840
891
 
841
892
  # Referencing DBM objects should be db1 and db2
842
893
  path, refs = stats.to_a[0]
894
+ expect( path ) .to eq file
895
+ expect( refs ) .to eq 2
843
896
 
844
- expect( refs.size ) .to eq 2
845
- expect( refs.include?(db1) ) .to be true
846
- expect( refs.include?(db2) ) .to be true
847
-
848
- # Underlying handles should be open
849
- expect( db1.handle_closed? ) .to be false
850
- expect( db2.handle_closed? ) .to be false
897
+ # We can no longer test if the underlying file handles are still
898
+ # open, so we don't.
851
899
 
852
- # Test closing it
900
+ # Test closing it and forcing a re-open
853
901
  Lite3::SQL.close_all
854
- expect( db1.handle_closed? ) .to be true
855
- expect( db2.handle_closed? ) .to be true
856
-
857
- # Test auto-opening them.
858
902
  expect( db1["foo"] ) .to eq vv["foo"]
859
- expect( db1.handle_closed? ) .to be false
860
- expect( db2.handle_closed? ) .to be false
861
903
 
862
- db1.close
863
- db2.close
864
-
865
- expect( Lite3::SQL.gc.keys.size ) .to eq 0
866
- end
867
-
868
- # it "(eventually) closes handles that have gone out of scope" do
869
- # expect( Lite3::SQL.gc.keys.size ) .to eq 0
870
-
871
- # file = Tmp.file
872
- # db1 = newbasic.call(file, "first")
873
-
874
- # expect( db1.handle_closed? ) .to be false
875
- # expect( Lite3::SQL.gc.keys.size ) .to eq 1
876
-
877
- # db1 = nil
878
- # GC.start
879
- # expect( Lite3::SQL.gc.keys.size ) .to eq 0
880
- # end
881
-
882
- it "does close_all with multiple files" do
883
- db1 = newbasic.call(Tmp.file, "first")
884
- db2 = newbasic.call(Tmp.file, "second")
885
-
886
- # The above should be using the same handle, which is currently
887
- # open.
888
-
889
- stats = Lite3::SQL.gc
890
- expect( stats.keys.size ) .to eq 2
891
-
892
- all_refs = stats.values.flatten
893
- expect( all_refs.include?(db1) ) .to be true
894
- expect( all_refs.include?(db2) ) .to be true
895
-
896
- # Underlying handles should be open
897
- expect( db1.handle_closed? ) .to be false
898
- expect( db2.handle_closed? ) .to be false
899
-
900
- # Test closing it
901
- Lite3::SQL.close_all
902
- expect( db1.handle_closed? ) .to be true
903
- expect( db2.handle_closed? ) .to be true
904
-
905
- # Test auto-opening them.
904
+ # Repeat, but this time use the underlying lib
905
+ Sequel::DATABASES.each(&:disconnect)
906
906
  expect( db1["foo"] ) .to eq vv["foo"]
907
- expect( db1.handle_closed? ) .to be false
908
- expect( db2.handle_closed? ) .to be true
909
907
 
910
908
  db1.close
911
909
  db2.close
@@ -913,8 +911,7 @@ describe Lite3::SQL do
913
911
  expect( Lite3::SQL.gc.keys.size ) .to eq 0
914
912
  end
915
913
 
916
-
917
- it "allows multipe table accesses in the same transaction" do
914
+ it "allows multiple table accesses in the same transaction" do
918
915
  file = Tmp.file
919
916
  db1 = newbasic.call(file, "first")
920
917
  db2 = Lite3::DBM.new(file, "second")
@@ -964,6 +961,50 @@ describe Lite3::SQL do
964
961
  expect{ db1.size } .to raise_error Lite3::Error
965
962
  expect{ db1.to_a } .to raise_error Lite3::Error
966
963
  end
964
+
965
+ it "finalizes DBM objects that have gone out of scope." do
966
+
967
+ # This is really difficult to test because there's no reliable way
968
+ # to get the garbage collector to clean up when we want it to. As
969
+ # such, we make the attempt and skip with a warning if db2 hasn't
970
+ # been reclaimed.
971
+ #
972
+ # (Dropping into the debugger after GC.start seems to help.)
973
+
974
+
975
+ file = Tmp.file
976
+ db1 = newbasic.call(file, "first")
977
+ db2 = Lite3::DBM.new(file, "second")
978
+
979
+ # Two DBMs are currently open.
980
+ stats = Lite3::SQL.gc
981
+ expect( stats.size ) .to be 1
982
+ expect( stats.values[0] ) .to be 2
983
+
984
+ # Make db2 a weak reference so it goes out of scope after a GC.
985
+ # It's possible that a GC redesign will break this.
986
+ db2 = WeakRef.new(db2)
987
+ GC.start
988
+
989
+ # Uncommenting the next line and then resuming seems to work:
990
+ #byebug
991
+
992
+ if db2.weakref_alive?
993
+ db2.close
994
+ db1.close
995
+ skip "GC has't reclaimed the handle; bailing."
996
+ end
997
+
998
+ # There should now be exactly 1 open DBM
999
+ stats = Lite3::SQL.gc
1000
+ expect( stats.size ) .to be 1
1001
+ expect( stats.values[0] ) .to be 1
1002
+
1003
+ db1.close
1004
+ end
1005
+
1006
+
1007
+
967
1008
  end
968
1009
 
969
1010