store-digest 0.3.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/TODO.org +115 -4
- data/lib/store/digest/blob/filesystem.rb +9 -4
- data/lib/store/digest/driver.rb +4 -0
- data/lib/store/digest/entry.rb +1214 -0
- data/lib/store/digest/error.rb +28 -0
- data/lib/store/digest/meta/lmdb/v0.rb +388 -0
- data/lib/store/digest/meta/lmdb/v1.rb +737 -0
- data/lib/store/digest/meta/lmdb.rb +59 -1041
- data/lib/store/digest/meta.rb +1 -1
- data/lib/store/digest/readwrapper.rb +174 -0
- data/lib/store/digest/version.rb +1 -1
- data/lib/store/digest.rb +335 -117
- data/store-digest.gemspec +6 -7
- metadata +45 -17
- data/lib/store/digest/object.rb +0 -623
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# again for the symbol
|
|
2
|
+
require 'store/digest/version'
|
|
3
|
+
|
|
4
|
+
# Root of the exception hierarchy for Store::Digest; every error below
# descends from it (and therefore from RuntimeError).
class Store::Digest::Error < RuntimeError
  # Signals a configuration parameter that cannot be reconciled.
  class Configuration < self; end

  # Signals an attempt to add a deleted entry to a store.
  class Deleted < self; end

  # Signals that a scanned property does not match the asserted one.
  #
  # @note Use this when e.g. the size or type don't match.
  class Integrity < self; end

  # Signals specifically that a scanned hash does not match the
  # asserted one.
  class CryptographicIntegrity < Integrity; end
end
|
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
require 'store/digest/meta'
|
|
2
|
+
|
|
3
|
+
module Store::Digest::Meta::LMDB
|
|
4
|
+
# This is the version zero (original) database layout.
|
|
5
|
+
module V0
|
|
6
|
+
|
|
7
|
+
# pack/unpack template for the fixed part of an object record, in the
# same order as RECORD
FORMAT = 'Q>NNNNCZ*Z*Z*Z*'.freeze

# field names of an object record, in storage order
RECORD = %i[
  size ctime mtime ptime dtime flags
  type language charset encoding
].freeze

# fields that get coerced with to_i before packing (everything else
# falls back to to_s in the serializer)
INTS = %i[size ctime mtime ptime dtime flags].to_h { |k| [k, :to_i] }.freeze

# per-key pack templates
PACK = {
  # control records
  objects:  'Q>',
  deleted:  'Q>',
  bytes:    'Q>',
  # object records
  size:     'Q>',
  ctime:    'N', # - also used in control
  mtime:    'N', # - ditto
  ptime:    'N',
  dtime:    'N',
  flags:    'C',
  type:     'Z*',
  language: 'Z*',
  charset:  'Z*',
  encoding: 'Z*',
}.each_value(&:freeze).freeze
|
|
29
|
+
|
|
30
|
+
# NOTE these are all internal methods meant to be used inside other
|
|
31
|
+
# transactions so they do not run in transactions themselves
|
|
32
|
+
|
|
33
|
+
private
|
|
34
|
+
|
|
35
|
+
# Unpacks a single raw value using the PACK template registered
# for `type`.
#
# @param raw [String] the packed bytes
# @param type [Symbol] a key of PACK
# @return [Object] the first value the template yields
#
def db_decode raw, type
  template = PACK[type]
  raw.unpack(template).first
end
|
|
38
|
+
|
|
39
|
+
# Packs a single value using the PACK template registered for `type`.
#
# @param value [Object] the value to serialize
# @param type [Symbol] a key of PACK
# @return [String] the packed bytes
#
def db_encode value, type
  template = PACK[type]
  [value].pack(template)
end
|
|
42
|
+
|
|
43
|
+
# Seeds the control database and opens the per-field and per-algorithm
# databases, then freezes `@dbs`.
#
# * `ctime`/`mtime` control entries are created (current time, packed
#   as `N`) only when absent.
# * `objects`/`deleted`/`bytes` are zeroed (packed as `Q>`) when the
#   same-named reader returns a falsy value.
# * one database per RECORD field is opened with `dupsort`, and one per
#   digest algorithm without it; all are opened with `create`.
#
# @return [Hash] the frozen `@dbs`
#
def setup_dbs
  control = @dbs[:control]

  stamp = [Time.now(in: 'Z').to_i].pack('N')
  %w[ctime mtime].each do |slot|
    control[slot] = stamp unless control.has?(slot)
  end

  # the counter names double as the names of their reader methods
  zero = [0].pack('Q>')
  %w[objects deleted bytes].each do |counter|
    control[counter] = zero unless send(counter.to_sym)
  end

  # XXX we might actually wanna dupsort the non-primary digests too
  layout = RECORD.to_h { |field| [field, { dupsort: true }] }
  algorithms.each { |algo| layout[algo] = {} }

  # open everything first, then merge in one go
  opened = layout.to_h do |name, opts|
    [name, @lmdb.database(name.to_s, opts.merge(create: true))]
  end

  @dbs.merge!(opened).freeze
end
|
|
68
|
+
|
|
69
|
+
# Adds `val` (which may be negative) to the integer stored under `key`
# in the control database and writes the result back.
#
# Existing values are big-endian unsigned integers, either 4 bytes
# (`N`) or 8 bytes (`Q>`). The result is written back in the *same*
# width that was read, so a 4-byte entry stays readable by code that
# unpacks it with `N`. A missing key is treated as zero and created as
# an 8-byte counter.
#
# @param key [#to_s] the control key
# @param val [Integer] the amount to add
# @raise [RuntimeError] if the stored value is neither 4 nor 8 bytes
# @return [Integer] the new value
#
def control_add key, val
  name = key.to_s
  raw  = @dbs[:control][name]

  fmt = if raw
          case raw.length
          when 4 then 'N'
          when 8 then 'Q>'
          else
            raise RuntimeError, "#{key} must be 4 or 8 bytes long"
          end
        else
          'Q>' # new counters are always 64-bit
        end

  nv = (raw ? raw.unpack1(fmt) : 0) + val

  # preserve the width of the existing record
  @dbs[:control][name] = [nv].pack fmt

  nv
end
|
|
88
|
+
|
|
89
|
+
# Reads and decodes one entry of the control database.
#
# @param key [#to_sym] one of `ctime`, `mtime`, `objects`, `deleted`,
#  `bytes`
# @raise [ArgumentError] if the key is not a known control key
# @return [Integer, nil] the decoded value, or `nil` if it is not set
#
def control_get key
  key = key.to_sym
  # NB this has to be a symbol array (%i[]); a bare %[] is a *string*
  # literal and String#include? raises TypeError when handed a symbol
  raise ArgumentError, "Invalid control key #{key}" unless
    %i[ctime mtime objects deleted bytes].include? key

  val = @dbs[:control][key.to_s]
  val&.unpack1 PACK[key]
end
|
|
98
|
+
|
|
99
|
+
# Turns an index key into the binary form used by the index databases.
#
# @param key [nil, Time, Integer, String] the value to encode
# @raise [ArgumentError] for any other kind of value
# @return [String, nil] `nil` for `nil`; epoch seconds packed as `N`
#  for a Time; `Q>` for an Integer; a binary copy for a String
#
def index_pack key
  return if key.nil?
  return [key.to_i].pack('N') if key.is_a?(Time)
  return [key].pack('Q>') if key.is_a?(Integer)
  # no \0: key length is stored in the record
  return key.b if key.is_a?(String)

  raise ArgumentError, "Invalid type #{key.class}"
end
|
|
108
|
+
|
|
109
|
+
# Adds a `key` => `bin` pair to the named index, unless the key packs
# to `nil` or the exact pair is already present.
#
# @param index [#to_sym] the name of the index database
# @param key [nil, Time, Integer, String] the value being indexed
# @param bin [String] the value stored under the key
# @return [Object, nil] whatever the database's `put` returns, or `nil`
#  if nothing was written
#
def index_add index, key, bin
  key = index_pack(key) or return
  # normalise the name the same way index_rm and index_get do
  db = @dbs[index.to_sym]
  # check first or it will just stupidly keep adding duplicate records
  db.put key, bin unless db.has? key, bin
end
|
|
114
|
+
|
|
115
|
+
# Removes a `key` => `bin` pair from the named index. Does nothing
# when the key packs to `nil`.
#
# @param index [#to_sym] the name of the index database
# @param key [nil, Time, Integer, String] the indexed value
# @param bin [String] the value stored under the key
# @return [Object, nil] whatever the database's `delete?` returns, or
#  `nil` if the key was empty
#
def index_rm index, key, bin
  packed = index_pack(key)
  return unless packed

  # soft delete baleets only when there is something to baleet
  @dbs[index.to_sym].delete?(packed, bin)
end
|
|
120
|
+
|
|
121
|
+
# return an enumerator
|
|
122
|
+
# Walks an index and hands each matching key/value pair to the block.
#
# * With only `min`, yields the records whose key equals `min`.
# * With `range: true`, starts from the first key at or after `min`.
# * With `max`, yields from `min` (or the start of the index when
#   `min` is `nil`) up to and including `max`.
#
# Without a block an Enumerator over the same records is returned.
#
# @param index [#to_sym] the name of the index database
# @param min [nil, Time, Integer, String] lower bound / exact key
# @param max [nil, Time, Integer, String] upper bound (inclusive)
# @param range [false, true] whether `min` alone is a lower bound
#  rather than an exact match
# @yieldparam key [String] the packed key
# @yieldparam value [String] the value stored under it
# @return [nil, Enumerator] `nil` in block form (or when both bounds
#  are empty), otherwise an Enumerator
#
def index_get index, min, max = nil, range: false, &block
  # min and max will be binary values and the cursor will return a range
  min = index_pack(min)
  max = index_pack(max)
  return unless min || max

  # forward `range:` as well, otherwise the enumerator form silently
  # degrades to an exact match
  return enum_for(:index_get, index, min, max, range: range) unless
    block_given?

  body = -> c do
    # lmdb cursors are a pain because 'set' advances the cursor so you
    # can't just run the whole thing in a loop, you have to do this
    # instead:
    if rec = (min ? c.set_range(min) : c.first)
      return unless range or max or min == rec.first
      # the first hit can already lie beyond the upper bound
      return if max && rec.first > max
      block.call(*rec)
      block.call(*rec) while rec = c.next_range(max || min)
    end
  end

  @dbs[index.to_sym].cursor(&body)
  nil
end
|
|
144
|
+
|
|
145
|
+
# Expands a stored record into a metadata hash.
#
# The record starts with the raw digests of every non-primary
# algorithm (in `algorithms` order, `DIGESTS[algo]` bytes each),
# followed by the fields of RECORD packed with FORMAT. The primary
# digest is not in the record; it is supplied as `bin`.
#
# @param bin [String] the binary primary digest
# @param rec [String] the stored record (left unmodified)
# @return [Hash] the RECORD fields plus `:digests`, with zero
#  timestamps and empty strings turned into `nil`
#
def inflate bin, rec
  rest = rec.dup # we consume the front of it below

  digests = {}
  algorithms.each do |algo|
    ni = URI::NI.build(scheme: 'ni', path: "/#{algo}")
    ni.digest = algo == primary ? bin : rest.slice!(0, DIGESTS[algo])
    digests[algo] = ni
  end

  # size ctime mtime ptime dtime flags type language charset encoding
  meta = RECORD.zip(rest.unpack(FORMAT)).to_h
  meta[:digests] = digests

  # a zero timestamp means "not set"
  %i[ctime ptime mtime dtime].each do |field|
    secs = meta[field]
    meta[field] = secs == 0 ? nil : Time.at(secs)
  end

  # so does an empty string
  %i[type language charset encoding].each do |field|
    meta[field] = nil if meta[field].empty?
  end

  meta
end
|
|
166
|
+
|
|
167
|
+
# Serializes an entry (or its hash form) into a stored record: the raw
# digests of every non-primary algorithm, followed by the RECORD
# fields packed with FORMAT.
#
# @param obj [Hash, #to_h] the metadata to serialize
# @return [String] the binary record
#
def deflate obj
  meta = obj.is_a?(Hash) ? obj : obj.to_h

  # the primary digest is the key, so only the others go in the record
  prefix = (algorithms - [primary]).map do |algo|
    meta[:digests][algo].digest
  end.join

  # integer fields go through to_i, the rest through to_s
  fields = RECORD.map do |field|
    coerce = INTS.fetch(field, :to_s)
    meta[field].send(coerce)
  end

  prefix + fields.pack(FORMAT)
end
|
|
173
|
+
|
|
174
|
+
protected
|
|
175
|
+
|
|
176
|
+
# Returns a metadata hash or `nil` if no changes have been made. A
|
|
177
|
+
# common scenario is that the caller will attempt to store an object
|
|
178
|
+
# that is already present, with the only distinction being `:ctime`
|
|
179
|
+
# (which is always ignored) and/or `:mtime`. Setting the `:preserve`
|
|
180
|
+
# keyword parameter to a true value will cause any new value for
|
|
181
|
+
# `:mtime` to be ignored as well. In that case, an attempt to store
|
|
182
|
+
# an otherwise identical record overtop of an existing one will
|
|
183
|
+
# return `nil`.
|
|
184
|
+
#
|
|
185
|
+
# @param obj [Store::Digest::Entry] the object to store
|
|
186
|
+
# @param preserve [false, true] whether to preserve the mtime
|
|
187
|
+
# @return [nil, Hash] maybe the metadata content of the object
|
|
188
|
+
#
|
|
189
|
+
# Writes (or updates) the metadata record for `obj` inside a
# read-write transaction, keeping the counters in the control database
# and the per-field indexes in step with it.
#
# @param obj [Store::Digest::Entry] the object to store
# @param preserve [false, true] whether to preserve the mtime
# @raise [ArgumentError] if `obj` lacks a digest for any algorithm
#  in `algorithms`
# @return [nil, Hash] the merged metadata, or `nil` when the
#  serialized record is identical to what is already stored
#
def set_meta obj, preserve: false
  missing = algorithms - obj.algorithms
  raise ArgumentError,
    'Object does not have a complete set of digests' unless missing.empty?

  # NB this is a lambda on purpose: `return` leaves the lambda, not
  # the method
  write = lambda do
    key      = obj[primary].digest
    incoming = obj.to_h
    stamp    = Time.now(in: 'Z')

    # net change in live records: -1 if the incoming record is marked
    # deleted, +1 otherwise; zeroed below if the state is unchanged
    delta  = incoming[:dtime] ? -1 : 1
    stored = @dbs[primary][key]
    fresh  = !stored
    before = fresh ? nil : inflate(key, stored)

    after =
      if before
        before.merge(incoming) do |field, was, given|
          case field
          when :ctime then was # never overwrite ctime
          when :mtime # only overwrite the mtime if specified
            first, second = preserve ? [was, given] : [given, was]
            first || second || stamp
          when :ptime then given || was || stamp # XXX derive ptime?
          when :dtime
            # no net change if both or neither are set
            delta = 0 if !given == !was
            given
          else given
          end
        end
      else
        %i[ctime mtime ptime].each { |field| incoming[field] ||= stamp }
        incoming
      end

    record = deflate after

    # a common scenario is a write where nothing is different: noop
    return if record == stored

    # the secondary digest lookups only need writing the first time
    if fresh
      (algorithms - [primary]).each do |algo|
        @dbs[algo][obj[algo].digest] = key
      end
    end

    @dbs[primary][key] = record

    if fresh
      # new record: it always counts as an object; it counts towards
      # the bytes if live, or towards the deleted tally if it arrives
      # already "deleted"
      control_add :objects, 1
      if delta > 0
        control_add :bytes, after[:size]
      elsif delta < 0
        control_add :deleted, 1
      end
    elsif delta > 0
      # restored record
      control_add :deleted, -1
      control_add :bytes, after[:size]
    elsif delta < 0
      # "deleted" record
      control_add :deleted, 1
      control_add :bytes, -after[:size]
    end
    # otherwise the counters stay put; actually *removing* a record is
    # a separate process

    # now bring the indexes up to date
    RECORD.each do |field|
      stale = before && before[field]
      index_rm field, stale, key if stale && stale != after[field]
      index_add field, after[field], key # will noop on nil
    end

    # and finally update the store's mtime
    @dbs[:control]['mtime'] = [stamp.to_i].pack('N')

    after
  end

  @lmdb.transaction(false, &write)
end
|
|
284
|
+
|
|
285
|
+
# Looks up the metadata for `obj` inside a read-only transaction.
#
# The primary digest is used when `obj` has one. Otherwise the digest
# with the largest DIGESTS size is chosen (ties broken by name) and
# resolved to the primary digest through that algorithm's database.
#
# @param obj [#[], #algorithms, #scanned?] something carrying digests
# @raise [ArgumentError] if `obj` has no primary digest and has not
#  been scanned
# @return [Hash, nil] the inflated record, or `nil` if it isn't there
#
def get_meta obj
  # NB this is a lambda on purpose: `return` leaves the lambda, not
  # the method
  lookup = lambda do
    algo = primary
    unless obj[primary]
      raise ArgumentError, 'Object must have digests' unless obj.scanned?

      # biggest digest first, then alphabetical
      algo = obj.algorithms.min do |a, b|
        by_size = DIGESTS[b] <=> DIGESTS[a]
        by_size == 0 ? a <=> b : by_size
      end
    end

    key = obj[algo].digest

    # resolve a secondary digest to the primary one
    if algo != primary
      key = @dbs[algo][key]
      return unless key
    end

    # actually raise maybe? because this should never happen
    record = @dbs[primary][key]
    return unless record

    # hand back a plain hash of all the elements
    inflate key, record
  end

  @lmdb.transaction(true, &lookup)
end
|
|
316
|
+
|
|
317
|
+
# Purges the metadata for `obj` inside a transaction: its index
# entries, its digest entries (primary included), and its share of the
# counters in the control database.
#
# @param obj [Object] whatever `get_meta` accepts
# @return [Hash, nil] the metadata that was removed (with `:dtime`
#  filled in if it was not already set), or `nil` if there was nothing
#  to remove
#
def remove_meta obj
  # NB this is a lambda on purpose: `return` leaves the lambda, not
  # the method
  purge = lambda do
    meta = get_meta(obj)
    return unless meta

    key   = meta[:digests][primary].digest
    stamp = Time.now(in: 'Z')

    RECORD.each { |field| index_rm field, meta[field], key }
    meta[:digests].each_pair { |algo, uri| @dbs[algo].delete uri.digest }

    # remove counts: a record already marked deleted was taken out of
    # the byte count back then, so only the deleted tally changes
    control_add :objects, -1
    if meta[:dtime]
      control_add :deleted, -1
    else
      control_add :bytes, -meta[:size]
      meta[:dtime] = stamp
    end

    # and finally update the store's mtime
    @dbs[:control]['mtime'] = [stamp.to_i].pack('N')

    meta
  end

  @lmdb.transaction { purge.call }
end
|
|
345
|
+
|
|
346
|
+
# Marks the metadata for `obj` as deleted (sets `:dtime`) inside a
# transaction, without removing the record, and moves its size out of
# the byte count and into the deleted tally.
#
# Fields from `obj.to_h` override the stored ones, except `:digests`,
# `:size` and `:type`, which always keep their stored values.
#
# @param obj [#to_h] whatever `get_meta` accepts
# @return [Hash, nil] the updated metadata, or `nil` if the object is
#  absent or already marked deleted
#
def mark_meta_deleted obj
  # NB this is a lambda on purpose: `return` leaves the lambda, not
  # the method
  body = -> do
    # the object has to be in here to delete it
    oldh = get_meta(obj) or return
    # if the object is already "deleted" we do nothing
    return if oldh[:dtime]

    bin = oldh[:digests][primary].digest
    now = Time.now in: ?Z

    newh = oldh.merge(obj.to_h) do |k, ov, nv|
      case k
      when :digests then ov # - old values are guaranteed complete
      when :size then ov    # - we don't trust the new value
      when :type then ov    # - this gets set by default
      when :dtime then now  # - what we came here to do
      else nv || ov
      end
    end
    # the merge block only runs for keys present in *both* hashes, so
    # set dtime explicitly in case obj.to_h doesn't carry the key;
    # otherwise the counters below would move without the record
    # actually being marked
    newh[:dtime] = now

    @dbs[primary][bin] = deflate newh
    control_add :deleted, 1
    control_add :bytes, -newh[:size]

    # okay now we update the indexes
    RECORD.each do |k|
      index_rm k, oldh[k], bin if oldh[k] and oldh[k] != newh[k]
      index_add k, newh[k], bin # will noop on nil
    end

    # and finally update the mtime
    @dbs[:control]['mtime'] = [now.to_i].pack ?N

    newh
  end

  @lmdb.transaction do
    body.call
  end
end
|
|
386
|
+
|
|
387
|
+
end
|
|
388
|
+
end
|