store-digest 0.3.0 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ # again for the symbol
2
+ require 'store/digest/version'
3
+
4
# Base class for the errors raised by Store::Digest; the specific
# conditions are nested subclasses.
class Store::Digest::Error < RuntimeError
  # Signals a configuration parameter that cannot be reconciled.
  class Configuration < self; end

  # Signals an attempt to add a deleted entry to a store.
  class Deleted < self; end

  # Signals that a scanned property does not match an asserted property.
  #
  # @note Use this when e.g. the size or type don't match.
  class Integrity < self; end

  # Signals specifically that a scanned hash does not match an asserted one.
  class CryptographicIntegrity < Integrity; end
end
@@ -0,0 +1,388 @@
1
+ require 'store/digest/meta'
2
+
3
+ module Store::Digest::Meta::LMDB
4
+ # This is the version zero (original) database layout.
5
+ module V0
6
+
7
# pack/unpack template for one complete object record; the directives
# line up one-to-one with the fields named in RECORD
FORMAT = 'Q>NNNNCZ*Z*Z*Z*'.freeze

# field names of an object record, in serialization order
RECORD = %i[
  size ctime mtime ptime dtime flags
  type language charset encoding
].freeze

# the integer-valued fields, each mapped to the coercion used when packing
# (every other field is coerced with :to_s, see `deflate`)
INTS = RECORD.first(6).to_h { |field| [field, :to_i] }.freeze

# per-key pack templates
PACK = {
  # control records
  objects:  'Q>',
  deleted:  'Q>',
  bytes:    'Q>',
  # object records (ctime and mtime double as control keys)
  size:     'Q>',
  ctime:    'N',
  mtime:    'N',
  ptime:    'N',
  dtime:    'N',
  flags:    'C',
  type:     'Z*',
  language: 'Z*',
  charset:  'Z*',
  encoding: 'Z*',
}.transform_values(&:freeze).freeze
29
+
30
+ # NOTE these are all internal methods meant to be used inside other
31
+ # transactions so they do not run in transactions themselves
32
+
33
+ private
34
+
35
# Unpack a single raw value with the template registered for `type`.
#
# @param raw [String] the packed bytes
# @param type [Symbol] a key of PACK
# @return [Integer, String] the first value the template yields
def db_decode(raw, type)
  template = PACK[type]
  raw.unpack1(template)
end
38
+
39
# Pack a single value with the template registered for `type`.
#
# @param value [Integer, String] the value to serialize
# @param type [Symbol] a key of PACK
# @return [String] the packed bytes
def db_encode(value, type)
  template = PACK[type]
  [value].pack(template)
end
42
+
43
# Seed the control database and open one index database per record field
# and per digest algorithm, then freeze the `@dbs` table.
#
# @return [Hash] the frozen `@dbs` table
def setup_dbs
  epoch = Time.now(in: 'Z').to_i

  # stamp ctime/mtime only when they are not already recorded
  %w[ctime mtime].each do |stamp|
    next if @dbs[:control].has? stamp
    @dbs[:control][stamp] = [epoch].pack('N')
  end

  # zero any counter whose same-named reader comes back falsy
  %w[objects deleted bytes].each do |counter|
    next if send(counter.to_sym)
    @dbs[:control][counter] = [0].pack('Q>')
  end

  # record-field indexes are dupsort; digest databases get no extra flags
  # XXX we might actually wanna dupsort the non-primary digests too
  specs = RECORD.to_h { |field| [field, [:dupsort]] }
  algorithms.each { |algo| specs[algo] = [] }

  opened = specs.to_h do |name, flags|
    options = (flags + [:create]).to_h { |flag| [flag, true] }
    [name, @lmdb.database(name.to_s, options)]
  end

  @dbs.merge!(opened).freeze
end
68
+
69
# Add `val` to the integer stored under `key` in the control database and
# write the sum back.
#
# An existing value is decoded as 'N' when it is 4 bytes long and as 'Q>'
# when it is 8 bytes long; a missing value counts as zero. The sum is
# always written back as 'Q>'.
#
# @param key [#to_s] the control key
# @param val [Integer] the (possibly negative) amount to add
# @return [Integer] the new value
# @raise [RuntimeError] if the stored value is neither 4 nor 8 bytes long
def control_add(key, val)
  name = key.to_s
  raw  = @dbs[:control][name]

  current =
    if raw
      template = { 4 => 'N', 8 => 'Q>' }.fetch(raw.length) {
        raise RuntimeError, "#{key} must be 4 or 8 bytes long"
      }
      raw.unpack1(template)
    else
      0
    end

  total = current + val
  @dbs[:control][name] = [total].pack('Q>')
  total
end
88
+
89
# Read one value from the control database.
#
# The whitelist is a symbol array (`%i[]`). The previous `%[]` literal
# built a String, and `String#include?` raises TypeError when handed a
# Symbol, so no key could ever be read.
#
# @param key [#to_sym] one of ctime, mtime, objects, deleted, bytes
# @return [Integer, nil] the decoded value, or nil if the key is unset
# @raise [ArgumentError] if `key` is not a known control key
def control_get(key)
  key = key.to_sym
  unless %i[ctime mtime objects deleted bytes].include?(key)
    raise ArgumentError, "Invalid control key #{key}"
  end

  raw = @dbs[:control][key.to_s]
  raw&.unpack1(PACK[key])
end
98
+
99
# Serialize a value for use as an index key.
#
# @param key [nil, Time, Integer, String] the value to serialize
# @return [String, nil] nil for nil; 'N'-packed epoch seconds for a Time;
#   a 'Q>'-packed value for an Integer; a binary copy for a String
# @raise [ArgumentError] for any other type
def index_pack(key)
  return if key.nil?
  return [key.to_i].pack('N') if key.is_a?(Time)
  return [key].pack('Q>') if key.is_a?(Integer)
  # no \0: key length is stored in the record
  return key.b if key.is_a?(String)

  raise ArgumentError, "Invalid type #{key.class}"
end
108
+
109
# Add a key/value pair to an index unless that exact pair is already there.
#
# The index name is normalized with `to_sym`, as `index_rm` and
# `index_get` already do, so String and Symbol names hit the same database.
#
# @param index [#to_sym] the index (database) name
# @param key [nil, Time, Integer, String] the key; nil makes this a no-op
# @param bin [String] the value to store under the key
# @return [Object, nil] whatever `put` returns, or nil when nothing is written
def index_add(index, key, bin)
  key = index_pack(key) or return
  db = @dbs[index.to_sym]
  # check first or it will just stupidly keep adding duplicate records
  db.put key, bin unless db.has? key, bin
end
114
+
115
# Remove one key/value pair from an index.
#
# @param index [#to_sym] the index (database) name
# @param key [nil, Time, Integer, String] the key; nil makes this a no-op
# @param bin [String] the value stored under the key
# @return [Object, nil] whatever `delete?` returns, or nil for a nil key
def index_rm(index, key, bin)
  packed = index_pack(key)
  return unless packed

  # soft delete baleets only when there is something to baleet
  @dbs[index.to_sym].delete? packed, bin
end
120
+
121
# Walk an index and yield each matching `(key, value)` pair.
#
# `min` and `max` are serialized with `index_pack`. Without a block an
# enumerator over the same walk is returned; the `range:` flag is now
# forwarded to it (it used to be dropped, so an enumerator always behaved
# as `range: false`).
#
# @param index [#to_sym] the index (database) name
# @param min [nil, Time, Integer, String] where to position the cursor
# @param max [nil, Time, Integer, String] passed to `next_range` when set
# @param range [false, true] when false and no `max` is given, nothing is
#   yielded unless the first record's key equals `min`
# @yield [key, value] each record the cursor returns
# @return [Enumerator, nil] an enumerator if no block is given; otherwise
#   nil (also nil when both `min` and `max` are nil)
def index_get(index, min, max = nil, range: false, &block)
  min = index_pack(min)
  max = index_pack(max)
  return unless min || max

  unless block_given?
    return enum_for(:index_get, index, min, max, range: range)
  end

  walk = lambda do |cursor|
    # lmdb cursors are a pain because 'set' advances the cursor, so the
    # first record is handled separately from the ones that follow
    rec = min ? cursor.set_range(min) : cursor.first
    return unless rec
    return unless range || max || min == rec.first

    block.call(*rec)
    block.call(*rec) while (rec = cursor.next_range(max || min))
  end

  @dbs[index.to_sym].cursor(&walk)
  nil
end
144
+
145
# Turn a stored record back into a metadata hash.
#
# The record starts with the raw digests of every non-primary algorithm
# (DIGESTS[algo] bytes each, in `algorithms` order), followed by the
# FORMAT-packed fields. Zero timestamps and empty strings come back as nil.
#
# @param bin [String] the primary digest (the record's key)
# @param rec [String] the stored record; it is not modified
# @return [Hash] the RECORD fields plus :digests (algorithm => URI::NI)
def inflate(bin, rec)
  buffer = rec.dup # slice! below is destructive

  digests = algorithms.to_h do |algo|
    uri = URI::NI.build(scheme: 'ni', path: "/#{algo}")
    uri.digest = algo == primary ? bin : buffer.slice!(0, DIGESTS[algo])
    [algo, uri]
  end

  # size ctime mtime ptime dtime flags type language charset encoding
  meta = RECORD.zip(buffer.unpack(FORMAT)).to_h
  meta[:digests] = digests

  %i[ctime ptime mtime dtime].each do |field|
    seconds = meta[field]
    meta[field] = seconds == 0 ? nil : Time.at(seconds)
  end

  %i[type language charset encoding].each do |field|
    meta[field] = nil if meta[field].empty?
  end

  meta
end
166
+
167
# Serialize metadata into a stored record: the raw digests of every
# non-primary algorithm, followed by the FORMAT-packed RECORD fields.
#
# @param obj [Hash, #to_h] the metadata; anything that is not a Hash is
#   converted with `to_h`
# @return [String] the record
def deflate(obj)
  meta = obj.is_a?(Hash) ? obj : obj.to_h

  secondary = algorithms - [primary]
  prefix = secondary.map { |algo| meta[:digests][algo].digest }.join

  # integer fields go through to_i and the rest through to_s, so nil
  # becomes 0 or '' respectively
  fields = RECORD.map do |name|
    coercion = INTS.fetch(name, :to_s)
    meta[name].send(coercion)
  end

  prefix + fields.pack(FORMAT)
end
173
+
174
+ protected
175
+
176
# Store (or update) the metadata for an object, inside a transaction.
#
# Returns the resulting metadata hash, or `nil` when the serialized
# record comes out identical to the one already stored. A common case is
# a caller re-storing an object that is already present, where the only
# differences are `:ctime` (an existing value is never overwritten)
# and/or `:mtime`. Passing a true `:preserve` makes an existing `:mtime`
# win over the incoming one as well, so that writing an otherwise
# identical record over an existing one returns `nil`.
#
# @param obj [Store::Digest::Entry] the object to store
# @param preserve [false, true] whether to preserve the mtime
# @return [nil, Hash] maybe the metadata content of the object
# @raise [ArgumentError] if the object lacks any of the store's digests
#
def set_meta(obj, preserve: false)
  missing = algorithms - obj.algorithms
  raise ArgumentError,
    'Object does not have a complete set of digests' unless missing.empty?

  body = -> do
    bin  = obj[primary].digest
    newh = obj.to_h
    now  = Time.now(in: 'Z')

    # net change in live records: -1 if the incoming entry carries a
    # dtime, +1 otherwise; the merge below may reset it to 0
    change = newh[:dtime] ? -1 : 1
    oldrec = @dbs[primary][bin]
    oldh   = nil

    if oldrec
      oldh = inflate bin, oldrec
      newh = oldh.merge(newh) do |field, ov, nv|
        case field
        when :ctime then ov # never overwrite ctime
        when :mtime         # only overwrite the mtime if specified
          preserve ? (ov || nv || now) : (nv || ov || now)
        when :ptime then nv || ov || now # XXX derive ptime?
        when :dtime
          # net change is zero if both or neither are set
          change = 0 if !nv == !ov
          nv
        else nv
        end
      end
    else
      # a brand-new record gets the current time for any missing stamp
      %i[ctime mtime ptime].each { |field| newh[field] ||= now }
    end

    newrec = deflate newh

    # nothing to write if the serialized record is unchanged; note this
    # `return` leaves the lambda, not the method
    return if newrec == oldrec

    # the secondary digest mappings only need writing for a new record
    unless oldrec
      (algorithms - [primary]).each do |algo|
        @dbs[algo][obj[algo].digest] = bin
      end
    end

    @dbs[primary][bin] = newrec

    # if old dtime is nil and new dtime is non-nil then we are deleting
    # if old dtime is non-nil and new dtime is nil then we are restoring
    if oldrec
      if change > 0
        # restored record: one fewer deleted, size added back to bytes
        control_add :deleted, -1
        control_add :bytes, newh[:size]
      elsif change < 0
        # "deleted" record: one more deleted, size taken off bytes
        control_add :deleted, 1
        control_add :bytes, -newh[:size]
      end
      # otherwise do nothing
    else
      # new record: always counted as an object
      control_add :objects, 1
      if change > 0
        control_add :bytes, newh[:size]
      elsif change < 0
        # objects *and* deleted both go up, allowing for a fresh object
        # to be added to the store "deleted"
        control_add :deleted, 1
      end
    end

    # note that actually *removing* a record is a separate process.

    # okay now we update the indexes
    RECORD.each do |field|
      stale = oldh && oldh[field]
      index_rm field, stale, bin if stale && stale != newh[field]
      index_add field, newh[field], bin # will noop on nil
    end

    # and finally update the mtime
    @dbs[:control]['mtime'] = [now.to_i].pack('N')

    newh
  end

  @lmdb.transaction(false, &body)
end
284
+
285
# Look up the stored metadata for an object, inside a transaction.
#
# The primary digest is used when the object has one. Otherwise the
# algorithm with the largest DIGESTS value is used (ties broken by name)
# and its digest is resolved to the primary digest first.
#
# @param obj [Store::Digest::Entry] the object to look up
# @return [Hash, nil] the inflated metadata, or nil if nothing is stored
# @raise [ArgumentError] if the object has no primary digest and is not
#   scanned
def get_meta(obj)
  lookup = -> do
    # find/inflate master record
    algo = primary
    unless obj[algo]
      raise ArgumentError, 'Object must have digests' unless
        obj.scanned?
      algo = obj.algorithms.min do |a, b|
        by_length = DIGESTS[b] <=> DIGESTS[a]
        by_length == 0 ? a <=> b : by_length
      end
    end
    bin = obj[algo].digest

    # look up the primary digest based on a secondary
    if algo != primary
      bin = @dbs[algo][bin]
      return unless bin
    end

    # actually raise maybe? because this should never happen
    rec = @dbs[primary][bin]
    return unless rec

    # return just a hash of all the elements
    inflate bin, rec
  end

  @lmdb.transaction(true, &lookup)
end
316
+
317
# Purge an object's index entries and digest records and adjust the
# counters, inside a transaction.
#
# @param obj [Store::Digest::Entry] the object to remove
# @return [Hash, nil] the metadata as it was stored (with :dtime set to
#   the current time if it was not already set), or nil if `get_meta`
#   finds nothing
def remove_meta(obj)
  purge = -> do
    meta = get_meta(obj)
    return unless meta

    bin = meta[:digests][primary].digest
    now = Time.now(in: 'Z')

    RECORD.each { |field| index_rm field, meta[field], bin }
    meta[:digests].each { |algo, uri| @dbs[algo].delete uri.digest }

    # remove counts: an already-"deleted" record was not contributing to
    # the byte count, only to the deleted count
    control_add :objects, -1
    if meta[:dtime]
      control_add :deleted, -1
    else
      control_add :bytes, -meta[:size]
      meta[:dtime] = now
    end

    # and finally update the mtime
    @dbs[:control]['mtime'] = [now.to_i].pack('N')

    meta
  end

  # the lambda is called from inside the block so that its early
  # `return` does not leave this method
  @lmdb.transaction { purge.call }
end
345
+
346
# Flag a stored object as deleted (a soft delete), inside a transaction.
#
# @param obj [Store::Digest::Entry] the object to mark
# @return [Hash, nil] the updated metadata, or nil if the object is not
#   in the store or already carries a dtime
def mark_meta_deleted(obj)
  tombstone = -> do
    # the object has to be in here to delete it
    oldh = get_meta(obj)
    return unless oldh
    # if the object is already "deleted" we do nothing
    return if oldh[:dtime]

    bin = oldh[:digests][primary].digest
    now = Time.now(in: 'Z')

    # stored values win for these: the old digests are guaranteed
    # complete, the new size is not trusted, and type gets set by default
    keep_old = %i[digests size type]

    # NOTE(review): the merge block only runs for keys present on both
    # sides, so :dtime becomes `now` only if obj.to_h has a :dtime key —
    # confirm against Entry#to_h
    newh = oldh.merge(obj.to_h) do |field, ov, nv|
      if keep_old.include?(field)
        ov
      elsif field == :dtime
        now # what we came here to do
      else
        nv || ov
      end
    end

    @dbs[primary][bin] = deflate(newh)
    control_add :deleted, 1
    control_add :bytes, -newh[:size]

    # okay now we update the indexes
    RECORD.each do |field|
      stale = oldh[field]
      index_rm field, stale, bin if stale && stale != newh[field]
      index_add field, newh[field], bin # will noop on nil
    end

    # and finally update the mtime
    @dbs[:control]['mtime'] = [now.to_i].pack('N')

    newh
  end

  # the lambda is called from inside the block so that its early
  # `return` does not leave this method
  @lmdb.transaction { tombstone.call }
end
386
+
387
+ end
388
+ end