store-digest 0.3.0 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/TODO.org +115 -4
- data/lib/store/digest/blob/filesystem.rb +9 -4
- data/lib/store/digest/driver.rb +4 -0
- data/lib/store/digest/entry.rb +1214 -0
- data/lib/store/digest/error.rb +28 -0
- data/lib/store/digest/meta/lmdb/v0.rb +388 -0
- data/lib/store/digest/meta/lmdb/v1.rb +737 -0
- data/lib/store/digest/meta/lmdb.rb +59 -1041
- data/lib/store/digest/meta.rb +1 -1
- data/lib/store/digest/readwrapper.rb +174 -0
- data/lib/store/digest/version.rb +1 -1
- data/lib/store/digest.rb +335 -117
- data/store-digest.gemspec +6 -7
- metadata +47 -19
- data/lib/store/digest/object.rb +0 -602
|
@@ -0,0 +1,1214 @@
|
|
|
1
|
+
require 'store/digest/readwrapper'
|
|
2
|
+
require 'store/digest/error'
|
|
3
|
+
|
|
4
|
+
require 'uri'
|
|
5
|
+
require 'uri/ni'
|
|
6
|
+
require 'mimemagic-dorian'
|
|
7
|
+
|
|
8
|
+
# This class represents an entry in the content-addressable store.
|
|
9
|
+
#
|
|
10
|
+
# An entry can be initialized with:
|
|
11
|
+
#
|
|
12
|
+
# * a `String` (or anything that can `#to_s`)
|
|
13
|
+
# * an `Array` of strings (or anything that can `#each`)
|
|
14
|
+
# * a `Pathname` (as long as it refers to a file that can be opened for reading)
|
|
15
|
+
# * an `IO` object (as long as it's finite, such as a `File`, but
|
|
16
|
+
# it's your problem to ensure that it is)
|
|
17
|
+
# * anything that can `#read` (same deal on the finitude)
|
|
18
|
+
# * and two kinds of `#call`s:
|
|
19
|
+
# * zero arity, which is expected to return something that quacks like
|
|
20
|
+
# a file handle,
|
|
21
|
+
# * nonzero arity, where the first argument is expected to be
|
|
22
|
+
# something that [behaves like a write
|
|
23
|
+
# handle](https://github.com/rack/rack/blob/main/SPEC.rdoc#streaming-body).
|
|
24
|
+
#
|
|
25
|
+
# This behaviour is so {Store::Digest::Entry} instances can be dropped
|
|
26
|
+
# into `Rack` request and response bodies and replace/consume whatever
|
|
27
|
+
# was in there before. As such, this class implements {#each},
|
|
28
|
+
# {#gets}, {#read}, {#rewind}, and {#close} to emulate an `Enumerable`
|
|
29
|
+
# and/or `IO` handle.
|
|
30
|
+
#
|
|
31
|
+
# Content is scanned lazily (i.e., not until you invoke any of the
|
|
32
|
+
# accessors or the {#scan}/{#scan!} or {#add_to} methods) unless you
|
|
33
|
+
# tell the constructor to `scan: true`. These objects are not
|
|
34
|
+
# associated with a store by default. You must {#initialize} with a
|
|
35
|
+
# reference to `store:`, {#add_to} a store later, or use
|
|
36
|
+
# {Store::Digest#add}, which returns one of these objects.
|
|
37
|
+
#
|
|
38
|
+
# If you initialize one of these objects with one or more hashes, it
|
|
39
|
+
# is assumed that it has already been scanned and the hashes are
|
|
40
|
+
# representative. If, however, you force a {#scan!}, it _will_ raise
|
|
41
|
+
# an error if the supplied hashes don't match.
|
|
42
|
+
#
|
|
43
|
+
class Store::Digest::Entry
|
|
44
|
+
|
|
45
|
+
# This is a struct for the bank of flags, with a couple of extra
|
|
46
|
+
# methods for parsing
|
|
47
|
+
#
|
|
48
|
+
Flags = Struct.new('Flags', :type_checked, :type_valid, :charset_checked,
  :charset_valid, :encoding_checked, :encoding_valid,
  :syntax_checked, :syntax_valid, :cache) do

  class << self
    # Build a flag struct out of whatever is handed in.
    #
    # An `Integer` is read as a bit field (the least significant bit
    # is the first member), a `Hash` is matched on member names, and
    # anything with `#to_a` is read positionally. An existing instance
    # is passed through untouched.
    #
    # @param arg [Store::Digest::Entry::Flags, Integer, Hash, #to_a]
    #
    # @raise [ArgumentError] if the input is none of the above
    #
    # @return [Store::Digest::Entry::Flags]
    #
    def from arg
      # already one of ours: hand it straight back
      return arg if arg.is_a? self

      # anything that is neither nil, false, nor zero counts as set
      truthy = -> bit { !!(bit && bit != 0) }

      # hashes go in by keyword; members that are not named stay `nil`
      if arg.is_a? Hash
        named = arg.slice(*members).transform_values(&truthy)
        return self.[](**named)
      end

      bits =
        if arg.is_a? Integer
          arg.digits 2
        elsif arg.respond_to? :to_a
          arg.to_a
        else
          raise ArgumentError, 'Input must be an integer or array'
        end

      # one boolean per member: positions past the end of the input
      # come out `false` and surplus input is ignored
      self.[](*Array.new(members.size) { |slot| truthy.call bits[slot] })
    end

    # Pack an arbitrary {Array} of truthy/falsy values back into an
    # {Integer}; element zero becomes the least significant bit.
    #
    # @param array [Array, #to_a]
    #
    # @return [Integer]
    #
    def to_i array
      array.to_a.each_with_index.sum { |bit, place| bit ? 1 << place : 0 }
    end
  end

  # Bitwise AND of the packed flags with anything that has `#to_i`.
  def &(other)
    to_i & other.to_i
  end

  # Bitwise OR of the packed flags with anything that has `#to_i`.
  def |(other)
    to_i | other.to_i
  end

  # the instance-level `to_i` simply feeds the member values to the
  # class-level packer defined above
  pack = method :to_i
  define_method(:to_i) { pack.call to_a }
end
|
|
115
|
+
|
|
116
|
+
# flag bit masks, one bit per flag, in ascending bit order
TYPE_CHECKED     = 0b000000001
TYPE_VALID       = 0b000000010
CHARSET_CHECKED  = 0b000000100
CHARSET_VALID    = 0b000001000
ENCODING_CHECKED = 0b000010000
ENCODING_VALID   = 0b000100000
SYNTAX_CHECKED   = 0b001000000
SYNTAX_VALID     = 0b010000000
IS_CACHE         = 0b100000000
|
|
126
|
+
|
|
127
|
+
private
|
|
128
|
+
|
|
129
|
+
SAMPLE    = 2**13 # must be big enough to detect ooxml
BLOCKSIZE = 2**16

# alternate spellings of character sets => the spelling we keep
CHARSETS = {
  'utf8'      => 'utf-8'.freeze,
  'iso8859-1' => 'iso-8859-1'.freeze,
}.freeze

# legacy content-encoding names => the spelling we keep
ENCODINGS = {
  'x-compress' => 'compress'.freeze,
  'x-gzip'     => 'gzip'.freeze,
}.freeze

# character class for a header token, interpolated into the patterns below
TOKEN = '[^\x0-\x20()<>@,;:\\\"/\[\]?=\x7f-\\xff]+'

# { key: [pattern, normalizer] } - assumes stripped and downcased
#
# The patterns are anchored with `\A`/`\z` rather than `^`/`$` so the
# *whole* input has to be a token; line anchors would accept input
# with embedded newlines and quietly keep only its first line.
TOKENS = {
  type: [/\A(#{TOKEN}(?:\/#{TOKEN})?)\z/on, -> c { MimeMagic[c] }].freeze,
  charset: [/\A(#{TOKEN})\z/on,
    -> c { c = c.tr(?_, ?-).downcase; CHARSETS.fetch c, c } ].freeze,
  encoding: [/\A(#{TOKEN})\z/on,
    -> c { c = c.tr(?_, ?-).downcase; ENCODINGS.fetch c, c } ].freeze,
  language: [/\A([a-z]{2,3}(?:[-_][0-9a-z]+)*)\z/,
    -> c { c.downcase.tr(?_, ?-).gsub(/-*$/, '') } ].freeze,
}.freeze

# human-readable labels for the metadata fields
LABELS = {
  size:     'Size (Bytes)',
  ctime:    'Added to Store',
  mtime:    'Last Modified',
  ptime:    'Properties Modified',
  dtime:    'Deleted (Expires)',
  type:     'Content Type',
  language: '(Natural) Language',
  charset:  'Character Set',
  encoding: 'Content Encoding',
}.freeze

MANDATORY = %i[size ctime mtime ptime].freeze
OPTIONAL  = %i[dtime type language charset encoding].freeze
FLAG      = %i[content-type charset content-encoding syntax].freeze
STATE     = %i[unverified invalid recheck valid].freeze
|
|
171
|
+
|
|
172
|
+
# Coerce a byte count into a non-negative Integer; `nil` is read as zero.
#
# @param i [nil, Numeric]
#
# @raise [TypeError] if the input is neither `nil` nor `Numeric`
# @raise [ArgumentError] if the input is negative
#
# @return [Integer]
#
def coerce_nn_int i
  return 0 if i.nil?

  raise TypeError, 'size must be nil or Numeric' unless i.is_a? Numeric
  raise ArgumentError, 'size must be non-negative' if i < 0

  i.to_i
end
|
|
182
|
+
|
|
183
|
+
#
|
|
184
|
+
# Coerce a timestamp into a `Time`. `nil` and `Time` pass through,
# anything with `#to_time` is converted, and an `Integer` is handed
# to `Time.at`.
#
# @param t [nil, Time, #to_time, Integer] the value to coerce
# @param k [Symbol, nil] field name, only used in error messages
#
# @raise [ArgumentError] if an Integer input is negative
# @raise [TypeError] if the input is none of the accepted kinds
#
# @return [Time, nil]
#
def coerce_time t, k = nil
  return t if t.nil? || t.is_a?(Time)
  return t.to_time if t.respond_to? :to_time

  raise TypeError, "Invalid type for #{k}: #{t.class}" unless t.is_a? Integer
  raise ArgumentError, "#{k} given as Integer must be non-negative" if t < 0

  Time.at t
end
|
|
198
|
+
|
|
199
|
+
# Validate and normalize a metadata token.
#
# The input is stringified, stripped and downcased, matched against
# the pattern registered in `TOKENS` under `k`, and the first capture
# is handed to the normalizer registered alongside it.
#
# @param t [#to_s] the raw token
# @param k [Symbol] a key of `TOKENS`
#
# @raise [RuntimeError] if the token does not match the pattern
#
# @return [Object] whatever the normalizer returns
#
def coerce_token t, k
  pattern, normalize = TOKENS[k]
  token = t.to_s.strip.downcase
  found = pattern.match token

  raise "#{k} #{token} does not match #{pattern}" unless found

  normalize.call found.captures.first
end
|
|
205
|
+
|
|
206
|
+
# Instance-level front end to the class-level `coerce_digests` that
# slips this entry's own `algorithms` in as the permitted repertoire.
#
# @param digests [Object] passed through unchanged
# @param empty [false, true] passed through unchanged
# @param normative [nil, false, true] passed through unchanged
#
# @return [Object] whatever the class-level method returns
#
def coerce_digests digests, empty: false, normative: nil
  options = { algorithms: algorithms, empty: empty, normative: normative }
  self.class.coerce_digests digests, **options
end
|
|
211
|
+
|
|
212
|
+
CACHE_TTL = 86400
|
|
213
|
+
|
|
214
|
+
# Turn the constructor's `cache:` argument into an expiry time.
#
# * a `Numeric` is a time-to-live in seconds, counted from `@ptime`
# * a `Time`, or anything with `#to_time`, is the expiry itself
# * any other truthy value (e.g. `true`) means "use the default
#   time-to-live": the associated store's `cache_ttl` if there is a
#   store, `CACHE_TTL` otherwise, again counted from `@ptime`
#
# The default branch used to return the bare time-to-live, which left
# a number where every other branch yields a `Time`.
#
# @note `@ptime` has to be set before this is called.
#
# @param cache [nil, false, true, Numeric, Time, #to_time]
#
# @return [Time, nil] the expiry, or `nil` if `cache` is falsy
#
def compute_cache cache
  return unless cache

  case cache
  when Numeric
    # cache dtime should be relative to metadata parameter change time
    @ptime + cache
  when Time
    cache
  else
    if cache.respond_to? :to_time
      cache.to_time
    else
      @ptime + (@store ? @store.cache_ttl : CACHE_TTL)
    end
  end
end
|
|
227
|
+
|
|
228
|
+
# Returns metadata without calling the accessors and triggering a
|
|
229
|
+
# scan.
|
|
230
|
+
#
|
|
231
|
+
# @return [Hash] the current set of metadata
|
|
232
|
+
#
|
|
233
|
+
def meta_hash content: false, store: false
  # the two handles are opt-in, only included when actually present,
  # and go in front of the metadata proper
  fields = []
  fields << :content if content && @content
  fields << :store if store && @store
  fields.concat %i[digests size ctime mtime ptime dtime
                   flags type charset encoding language]

  # read the instance variables directly; fields that were never
  # assigned are left out rather than reported as nil
  fields.each_with_object({}) do |field, out|
    ivar = "@#{field}"
    next unless instance_variable_defined? ivar
    out[field] = instance_variable_get ivar
  end
end
|
|
244
|
+
|
|
245
|
+
# Merge a metadata hash into the object.
|
|
246
|
+
#
|
|
247
|
+
# @param hash [Hash{Symbol=>Object}]
|
|
248
|
+
#
|
|
249
|
+
# @raise [Store::Digest::Error::Integrity]
|
|
250
|
+
#
|
|
251
|
+
# @return [void]
|
|
252
|
+
#
|
|
253
|
+
def merge_meta hash, content: false
  # adopt the content handle only when asked to and there is one
  @content = hash[:content] if content && hash[:content]

  # byte size: an already-asserted size has to agree with the new one
  if (raw = hash[:size])
    incoming = coerce_nn_int raw
    if @size && incoming != @size
      raise Store::Digest::Error::Integrity,
        "Scanned size #{incoming} does not match asserted #{@size}"
    end
    @size = incoming
  end

  # digests: every algorithm present on both sides has to agree
  if (raw = hash[:digests])
    incoming = coerce_digests raw, normative: true

    clash = (@digests.keys & incoming.keys).find do |algo|
      incoming[algo] != @digests[algo]
    end
    if clash
      scanned  = incoming[clash]
      asserted = @digests[clash]
      raise Store::Digest::Error::CryptographicIntegrity,
        "Scanned digest #{scanned} does not match asserted #{asserted}"
    end

    # the algorithm set is replaced along with the digests for parity
    @digests    = incoming.transform_values(&:freeze).freeze
    @algorithms = incoming.keys.to_set.freeze
    @scanned    = true
  end

  # type: taken when nothing is asserted yet, or when the incoming
  # type is a descendant of the asserted one
  if (raw = hash[:type])
    t = coerce_token raw, :type
    @type = (t.canonical || t) if !@type || t.descendant_of?(@type)
  end

  # for these, a key that is present but empty clears the value,
  # whereas an absent key leaves it alone
  %i[charset encoding language].each do |field|
    token = hash[field] ? coerce_token(hash[field], field).freeze : nil
    instance_variable_set "@#{field}", token if hash.key? field
  end

  # mtime is special: it is only ever overwritten, never cleared
  # XXX TODO preserve older newer
  @mtime = coerce_time hash[:mtime], :mtime if hash[:mtime]

  # same present-versus-absent distinction as the tokens above
  %i[ctime ptime dtime].each do |field|
    stamp = hash[field] ? coerce_time(hash[field], field).freeze : nil
    instance_variable_set "@#{field}", stamp if hash.key? field
  end

  # finally the flags
  @flags = Flags.from hash[:flags] if hash[:flags]

  nil
end
|
|
316
|
+
|
|
317
|
+
# this is to switch the content over
|
|
318
|
+
#
|
|
319
|
+
# If the content is callable, call it with no arguments and keep what
# it returns as the content from here on.
#
# @return [Object, nil] the new content, or `nil` if nothing changed
#
def dereference?
  return unless @content.respond_to? :call

  @content = @content.call
end
|
|
322
|
+
|
|
323
|
+
# Find out whether a handle can actually seek, as opposed to merely
# having a `#seek` method.
#
# @param io [Object] the handle to probe
#
# @return [false, true]
#
def seekable? io
  io.respond_to?(:seek) && begin
    # a zero offset from the current position should be a noop
    io.seek 0, IO::SEEK_CUR
    true
  rescue Errno::ESPIPE, Errno::EINVAL
    false
  end
end
|
|
333
|
+
|
|
334
|
+
public
|
|
335
|
+
|
|
336
|
+
# Create a new object, naively recording whatever it is handed.
|
|
337
|
+
#
|
|
338
|
+
# @note use {.scan} or {#scan} to populate the digests.
|
|
339
|
+
#
|
|
340
|
+
# @param content [IO, String, Proc, File, Pathname, ...] some content
|
|
341
|
+
# @param store [Store::Digest] the associated store, if present
|
|
342
|
+
# @param digests [Hash] the digests ascribed to the content
|
|
343
|
+
# @param type [String] assert the object's MIME type
|
|
344
|
+
# @param charset [String] the character set, if applicable
|
|
345
|
+
# @param language [String] the (RFC5646) language tag, if applicable
|
|
346
|
+
# @param encoding [String] the content-encoding (e.g. compression)
|
|
347
|
+
# @param mtime [Time] assert object modification time
|
|
348
|
+
# @param flags [Integer, Flags] validation state flags
|
|
349
|
+
# @param strict [true, false] raise an error on bad input
|
|
350
|
+
#
|
|
351
|
+
# @return [Store::Digest::Entry] the object in question
|
|
352
|
+
#
|
|
353
|
+
def initialize content = nil, store: nil, digests: nil, mtime: nil,
    type: nil, charset: nil, encoding: nil, language: nil, flags: 0,
    cache: false, strict: false, scan: false, &block

  # associate the store, if one is passed in
  raise 'Store must be an instance of Store::Digest' if
    store && !store.is_a?(Store::Digest)
  @store = store if store

  now = Time.now

  # the writer may set @mtime and @type as a side effect
  self.content = content if content

  # explicit arguments win over whatever the content writer inferred
  @mtime = mtime || @mtime || now
  type ||= @type || MimeMagic[nil]

  # the tokens can be strings or symbols; bad ones are dropped
  # quietly unless `strict` is set, in which case the error escapes
  asserted = {
    type: type, charset: charset, encoding: encoding, language: language,
  }
  TOKENS.each_key do |field|
    next unless (raw = asserted[field])

    token = begin
      coerce_token raw, field
    rescue StandardError
      raise if strict
      nil
    end

    instance_variable_set "@#{field}", token.freeze if token
  end

  # an empty set of digests is let through here
  digests = coerce_digests digests, empty: true
  if digests.is_a? Hash
    # a hash carries digest values, so a non-empty one counts as scanned
    @digests    = digests
    @algorithms = digests.empty? ? algorithms : digests.keys.to_set
    @scanned    = !digests.empty?
  elsif !digests.empty?
    # an array only names the algorithms we want to see
    @algorithms = digests.to_set
  end

  # the block form is used for `#get`: the block is handed the
  # content and has to return a hash of metadata
  if block
    hash = block.call @content

    raise TypeError,
      "Block return value must be Hash, not #{hash.class}" unless
      hash.is_a? Hash

    @scanned = true if hash[:digests]
    merge_meta hash, content: true
  elsif @content.nil?
    raise ArgumentError,
      'Must initialize with either content, or a block, or both'
  end

  # make sure all the times are populated
  @ctime ||= now
  @mtime ||= mtime || @ctime
  @ptime ||= @ctime

  # set the flags, unless the merge above already did
  @flags ||= Flags.from(flags || 0)

  if cache
    raise NotImplementedError, 'Associated store does not support caching' if
      @store && !@store.can_cache?
    @flags.cache = !!cache
    @dtime = compute_cache cache
  end

  # scan preemptively if so directed
  scan! if scan
end
|
|
430
|
+
|
|
431
|
+
attr_reader :store, :type, :charset, :language, :encoding,
|
|
432
|
+
:ctime, :mtime, :ptime, :dtime, :flags
|
|
433
|
+
|
|
434
|
+
TOKENS.keys.each do |key|
|
|
435
|
+
define_method("#{key}=") { |val| coerce_token val, key }
|
|
436
|
+
end
|
|
437
|
+
|
|
438
|
+
# This will take an array or hash or individual symbol or string or
|
|
439
|
+
# {URI::NI} object and try to coerce it into something it can use.
|
|
440
|
+
#
|
|
441
|
+
# * Individual strings/symbols/{URI::NI} objects will get wrapped in
|
|
442
|
+
# an array.
|
|
443
|
+
# * Strings will be scanned for conformance to RFC6920 and
|
|
444
|
+
# transformed into {URI::NI} objects if they match, otherwise they
|
|
445
|
+
# will be turned into symbols and matched against the repertoire
|
|
446
|
+
# of hash algorithms.
|
|
447
|
+
# * If a {URI::NI} object isn't valid (e.g., not the full length,
|
|
448
|
+
# algorithm not supported), this will raise an error; likewise if
|
|
449
|
+
# the symbol is not in the repertoire of algorithms.
|
|
450
|
+
# * Arrays must contain all the same kind of thing (strings,
|
|
451
|
+
# symbols, {URI::NI} objects)
|
|
452
|
+
# * Hash keys must coerce to symbols (via `#to_s`, `#to_sym`) that
|
|
453
|
+
# match the repertoire of algorithms.
|
|
454
|
+
# * Hash values must either be a string representing the decimal,
|
|
455
|
+
# base64, or hexadecimal digest of a length corresponding to the
|
|
456
|
+
# algorithm in the key, or a string representing an RFC6920 URI,
|
|
457
|
+
# or a {URI::NI}.
|
|
458
|
+
# * (Base64 strings may be padded or not, and use the standard
|
|
459
|
+
# non-URL-safe representation, or not)
|
|
460
|
+
# * Strings will then subsequently be transformed into {URI::NI}
|
|
461
|
+
# objects.
|
|
462
|
+
# * Hash values that are (either already or coerced into) {URI::NI}
|
|
463
|
+
# objects must be valid and their algorithms must match the hash
|
|
464
|
+
# key with which they are associated.
|
|
465
|
+
#
|
|
466
|
+
# The input (and thus the output) has two "moods":
|
|
467
|
+
#
|
|
468
|
+
# 1. _Anticipative_: "These are the digest algorithms we want to see
|
|
469
|
+
# hashes for."
|
|
470
|
+
# 2. _Normative_: "These are the hashes we already have for the
|
|
471
|
+
# input, and it should match them when scanned."
|
|
472
|
+
#
|
|
473
|
+
# In general inputs that coerce to arrays (except arrays whose
|
|
474
|
+
# contents coerce to {URI::NI} objects, which in turn will coerce to
|
|
475
|
+
# hashes) are considered anticipative, whereas inputs that coerce to
|
|
476
|
+
# hashes are considered normative. The return value will depend on
|
|
477
|
+
# the adjudicated intent: `Array` for anticipative, `Hash` for
|
|
478
|
+
# normative. The caller should inspect the return value to see which
|
|
479
|
+
# it is, because the difference is whether a subsequent scan of the
|
|
480
|
+
# content is intended to verify it (normative) or not (anticipative).
|
|
481
|
+
#
|
|
482
|
+
# @param digests [#to_sym, #to_s, URI::NI,
|
|
483
|
+
# #to_a<#to_sym,#to_s,URI::NI>, #to_h{#to_sym=>#to_s},
|
|
484
|
+
# #to_h{#to_sym=>URI::NI}] the thing to be coerced into digests
|
|
485
|
+
# @param empty [false, true] whether the set is allowed to be empty
|
|
486
|
+
# @param normative [nil, false, true] whether to assert the
|
|
487
|
+
# normative mood (`true`), the anticipative mood (`false`), or
|
|
488
|
+
# leave it to the caller (`nil`)
|
|
489
|
+
#
|
|
490
|
+
# @return [Array<Symbol>,Hash{Symbol=>URI::NI}]
|
|
491
|
+
#
|
|
492
|
+
def self.coerce_digests digests, algorithms: nil, empty: false, normative: nil
  algorithms ||= URI::NI.algorithms

  # nil is read as "nothing supplied"
  digests = [] if digests.nil?

  # wrap scalars in an array; note hashes respond to `#to_a` and so
  # pass through untouched
  digests = [digests] unless digests.respond_to? :to_a

  raise ArgumentError,
    'Digest list can\'t be empty' if !empty && digests.empty?

  if digests.is_a? Hash
    out = digests.map do |k, v|
      # keys must go to symbols; symbols must be in the repertoire
      k = k.to_s.downcase.to_sym unless k.is_a? Symbol
      raise ArgumentError,
        "#{k} is not a supported algorithm in this configuration" unless
        algorithms.include? k

      # this should raise on any invalid values
      v = URI::NI.ingest k, v

      # then we assert that the result itself is valid
      raise ArgumentError, "Hash URI #{v} is invalid" unless v.valid?

      [k, v]
    end.to_h

    # note we are explicitly looking to see if normative is false
    # rather than nil
    return normative == false ? out.keys : out
  end

  # otherwise it is a list: every member becomes either a digest URI
  # or an algorithm symbol
  digests = digests.to_a.map do |thing|
    case thing
    when Symbol then thing
    when URI then URI::NI.ingest thing
    else
      # whatever it is, it should now be a string
      thing = thing.to_s
      if %r{^(?i:ni|https?)://}.match?(thing) and uri = URI::NI.ingest(thing)
        uri
      else
        # anything that isn't a URI is taken for an algorithm name
        thing.strip.downcase.to_sym
      end
    end
  end.uniq

  if digests.all? { |d| d.is_a? URI::NI }
    # we are expressly asking for anticipative if normative is
    # literally false
    return digests.map(&:algorithm) if normative == false

    # otherwise a list made up entirely of digest URIs is normative;
    # return it as a hash keyed on algorithm
    return digests.map do |d|
      raise ArgumentError,
        "#{d} is not a supported algorithm" unless
        algorithms.include? d.algorithm

      [d.algorithm.to_sym, d]
    end.to_h
  elsif digests.all? { |d| d.is_a? Symbol }
    # bare algorithm names carry no digest values, so there is
    # nothing a subsequent scan could be verified against
    raise ArgumentError,
      'Algorithm names alone cannot be normative; supply digest values' if
      normative

    return digests
  end

  # if we get here, the list was a mixture
  raise ArgumentError,
    'Input must coerce to either all URIs or all Symbols'
end
|
|
571
|
+
|
|
572
|
+
# Scan a blob and return the digests and byte count.
|
|
573
|
+
#
|
|
574
|
+
# @note The `content` is assumed to be at position zero.
|
|
575
|
+
#
|
|
576
|
+
# @param content [#read] the object to be scanned
|
|
577
|
+
# @param algorithms [Array<Symbol,#to_sym>] the algorithms
|
|
578
|
+
# @param blocksize [Integer] the block size to use
|
|
579
|
+
# @param type [false, true] scan content for media type
|
|
580
|
+
#
|
|
581
|
+
# @yieldparam [String] a chunk of input
|
|
582
|
+
#
|
|
583
|
+
# @raise [ArgumentError] the content can't be coerced to
|
|
584
|
+
# something that quacks like `#read`
|
|
585
|
+
# @raise [ArgumentError] the algorithms supplied aren't supported
|
|
586
|
+
#
|
|
587
|
+
# @return [Hash{Symbol=>Object}] a hash whose `:digests` and `:size` keys hold
|
|
588
|
+
# a hash of the digests and the size in bytes of the blob.
|
|
589
|
+
#
|
|
590
|
+
def self.scan_raw content, algorithms: URI::NI.algorithms,
    blocksize: BLOCKSIZE, type: false, &block
  # this will raise if the content can't be coerced
  reader = Store::Digest::ReadWrapper.coerce content

  # boil the algorithms down to an array of symbols; any failure on
  # the way is reported as one and the same ArgumentError
  names =
    begin
      if algorithms.is_a?(Array) || algorithms.respond_to?(:to_a)
        algorithms.to_a.map(&:to_sym)
      elsif algorithms.is_a?(Symbol) || algorithms.respond_to?(:to_sym)
        [algorithms.to_sym]
      else
        raise ArgumentError
      end
    rescue ArgumentError, TypeError, NoMethodError
      raise ArgumentError,
        "Digest algorithms must be coercible to an Array of Symbols"
    end

  raise ArgumentError, 'Algorithm list should not be empty' if names.empty?

  # double-check that every algorithm is supported
  unsupported = names - URI::NI.algorithms
  raise ArgumentError,
    "Unsupported digest algorithm(s) #{unsupported}" unless
    unsupported.empty?

  # one digest context per algorithm
  contexts = names.map { |name| [name, URI::NI.context(name)] }.to_h

  # every chunk read is pushed through a uniform list of callables:
  # the digest contexts first, then the caller's block, if any
  sinks = contexts.values.map { |ctx| -> chunk { ctx << chunk } }
  sinks << block if block

  if type
    sample = StringIO.new
    # this one goes in last so it can pop itself off the end of the
    # list once enough bytes have been collected for type detection
    sinks << -> chunk do
      sample << chunk
      sinks.pop if sample.pos >= SAMPLE
    end
  end

  total = 0
  while (chunk = reader.read blocksize)
    chunk = chunk.to_s.b # ensure these are bytes we're reading
    total += chunk.size
    sinks.each { |sink| sink.call chunk }
  end

  # turn the finished contexts into (frozen) digest URIs
  computed = contexts.map do |name, ctx|
    [name, URI::NI.compute(ctx, algorithm: name).freeze]
  end.to_h

  # return the gathered information; everything else is out of band
  out = { digests: computed, size: total }

  if sample
    # try the detectors in order and keep the first answer, if any
    guess = nil
    %i[by_magic default_type].each do |detector|
      sample.rewind
      guess = MimeMagic.send detector, sample
      break if guess
    end
    out[:type] = guess || nil
  end

  out
end
|
|
663
|
+
|
|
664
|
+
# Add this entry to a {Store::Digest} instance.
|
|
665
|
+
#
|
|
666
|
+
# @note This entry will become associated with the store if it isn't
|
|
667
|
+
# already. If this entry has already been scanned, it will be
|
|
668
|
+
# scanned again.
|
|
669
|
+
#
|
|
670
|
+
def add store = nil
  raise ArgumentError,
    'no store associated with the entry and none passed in' if
    store.nil? && @store.nil?

  # fall back to the associated store if none is supplied
  target = store || @store
  raise TypeError, 'Argument must be an instance of Store::Digest' unless
    target.is_a? Store::Digest

  # hand the content over unless the entry is both scanned and
  # already known to the store
  if !scanned? || !target.has?(self)
    # `send` because `add_raw` is not part of the store's public face
    merged = target.send :add_raw, @content, **meta_hash
    merge_meta merged, content: true
  end

  # the association is made only now: calling `has?` will cause the
  # record to be scanned, potentially against the very store, so it
  # would be scanned by the same store twice
  @store ||= target

  self
end
|
|
696
|
+
|
|
697
|
+
# Remove this entry from a store. Dissociates the entry from the
|
|
698
|
+
# store in the process. Will not signal if the entry wasn't in the
|
|
699
|
+
# store to begin with.
|
|
700
|
+
#
|
|
701
|
+
# @param store [nil, Store::Digest] the store to remove the entry
|
|
702
|
+
# @param forget [false, true] whether to purge the entry completely
|
|
703
|
+
# from the metadata or just delete the blob
|
|
704
|
+
#
|
|
705
|
+
def remove store = nil, forget: false
  raise ArgumentError,
    'no store associated with the entry and none passed in' if
    store.nil? && @store.nil?

  target = store || @store
  raise TypeError, 'store must be a Store::Digest instance' unless
    target.is_a? Store::Digest

  # dissociate first, but only from the store being removed from
  @store = nil if @store.equal? target

  # `:forget` purges the entry; `true` is passed otherwise
  mode = forget ? :forget : true

  # `send` circumvents `private`; the return value is ignored
  target.send :get_raw, digests[target.primary], remove: mode

  self
end
|
|
723
|
+
|
|
724
|
+
# Preemptively scan a blob and return an entry.
|
|
725
|
+
#
|
|
726
|
+
# @param content [String, Pathname, IO, #each, #read, #call]
|
|
727
|
+
# anything that represents bytes or can be coerced or wrapped by
|
|
728
|
+
# {Store::Digest::ReadWrapper}
|
|
729
|
+
#
|
|
730
|
+
# @param store [Store::Digest]
|
|
731
|
+
# @param digests [Array<Symbol,#to_sym,URI::NI>, Hash{Symbol=>URI::NI}]
|
|
732
|
+
#
|
|
733
|
+
# @return [Store::Digest::Entry]
|
|
734
|
+
#
|
|
735
|
+
def self.scan content, store: nil, digests: URI::NI.algorithms, mtime: nil,
    type: nil, language: nil, charset: nil, encoding: nil,
    blocksize: BLOCKSIZE, &block
  # everything goes straight to the constructor; `blocksize` is
  # passed in as its `scan:` argument
  options = {
    store: store, digests: digests, mtime: mtime, type: type,
    language: language, charset: charset, encoding: encoding,
    scan: blocksize,
  }

  new content, **options, &block
end
|
|
742
|
+
|
|
743
|
+
# Scan the blob if it hasn't already been scanned (idempotent).
|
|
744
|
+
#
|
|
745
|
+
# @return [self]
|
|
746
|
+
#
|
|
747
|
+
def scan
  # nothing to do without content, or if a scan already happened
  return self if !@content || scanned?

  scan!
  self
end
|
|
751
|
+
|
|
752
|
+
STRINGIO_MAX = 2**16
|
|
753
|
+
|
|
754
|
+
# Scan the blob unconditionally. May raise an error if the byte size
|
|
755
|
+
# or digests are asserted in the constructor and don't match the scan.
|
|
756
|
+
#
|
|
757
|
+
# @raise [Store::Digest::Error::Integrity]
|
|
758
|
+
#
|
|
759
|
+
# @return [self]
|
|
760
|
+
#
|
|
761
|
+
def scan!
  raise Store::Digest::Error::Deleted, 'Entry has no content' unless @content

  if @store
    # we use the store if one is associated; it hands back the
    # metadata along with a new content handle
    hash = @store.send :add_raw, @content, **meta_hash

    @content = hash[:content]
  elsif @content.respond_to? :rewind and seekable?(@content)
    # we don't need a temporary file; we'll just reuse this file handle
    @content.rewind
    hash = self.class.scan_raw @content, algorithms: @algorithms, type: true
    @content.rewind
  else
    # the content can only be read once, so it is copied while it is
    # scanned: into a stringio to begin with
    tmp = StringIO.new
    lam = -> buf do
      tmp << buf

      # check if it's too big
      if tmp.size >= STRINGIO_MAX
        # make an actual file
        file = Tempfile.create anonymous: true, binmode: true

        # put the string into it
        tmp.rewind
        file << tmp.read

        # reassign tmp with the file
        tmp = file

        # reassign lam with this condition removed so we don't
        # needlessly test it over and over with every iteration
        lam = -> buf { file << buf }
      end
    end

    # now we wrap lam in another block so it picks up the reassignment
    hash = self.class.scan_raw(
      @content, algorithms: @algorithms, type: true) { |buf| lam.call buf }
    tmp.rewind
    @content = tmp
  end

  # values asserted before this scan have to agree with what it found
  if @scanned
    # the size is only compared if one was actually asserted; an
    # entry can count as scanned on the strength of its digests alone
    # (this mirrors the guard on the same check in `merge_meta`)
    raise Store::Digest::Error::Integrity,
      "Scanned size #{hash[:size]} does not match asserted #{@size}" if
      @size && hash[:size] != @size

    # digests: every algorithm present on both sides has to agree
    (@digests.keys & hash[:digests].keys).each do |k|
      scanned  = hash[:digests][k]
      asserted = @digests[k]
      raise Store::Digest::Error::CryptographicIntegrity,
        "Scanned digest #{scanned} does not match asserted #{asserted}" if
        scanned != asserted
    end
    # XXX also do content type??
  end

  merge_meta hash

  # unconditionally set this now
  @scanned = true

  self
end
|
|
830
|
+
|
|
831
|
+
# Report whether a scan has been recorded, judged by the `@scanned`
# flag that {#scan!} sets.
#
# NOTE(review): a second `scanned?` (testing `@digests`) is defined
# further down in this class; Ruby keeps the later definition, so this
# one is presumably shadowed. Confirm which of the two is intended.
#
# @return [false, true]
#
def scanned?
  @scanned ? true : false
end
|
|
838
|
+
|
|
839
|
+
# Iterate over the blob contents by delegating to the content
# handle's own `#each`, after making sure the entry has been scanned.
#
# @param sep [String, nil] the record separator handed to the handle
# @param limit [Integer, nil] optional maximum record length
# @param chomp [false, true] whether to strip the separator
#
# @yieldparam chunk [String] one separator-delimited piece of the blob
#
# @return [self, Enumerator] `self` when a block is given, otherwise
#  whatever the content handle's blockless `#each` returns
#
def each sep = $/, limit = nil, chomp: false, &block
  scan
  # `dereference?` is defined elsewhere in this class; called here for
  # its effect before the handle is touched
  dereference?

  # no block: keep handing back the handle's own enumerator
  return @content.each(sep, limit, chomp: chomp) unless block

  @content.each(sep, limit, chomp: chomp, &block)

  # return the entry itself, as documented, not the inner handle
  self
end
|
|
850
|
+
|
|
851
|
+
# Emulate {IO#read} by forwarding to the content handle once the
# entry has been scanned.
#
# @param length [Integer, nil] the number of bytes to read
# @param buffer [String, nil] optional buffer, passed through as-is
#
# @return [String, nil] whatever the handle's `#read` returns (for an
#  IO: up to `length` bytes, or `nil` on EOF)
#
def read length = nil, buffer = nil
  scan
  dereference?

  # the handle is expected to have been put in place by the scan
  handle = @content
  handle.read(length, buffer)
end
|
|
863
|
+
|
|
864
|
+
# Emulate {IO#gets}: read the next separator-delimited record from
# the content handle.
#
# `IO#gets` takes `chomp` as a keyword and treats a second positional
# argument as the length limit. This method therefore accepts
# `limit` positionally and `chomp:` as a keyword. A literal `true` or
# `false` in the second position is still honoured as the chomp flag,
# for callers written against the earlier signature.
#
# @param sep [String, Integer, nil] record separator (or, as with
#  {IO#gets}, a lone Integer limit)
# @param limit [Integer, nil, false, true] maximum length to read
# @param chomp [false, true] strip the separator from the result
#
# @return [String, nil] the next line, or `nil` at end of input
#
def gets sep = $/, limit = nil, chomp: false
  scan
  dereference?

  # legacy call shape: second positional was the chomp flag
  if limit == true || limit == false
    chomp ||= limit
    limit   = nil
  end

  # only pass a limit when there is one, so a lone Integer `sep`
  # keeps its IO#gets meaning
  args = limit.nil? ? [sep] : [sep, limit]
  @content.gets(*args, chomp: chomp)
end
|
|
873
|
+
|
|
874
|
+
# Emulate {IO#seek}: scan if necessary, then reposition the content
# handle.
#
# @param offset [Integer] passed through to the handle
# @param whence [Integer] passed through; defaults to `IO::SEEK_SET`
#
# @return [Object] whatever the handle's `#seek` returns
#
def seek offset, whence = IO::SEEK_SET
  scan
  dereference?

  handle = @content
  handle.seek(offset, whence)
end
|
|
879
|
+
|
|
880
|
+
# Report the current offset of the content handle, scanning first if
# that has not happened yet.
#
# @return [Object] whatever the handle's `#pos` returns
#
def pos
  scan
  dereference?

  handle = @content
  handle.pos
end
|
|
885
|
+
|
|
886
|
+
# `tell` is another name for {#pos}.
alias_method :tell, :pos
|
|
887
|
+
|
|
888
|
+
# Move the content handle to an absolute offset, scanning first if
# that has not happened yet.
#
# @param position [Integer] passed through to the handle's `#pos=`
#
def pos= position
  scan
  dereference?

  handle = @content
  handle.pos = position
end
|
|
893
|
+
|
|
894
|
+
# Emulate {IO#rewind}: scan if necessary, then send the content handle
# back to its start.
#
# @return [Object] whatever the handle's `#rewind` returns (0 for an
#  IO or StringIO)
#
def rewind
  scan
  dereference?

  # after a scan the handle is expected to be rewindable
  handle = @content
  handle.rewind
end
|
|
905
|
+
|
|
906
|
+
# Stand-in for {IO#open}, kept for interface parity. Nothing is
# opened here; the entry is simply rewound via {#rewind}.
#
# @param args [Array] accepted and ignored
#
# @return [self]
#
def open *args
  tap { rewind }
end
|
|
917
|
+
|
|
918
|
+
# Stand-in for {IO#close}. The underlying handle is not closed; the
# entry is rewound via {#rewind} instead.
#
# @return [self]
#
def close
  rewind # leave the handle open, positioned at the start

  self
end
|
|
926
|
+
|
|
927
|
+
# Ask the associated store whether it holds this entry. The entry is
# scanned first in every case. Without a store the answer is `nil`.
#
# @return [nil, false, true] `nil` without a store, otherwise the
#  result of the store's `has?` for this entry's digests
#
def stored?
  scan
  return unless @store

  @store.has? digests
end
|
|
939
|
+
|
|
940
|
+
# Return the digest algorithms in use for this entry. When none have
# been assigned, they are taken from the associated store (or from
# `URI::NI` if there is no store), converted with `#to_set`, and
# remembered.
#
# @return [Set, Object] the remembered value; a Set when it was
#  defaulted here
#
def algorithms
  return @algorithms if @algorithms

  source = @store || URI::NI
  @algorithms = source.algorithms.to_set
end
|
|
947
|
+
|
|
948
|
+
# Return the digests held by this entry, scanning first if that has
# not happened yet.
#
# @return [Hash] the digests
#
def digests
  scan # make sure the values are populated

  @digests
end
|
|
956
|
+
|
|
957
|
+
# Return the size recorded for this entry, scanning first if that has
# not happened yet.
#
# @return [Integer] the size in bytes
#
def size
  scan # make sure the value is populated

  @size
end
|
|
965
|
+
|
|
966
|
+
# Look up a single digest by algorithm name. Returns nil when the
# entry has no digest for that algorithm.
#
# @param symbol [Symbol, #to_sym] the algorithm name
#
# @raise [ArgumentError] if `symbol` does not respond to `#to_sym`
#
# @return [URI::NI, nil]
#
def digest symbol
  unless symbol.respond_to? :to_sym
    raise ArgumentError, "This method takes a symbol"
  end

  table = digests
  table[symbol.to_sym]
end
|
|
977
|
+
|
|
978
|
+
# Lets `entry[:algorithm]` be used as shorthand for {#digest}.
alias_method :"[]", :digest
|
|
979
|
+
|
|
980
|
+
# Vestigial accessor: the entry itself stands in for its content.
#
# @note {Store::Digest::Entry} now proxies {IO} calls, so callers can
#  read from the entry directly.
#
# @return [self, nil] the entry when content is present, otherwise nil
#
def content
  @content ? self : nil
end
|
|
990
|
+
|
|
991
|
+
# Tell whether the entry currently holds any content.
#
# @return [false, true]
#
def content?
  @content ? true : false
end
|
|
998
|
+
|
|
999
|
+
# Replace the content and forget any previous scan: the digests are
# emptied and the scanned flag cleared.
#
# The new content is passed through
# `Store::Digest::ReadWrapper.coerce` (with `thunk: true`). If the
# coerced content exposes a non-nil `#path`, the type is taken from
# `MimeMagic.by_path`. The modification time comes from the
# content's `#stat` when available, otherwise from the current time
# in UTC.
#
# NOTE(review): `@type` is assigned directly here, bypassing the
# `type=` writer, and is overwritten even when `by_path` returns nil.
# Confirm that is intended.
#
# @param content [IO, String, Proc, File, Pathname, ...] some content
#
def content= content
  @digests = {}
  @scanned = false
  @content = Store::Digest::ReadWrapper.coerce content, thunk: true

  # one lookup, and no diagnostic output on stderr
  if @content.respond_to?(:path) && (path = @content.path)
    @type = MimeMagic.by_path path
  end

  @mtime = @content.respond_to?(:stat) ? @content.stat.mtime : Time.now(in: ?Z)
end
|
|
1015
|
+
|
|
1016
|
+
# Build a `type;charset=...` string suitable for an HTTP header. The
# charset parameter is omitted when no charset is set.
#
# @return [String]
#
def type_charset
  media = type.to_s
  return media unless charset

  media + ";charset=#{charset}"
end
|
|
1025
|
+
|
|
1026
|
+
# Tell whether the entry counts as scanned, judged by whether it
# holds any digests.
#
# NOTE(review): `scanned?` is also defined earlier in this class
# (testing `@scanned`). Being the later definition, this is the one
# Ruby keeps. Confirm which test is intended and drop the other.
#
# @return [false, true]
#
def scanned?
  @digests.empty? ? false : true
end
|
|
1033
|
+
|
|
1034
|
+
# Replace the flag set with the result of `Flags.from`.
#
# @param val [Object] anything `Flags.from` accepts (the accepted
#  forms are defined by `Flags`, elsewhere)
#
def flags= val
  @flags = Flags.from(val)
end
|
|
1037
|
+
|
|
1038
|
+
# Tell whether the entry is flagged as cache.
#
# @return [false, true]
#
def cache?
  @flags.cache ? true : false
end
|
|
1045
|
+
|
|
1046
|
+
# Set or clear the cache flag. The value is reduced to a strict
# boolean before it is stored.
#
# @param value [false, true] anything falsy/truthy
#
# @return [void]
#
def cache= value
  @flags.cache = value ? true : false
end
|
|
1055
|
+
|
|
1056
|
+
# XXX i'm keeping these as-is for now
|
|
1057
|
+
|
|
1058
|
+
# Report the `type_checked` flag: whether the content type has been
# checked.
#
# @return [false, true] the flag as stored in `@flags`
#
def type_checked?
  checked = @flags.type_checked
  checked
end
|
|
1065
|
+
|
|
1066
|
+
# Report the `type_valid` flag, but only once the content type has
# been checked. Before that the answer is `nil`.
#
# @return [nil, false, true]
#
def type_valid?
  @flags.type_checked ? @flags.type_valid : nil
end
|
|
1074
|
+
|
|
1075
|
+
# Report the `charset_checked` flag: whether the character set has
# been checked.
#
# @return [false, true] the flag as stored in `@flags`
#
def charset_checked?
  checked = @flags.charset_checked
  checked
end
|
|
1082
|
+
|
|
1083
|
+
# Report the `charset_valid` flag, but only once the character set
# has been checked. Before that the answer is `nil`.
#
# @return [nil, false, true]
#
def charset_valid?
  @flags.charset_checked ? @flags.charset_valid : nil
end
|
|
1091
|
+
|
|
1092
|
+
# Report the `encoding_checked` flag: whether the content encoding
# (e.g. gzip, deflate) has been checked.
#
# @return [false, true] the flag as stored in `@flags`
#
def encoding_checked?
  checked = @flags.encoding_checked
  checked
end
|
|
1100
|
+
|
|
1101
|
+
# Report the `encoding_valid` flag, but only once the content
# encoding has been checked. Before that the answer is `nil`.
#
# @return [nil, false, true]
#
def encoding_valid?
  @flags.encoding_checked ? @flags.encoding_valid : nil
end
|
|
1109
|
+
|
|
1110
|
+
# Report the `syntax_checked` flag: whether the blob's syntax has
# been checked.
#
# @return [false, true] the flag as stored in `@flags`
#
def syntax_checked?
  checked = @flags.syntax_checked
  checked
end
|
|
1117
|
+
|
|
1118
|
+
# Report the `syntax_valid` flag, but only once the blob's syntax has
# been checked. Before that the answer is `nil`.
#
# @return [nil, false, true]
#
def syntax_valid?
  @flags.syntax_checked ? @flags.syntax_valid : nil
end
|
|
1126
|
+
|
|
1127
|
+
# Generate the timestamp writers (`ctime=`, `mtime=`, `ptime=`,
# `dtime=`). Each one runs the value through `coerce_time` (defined
# elsewhere) and stores the frozen result in the matching instance
# variable.
%i[ctime mtime ptime dtime].each do |field|
  ivar = "@#{field}"

  define_method("#{field}=") do |value|
    instance_variable_set ivar, coerce_time(value, field).freeze
  end
end
|
|
1132
|
+
|
|
1133
|
+
# Generate, for each token-valued field (`type`, `charset`,
# `encoding`, `language`), a writer and an `_ok?` predicate.
%i[type charset encoding language].each do |field|
  ivar = "@#{field}"

  # writer: runs the value through `coerce_token` (defined elsewhere)
  # and stores the frozen result
  define_method("#{field}=") do |value|
    instance_variable_set ivar, coerce_token(value, field).freeze
  end

  # predicate: matches a candidate against the first element of the
  # field's `TOKENS` entry
  define_method("#{field}_ok?") do |candidate|
    TOKENS[field].first.match? candidate
  end
end
|
|
1142
|
+
|
|
1143
|
+
# An entry is stale when it is flagged as cache and its `@dtime`
# lies in the past.
#
# @return [nil, false, true] falsy unless the entry is cache and has
#  a `@dtime` earlier than now
#
def stale?
  cached = cache?
  return cached unless cached

  expiry = @dtime
  expiry && expiry < Time.now
end
|
|
1149
|
+
|
|
1150
|
+
# Plain predicate for whether the blob has been deleted from the
# store (the metadata record implicitly remains). An entry counts as
# deleted when it is stale, or when it has a `@dtime` and is not
# flagged as cache.
#
# The result is coerced to a strict boolean so that an entry with no
# `@dtime` yields `false` rather than `nil`, matching the documented
# return type.
#
# @return [false, true]
#
def deleted?
  !!(stale? || (@dtime && !cache?))
end
|
|
1159
|
+
|
|
1160
|
+
# Flatten the entry into a Hash of field name => duplicated value.
# The keys are `:digests`, everything in `MANDATORY` and `OPTIONAL`,
# and `:flags`, with `:content` placed first when asked for.
#
# @param content [false, true] include the content if true
# @return [Hash] the object as a hash
#
def to_h content: false
  keys  = content ? %i[content digests] : %i[digests]
  keys += MANDATORY + OPTIONAL + [:flags]

  # values are read through the public readers and duplicated
  keys.to_h { |key| [key, send(key).dup] }
end
|
|
1172
|
+
|
|
1173
|
+
# Outputs a human-readable string representation of the object.
|
|
1174
|
+
#
|
|
1175
|
+
# @return [String] said representation
|
|
1176
|
+
#
|
|
1177
|
+
def to_s
|
|
1178
|
+
out = "#{self.class}\n Digests:\n"
|
|
1179
|
+
|
|
1180
|
+
# disgorge the digests
|
|
1181
|
+
digests.values.sort { |a, b| a.to_s <=> b.to_s }.each do |d|
|
|
1182
|
+
out << " #{d}\n"
|
|
1183
|
+
end
|
|
1184
|
+
|
|
1185
|
+
# now the fields
|
|
1186
|
+
MANDATORY.each { |m| out << " #{LABELS[m]}: #{send m}\n" }
|
|
1187
|
+
OPTIONAL.each do |o|
|
|
1188
|
+
val = send o
|
|
1189
|
+
out << " #{LABELS[o]}: #{val}\n" if val
|
|
1190
|
+
end
|
|
1191
|
+
|
|
1192
|
+
# now the validation statuses
|
|
1193
|
+
out << "Validation:\n"
|
|
1194
|
+
FLAG.each_index do |i|
|
|
1195
|
+
x = flags.to_i >> (3 - i) & 3
|
|
1196
|
+
out << (" %-16s: %s\n" % [FLAG[i], STATE[x]])
|
|
1197
|
+
end
|
|
1198
|
+
|
|
1199
|
+
out
|
|
1200
|
+
end
|
|
1201
|
+
|
|
1202
|
+
def inspect
|
|
1203
|
+
text = if scanned?
|
|
1204
|
+
ds = digests.values.map(&:to_s).sort.join ', '
|
|
1205
|
+
"size=#{size} type=#{type} (#{})"
|
|
1206
|
+
else
|
|
1207
|
+
"(not scanned)"
|
|
1208
|
+
end
|
|
1209
|
+
|
|
1210
|
+
"<#{self.class} #{text}>"
|
|
1211
|
+
end
|
|
1212
|
+
end
|
|
1213
|
+
|
|
1214
|
+
# Makes `Store::Digest::Object` refer to the same class as
# `Store::Digest::Entry` (presumably kept so code written against the
# old name keeps working; confirm).
Store::Digest::Object = Store::Digest::Entry
|