store-digest 0.1.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,180 @@
1
- module Store
2
- class Digest
3
- module Meta
4
- # This is an abstract module for metadata operations.
5
- end
1
+ # grab the symbols so we don't have to indent
2
+ require_relative 'version'
3
+
4
+ # This is an abstract module for metadata operations. All required
5
+ # methods are defined, and raise {NotImplementedError}.
6
+ module Store::Digest::Meta
7
+
8
+ private
9
+
10
+ PRIMARY = :"sha-256"
11
+
12
+ DIGESTS = {
13
+ md5: 16,
14
+ "sha-1": 20,
15
+ "sha-256": 32,
16
+ "sha-384": 48,
17
+ "sha-512": 64,
18
+ }.freeze
19
+
20
+ INTS = %i[size ctime mtime ptime dtime flags].map do |k|
21
+ [k, :to_i]
22
+ end.to_h.freeze
23
+
24
+ protected
25
+
26
+ # This method is run on initialization to bootstrap or otherwise
27
+ # verify the integrity of the database.
28
+ #
29
+ # @param options [Hash] whatever the parameters entail; it's your
30
+ # driver lol
31
+ #
32
+ # @return [void]
33
+ #
34
+ def setup **options
35
+ raise NotImplementedError
36
+ end
37
+
38
+ # Set/add an individual object's metadata to the database.
39
+ #
40
+ # @param obj [Store::Digest::Object] the object to store
41
+ # @param preserve [false, true] flag to preserve modification time
42
+ #
43
+ # @return [void]
44
+ #
45
+ def set_meta obj, preserve: false
46
+ raise NotImplementedError
47
+ end
48
+
49
+ # Retrieve a hash of the metadata stored for the given object.
50
+ #
51
+ # @param obj [#to_h, URI::ni] something hash-like which has keys that
52
+ # correspond to the identifiers for the digest algorithms, or
53
+ # otherwise a `ni:` URI.
54
+ #
55
+ # @return [Hash]
56
+ #
57
+ def get_meta obj, preserve: false
58
+ raise NotImplementedError
59
+ end
60
+
61
+ # Remove the metadata from the database and return it.
62
+ #
63
+ # @see #get_meta
64
+ #
65
+ # @param obj [#to_h, URI::ni] The object/identifier
66
+ #
67
+ # @return [Hash] The eliminated metadata.
68
+ #
69
+ def remove_meta obj, preserve: false
70
+ raise NotImplementedError
71
+ end
72
+
73
+ # Mark the object's record as "deleted" (but do not actually delete
74
+ # it) and return the updated record.
75
+ #
76
+ # @see #get_meta
77
+ #
78
+ # @param obj [#to_h, URI::ni] The object/identifier
79
+ #
80
+ # @return [Hash] The updated metadata.
81
+ #
82
+ # @return [Hash]
83
+ #
84
+ def mark_meta_deleted obj
85
+ raise NotImplementedError
86
+ end
87
+
88
+ # Retrieve storage statistics from the database itself.
89
+ #
90
+ # @return [Hash] global stats for the database.
91
+ #
92
+ def meta_get_stats
93
+ raise NotImplementedError
94
+ end
95
+
96
+ public
97
+
98
+ # Wrap the block in a transaction.
99
+ #
100
+ # @param block [Proc] whatever you pass into the transaction.
101
+ #
102
+ # @return [Object] whatever the block returns.
103
+ #
104
+ def transaction &block
105
+ raise NotImplementedError
106
+ end
107
+
108
+ # Return the set of algorithms initialized in the database.
109
+ #
110
+ # @return [Array] the algorithms
111
+ #
112
+ def algorithms
113
+ raise NotImplementedError
114
+ end
115
+
116
+ # Return the primary digest algorithm.
117
+ #
118
+ # @return [Symbol] the primary algorithm
119
+ #
120
+ def primary
121
+ raise NotImplementedError
122
+ end
123
+
124
+ # Return the number of objects in the database.
125
+ #
126
+ # @return [Integer]
127
+ #
128
+ def objects
129
+ raise NotImplementedError
130
+ end
131
+
132
+ # Return the number of objects whose payloads are deleted but are
133
+ # still on record.
134
+ #
135
+ # @return [Integer]
136
+ #
137
+ def deleted
138
+ raise NotImplementedError
139
+ end
140
+
141
+ # Return the number of bytes stored in the database (notwithstanding
142
+ # the database itself).
143
+ #
144
+ # @return [Integer]
145
+ #
146
+ def bytes
147
+ raise NotImplementedError
148
+ end
149
+
150
+ # Return a list of objects matching the given criteria. The result
151
+ # set will be the intersection of all supplied parameters. `:type`,
152
+ # `:charset`, `:encoding`, and `:language` are treated like discrete
153
+ # sets, while the rest of the parameters are treated like ranges
154
+ # (two-element arrays). Single values will be coerced into arrays;
155
+ # single range values will be interpreted as an inclusive lower
156
+ # bound. To bound only at the top, use a two-element array with its
157
+ # first value `nil`, like so: `size: [nil, 31337]`. The sorting
158
+ # criteria are the symbols of the other parameters.
159
+ #
160
+ # @param type [nil, String, #to_a]
161
+ # @param charset [nil, String, #to_a]
162
+ # @param encoding [nil, String, #to_a]
163
+ # @param language [nil, String, #to_a]
164
+ # @param size [nil, Integer, #to_a] byte size range
165
+ # @param ctime [nil, Time, DateTime, #to_a] creation time range
166
+ # @param mtime [nil, Time, DateTime, #to_a] modification time range
167
+ # @param ptime [nil, Time, DateTime, #to_a] metadata property change range
168
+ # @param dtime [nil, Time, DateTime, #to_a] deletion time range
169
+ # @param sort [nil, Symbol, #to_a] sorting criteria
170
+ #
171
+ # @return [Array] the list
172
+ #
173
+ def list type: nil, charset: nil, encoding: nil, language: nil,
174
+ size: nil, ctime: nil, mtime: nil, ptime: nil, dtime: nil, sort: nil
175
+ raise NotImplementedError
176
+ end
177
+
178
+ class CorruptStateError < RuntimeError
6
179
  end
7
180
  end
@@ -1,25 +1,44 @@
1
1
  require 'store/digest/version'
2
2
 
3
+ require 'forwardable'
3
4
  require 'uri'
4
5
  require 'uri/ni'
5
6
  require 'mimemagic'
6
- require 'mimemagic/overlay'
7
7
 
8
8
  class MimeMagic
9
9
  # XXX erase this when these methods get added
10
- unless self.method_defined? :parents
10
+ unless singleton_class.method_defined? :parents
11
11
  def self.parents type
12
- TYPES.fetch(type, [nil,[]])[1].map { |t| new t }.uniq
12
+ TYPES.fetch(type.to_s, [nil,[]])[1].map { |t| new t }.uniq
13
13
  end
14
14
  end
15
15
 
16
- unless self.method_defined? :ancestor_types
17
- def self.ancestor_types type
18
- parents(type).map { |t| ancestors(t) }.flatten.uniq
16
+ unless method_defined? :parents
17
+ def parents
18
+ out = TYPES.fetch(type.to_s.downcase, [nil, []])[1].map do |x|
19
+ self.class.new x
20
+ end
21
+ # add this unless we're it
22
+ out << self.class.new('application/octet-stream') if
23
+ out.empty? and type.downcase != 'application/octet-stream'
24
+
25
+ out.uniq
19
26
  end
20
27
  end
21
28
 
22
- unless self.method_defined? :binary?
29
+ unless method_defined? :lineage
30
+ def lineage
31
+ ([self] + parents.map { |t| t.lineage }.flatten).uniq
32
+ end
33
+ end
34
+
35
+ unless method_defined? :descendant_of?
36
+ def descendant_of? type
37
+ lineage.map(&:type).include? type.to_s.downcase
38
+ end
39
+ end
40
+
41
+ unless singleton_class.method_defined? :binary?
23
42
  def self.binary? thing
24
43
  sample = nil
25
44
 
@@ -42,15 +61,97 @@ class MimeMagic
42
61
  end
43
62
  end
44
63
 
45
- unless self.method_defined? :default_type
64
+ unless singleton_class.method_defined? :default_type
46
65
  def self.default_type thing
47
66
  new self.binary?(thing) ? 'application/octet-stream' : 'text/plain'
48
67
  end
49
68
  end
50
69
  end
51
70
 
71
+ # Store entry object class.
72
+ #
52
73
  class Store::Digest::Object
53
74
 
75
+ # Proxy IO instance that has a backreference to the store object.
76
+ #
77
+ class IOWrapper
78
+ extend Forwardable
79
+
80
+ def initialize object, io
81
+ @object = object
82
+ @io = io
83
+ end
84
+
85
+ attr_reader :object
86
+
87
+ # any others??
88
+ def_delegators :@io, :gets, :read, :each, :seek, :pos, :rewind
89
+
90
+ end
91
+
92
+ # This is a struct for the bank of flags, with a couple of extra
93
+ # methods for parsing
94
+ #
95
+ Flags = Struct.new(
96
+ 'Flags',
97
+ :type_checked, :type_valid, :charset_checked, :charset_valid,
98
+ :encoding_checked, :encoding_valid, :syntax_checked, :syntax_valid, :cache
99
+ ) do |name|
100
+
101
+ # Initialize a struct of flags from arbitrary input
102
+ #
103
+ # @param arg [Store::Digest::Object::Flags, Integer, #to_h, #to_a]
104
+ #
105
+ # @return [Store::Digest::Object::Flags]
106
+ #
107
+ def self.from arg
108
+ # get the length since we use it in a few places
109
+ len = self.members.size
110
+
111
+ if arg.is_a? Integer
112
+ tmp = arg.digits(2).first(len)
113
+ elsif arg.is_a? self
114
+ # noop
115
+ return arg
116
+ elsif arg.is_a? Hash
117
+ tmp = arg.slice(*self.members).transform_values do |v|
118
+ !!(v && v != 0)
119
+ end
120
+ return self.[](**tmp)
121
+ elsif arg.respond_to? :to_a
122
+ tmp = arg.to_a.first(len)
123
+ else
124
+ raise ArgumentError, 'Input must be an integer or array'
125
+ end
126
+
127
+ # append these
128
+ tmp += [false] * (len - tmp.size) if tmp.size < len
129
+
130
+ # make sure these are true/false
131
+ tmp.map! { |b| !!(b && b != 0) }
132
+
133
+ # we do this because `new` doesn't do this
134
+ self.[](*tmp)
135
+ end
136
+
137
+ # Turn an arbitrary {Array} back into an {Integer}.
138
+ #
139
+ # @param array [Array]
140
+ #
141
+ # @return [Integer]
142
+ #
143
+ def self.to_i array
144
+ array.to_a.reverse.reduce(0) { |acc, b| (acc << 1) | (b ? 1 : 0) }
145
+ end
146
+
147
+ # wish there was a cleaner way to derive individual instance
148
+ # methods from class methods
149
+ begin
150
+ cm = self.method :to_i
151
+ define_method(:to_i) { cm.call self.to_a }
152
+ end
153
+ end
154
+
54
155
  private
55
156
 
56
157
  SAMPLE = 2**13 # must be big enough to detect ooxml
@@ -88,13 +189,14 @@ class Store::Digest::Object
88
189
  ENCODING_VALID = 1 << 5
89
190
  SYNTAX_CHECKED = 1 << 6
90
191
  SYNTAX_VALID = 1 << 7
192
+ IS_CACHE = 1 << 8
91
193
 
92
194
  LABELS = {
93
195
  size: 'Size (Bytes)',
94
196
  ctime: 'Added to Store',
95
197
  mtime: 'Last Modified',
96
198
  ptime: 'Properties Modified',
97
- dtime: 'Deleted',
199
+ dtime: 'Deleted (Expires)',
98
200
  type: 'Content Type',
99
201
  language: '(Natural) Language',
100
202
  charset: 'Character Set',
@@ -132,7 +234,7 @@ class Store::Digest::Object
132
234
 
133
235
  # Create a new object, naively recording whatever is handed
134
236
  #
135
- # @note use {.scan} or {#scan} to populate
237
+ # @note use {.scan} or {#scan} to populate
136
238
  #
137
239
  # @param content [IO, String, Proc, File, Pathname, ...] some content
138
240
  # @param digests [Hash] the digests ascribed to the content
@@ -148,7 +250,9 @@ class Store::Digest::Object
148
250
  # @param flags [Integer] validation state flags
149
251
  # @param strict [true, false] raise an error on bad input
150
252
  # @param fresh [true, false] assert "freshness" of object vis-a-vis the store
253
+ #
151
254
  # @return [Store::Digest::Object] the object in question
255
+ #
152
256
  def initialize content = nil, digests: {}, size: 0,
153
257
  type: 'application/octet-stream', charset: nil, language: nil,
154
258
  encoding: nil, ctime: nil, mtime: nil, ptime: nil, dtime: nil,
@@ -211,19 +315,17 @@ class Store::Digest::Object
211
315
  instance_variable_set "@#{k}", v
212
316
  end
213
317
 
214
- # size and flags should be non-negative integers
215
- %i[size flags].each do |k|
216
- x = b.local_variable_get k
217
- v = case x
218
- when nil then 0
219
- when Integer
220
- raise ArgumentError, "#{k} must be non-negative" if x < 0
221
- x
222
- else
223
- raise ArgumentError, "#{k} must be nil or an Integer"
224
- end
225
- instance_variable_set "@#{k}", v
226
- end
318
+ # set the flags
319
+ @flags = Flags.from(flags || 0)
320
+
321
+ @size = case size
322
+ when nil then 0
323
+ when Numeric
324
+ raise ArgumentError, 'size must be non-negative' if size < 0
325
+ size.to_i
326
+ else
327
+ raise ArgumentError, 'size must be nil or Numeric'
328
+ end
227
329
 
228
330
  # the following can be strings or symbols:
229
331
  TOKENS.keys.each do |k|
@@ -274,7 +376,7 @@ class Store::Digest::Object
274
376
 
275
377
  # sane default for mtime
276
378
  @mtime = coerce_time(mtime || @mtime ||
277
- (content.respond_to?(:mtime) ? content.mtime : Time.now), :mtime)
379
+ (content.respond_to?(:mtime) ? content.mtime : Time.now(in: ?Z)), :mtime)
278
380
 
279
381
  # eh, *some* code reuse
280
382
  b = binding
@@ -321,15 +423,21 @@ class Store::Digest::Object
321
423
  [k, URI::NI.compute(v, algorithm: k).freeze]
322
424
  end.to_h.freeze
323
425
 
426
+ # ensure there is the most generic of possible types
427
+ type ||= 'application/octet-stream'.freeze
428
+
324
429
  # obtain the sampled content type
325
430
  ts = MimeMagic.by_magic(sample) || MimeMagic.default_type(sample)
326
431
  if content.respond_to? :path
327
432
  # may as well use the path if it's available and more specific
328
- ps = MimeMagic.by_path(content.path)
433
+ ps = MimeMagic.by_path(content.path.to_s)
329
434
  # XXX the need to do ts.to_s is a bug in mimemagic
330
- ts = ps if ps and ps.child_of?(ts.to_s)
435
+ ts = ps if ps and ps.descendant_of?(ts.to_s)
331
436
  end
332
- @type = !type || ts.child_of?(type) ? ts.to_s : type
437
+
438
+ # set the type to ts if it is more specific
439
+ @type = ts.descendant_of?(type.to_s) ? ts.to_s.freeze :
440
+ type.to_s.dup.downcase.freeze
333
441
 
334
442
  self
335
443
  end
@@ -337,9 +445,14 @@ class Store::Digest::Object
337
445
  # Determine (or set) whether the object is "fresh", i.e. whether it
338
446
  # is new (or restored), or had previously been in the store.
339
447
  #
340
- # @param state [true, false]
341
- def fresh? state = nil
342
- state.nil? ? @fresh : @fresh = !!state
448
+ # @return [true, false]
449
+ #
450
+ def fresh?
451
+ !!@fresh
452
+ end
453
+
454
+ def fresh= state
455
+ @fresh = !!state
343
456
  end
344
457
 
345
458
  # Return the algorithms used in the object.
@@ -360,9 +473,12 @@ class Store::Digest::Object
360
473
  alias_method :"[]", :digest
361
474
 
362
475
  # Returns the content stored in the object.
363
- # @return [IO]
476
+ #
477
+ # @return [#read]
478
+ #
364
479
  def content
365
- @content.is_a?(Proc) ? @content.call : @content
480
+ io = @content.is_a?(Proc) ? @content.call : @content
481
+ io = io ? IOWrapper.new(self, io) : io
366
482
  end
367
483
 
368
484
  # Determines if there is content embedded in the object.
@@ -385,53 +501,63 @@ class Store::Digest::Object
385
501
  !@digests.empty?
386
502
  end
387
503
 
504
+ # Returns whether the object is flagged as cache.
505
+ #
506
+ # @return [false, true]
507
+ #
508
+ def cache?
509
+ !!@flags.cache
510
+ end
511
+
512
+ # XXX i'm keeping these as-is for now
513
+
388
514
  # Returns true if the content type has been checked.
389
515
  # @return [false, true]
390
516
  def type_checked?
391
- 0 != @flags & TYPE_CHECKED
517
+ 0 != @flags.to_i & TYPE_CHECKED
392
518
  end
393
519
 
394
520
  # Returns true if the content type has been checked _and_ is valid.
395
521
  # @return [false, true]
396
522
  def type_valid?
397
- 0 != @flags & (TYPE_CHECKED|TYPE_VALID)
523
+ 0 != @flags.to_i & (TYPE_CHECKED|TYPE_VALID)
398
524
  end
399
525
 
400
526
  # Returns true if the character set has been checked.
401
527
  # @return [false, true]
402
528
  def charset_checked?
403
- 0 != @flags & CHARSET_CHECKED
529
+ 0 != @flags.to_i & CHARSET_CHECKED
404
530
  end
405
531
 
406
532
  # Returns true if the character set has been checked _and_ is valid.
407
533
  # @return [false, true]
408
534
  def charset_valid?
409
- 0 != @flags & (CHARSET_CHECKED|CHARSET_VALID)
535
+ 0 != @flags.to_i & (CHARSET_CHECKED|CHARSET_VALID)
410
536
  end
411
537
 
412
538
  # Returns true if the content encoding (e.g. gzip, deflate) has
413
539
  # been checked.
414
540
  # @return [false, true]
415
541
  def encoding_checked?
416
- 0 != @flags & ENCODING_CHECKED
542
+ 0 != @flags.to_i & ENCODING_CHECKED
417
543
  end
418
544
 
419
545
  # Returns true if the content encoding has been checked _and_ is valid.
420
546
  # @return [false, true]
421
547
  def encoding_valid?
422
- 0 != @flags & (ENCODING_CHECKED|ENCODING_VALID)
548
+ 0 != @flags.to_i & (ENCODING_CHECKED|ENCODING_VALID)
423
549
  end
424
550
 
425
551
  # Returns true if the blob's syntax has been checked.
426
552
  # @return [false, true]
427
553
  def syntax_checked?
428
- 0 != @flags & SYNTAX_CHECKED
554
+ 0 != @flags.to_i & SYNTAX_CHECKED
429
555
  end
430
556
 
431
557
  # Returns true if the blob's syntax has been checked _and_ is valid.
432
558
  # @return [false, true]
433
559
  def syntax_valid?
434
- 0 != @flags & (SYNTAX_CHECKED|SYNTAX_VALID)
560
+ 0 != @flags.to_i & (SYNTAX_CHECKED|SYNTAX_VALID)
435
561
  end
436
562
 
437
563
  %i[ctime mtime ptime dtime].each do |k|
@@ -488,7 +614,7 @@ class Store::Digest::Object
488
614
  # now the validation statuses
489
615
  out << "Validation:\n"
490
616
  FLAG.each_index do |i|
491
- x = flags >> (3 - i) & 3
617
+ x = flags.to_i >> (3 - i) & 3
492
618
  out << (" %-16s: %s\n" % [FLAG[i], STATE[x]])
493
619
  end
494
620
 
@@ -1,5 +1,5 @@
1
1
  module Store
2
2
  class Digest
3
- VERSION = "0.1.3"
3
+ VERSION = "0.3.1"
4
4
  end
5
5
  end
data/lib/store/digest.rb CHANGED
@@ -67,7 +67,7 @@ class Store::Digest
67
67
 
68
68
  extend driver
69
69
 
70
- #
70
+ #
71
71
  setup(**options)
72
72
  end
73
73
 
@@ -79,8 +79,8 @@ class Store::Digest
79
79
 
80
80
  # alias_method :inspect, :to_s
81
81
 
82
- # Add an object to the store. Takes pretty much anything that
83
- #
82
+ # Add an object to the store. Takes pretty much anything that makes
83
+ # sense to throw at it.
84
84
  #
85
85
  # @note Prefabricated {Store::Digest::Object} instances will be
86
86
  # rescanned.
@@ -96,15 +96,19 @@ class Store::Digest
96
96
  # @param mtime [Time] the modification time, if not "now"
97
97
  # @param strict [true, false] strict checking on metadata input
98
98
  # @param preserve [false, true] preserve existing modification time
99
+ #
99
100
  # @return [Store::Digest::Object] The (potentially pre-existing) entry
101
+ #
100
102
  def add obj, type: nil, charset: nil, language: nil, encoding: nil,
101
103
  mtime: nil, strict: true, preserve: false
102
104
  return unless obj
103
- #transaction do # |txn|
105
+
106
+ transaction do # |txn|
104
107
  obj = coerce_object obj, type: type, charset: charset,
105
108
  language: language, encoding: encoding, mtime: mtime, strict: strict
106
109
  raise ArgumentError, 'We need something to store!' unless obj.content?
107
110
 
111
+ # this method is helicoptered in
108
112
  tmp = temp_blob
109
113
 
110
114
  # XXX this is stupid; figure out a better way to do this
@@ -121,6 +125,7 @@ class Store::Digest
121
125
 
122
126
  # set_meta will return nil if there is no difference in what is set
123
127
  if h = set_meta(obj, preserve: preserve)
128
+ # warn h.inspect
124
129
  # replace the object
125
130
 
126
131
  content = obj.content
@@ -135,30 +140,34 @@ class Store::Digest
135
140
 
136
141
  # now settle the blob into storage
137
142
  settle_blob obj[primary].digest, tmp, mtime: obj.mtime
138
- #txn.commit
139
143
  else
140
144
  tmp.close
141
145
  tmp.unlink
142
146
 
147
+ # warn "got here lolol"
148
+
143
149
  # eh just do this
144
150
  obj = get obj
145
- obj.fresh? false # object is not fresh since we already have it
151
+ obj.fresh = false # object is not fresh since we already have it
146
152
  end
147
153
 
148
154
  obj
149
- #end
155
+ end
150
156
  end
151
157
 
152
158
  # Retrieve an object from the store.
153
- # @param
159
+ #
160
+ # @param obj [URI, Store::Digest::Object]
161
+ #
162
+ # @return [Store::Digest::Object, nil]
154
163
  def get obj
155
- body = -> do
164
+ transaction readonly: true do
156
165
  obj = coerce_object obj
157
- h = get_meta(obj) or return # bail if this does not exist
158
- b = get_blob h[:digests][primary].digest # may be nil
159
- Store::Digest::Object.new b, **h
166
+ if h = get_meta(obj) # bail if this does not exist
167
+ b = get_blob h[:digests][primary].digest # may be nil
168
+ Store::Digest::Object.new b, **h
169
+ end
160
170
  end
161
- transaction(&body)
162
171
  end
163
172
 
164
173
  # Remove an object from the store, optionally "forgetting" it ever existed.
@@ -171,23 +180,19 @@ class Store::Digest
171
180
  obj.scan digests: algorithms, blocksize: 2**20
172
181
  end
173
182
 
174
- # remove blob and mark metadata entry as deleted
175
- meta = nil
183
+ # remove or mark metadata entry as deleted and remove blob
176
184
  transaction do
177
- meta = forget ? remove_meta(obj) : mark_meta_deleted(obj)
178
- end
179
-
180
- if meta
181
- if blob = remove_blob(meta[:digests][primary].digest)
182
- return Store::Digest::Object.new blob, **meta
185
+ if meta = forget ? remove_meta(obj) : mark_meta_deleted(obj)
186
+ if blob = remove_blob(meta[:digests][primary].digest)
187
+ Store::Digest::Object.new blob, **meta
188
+ end
183
189
  end
184
190
  end
185
- nil
186
191
  end
187
192
 
188
193
  # Remove an object from the store and "forget" it ever existed,
189
194
  # i.e., purge it from the metadata.
190
- #
195
+ #
191
196
  def forget obj
192
197
  remove obj, forget: true
193
198
  end