store-digest 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/Gemfile +6 -0
- data/LICENSE +202 -0
- data/README.md +231 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/store/digest.rb +282 -0
- data/lib/store/digest/blob.rb +7 -0
- data/lib/store/digest/blob/filesystem.rb +146 -0
- data/lib/store/digest/driver.rb +14 -0
- data/lib/store/digest/driver/lmdb.rb +15 -0
- data/lib/store/digest/meta.rb +7 -0
- data/lib/store/digest/meta/lmdb.rb +621 -0
- data/lib/store/digest/object.rb +497 -0
- data/lib/store/digest/trait.rb +32 -0
- data/lib/store/digest/version.rb +5 -0
- data/store-digest.gemspec +39 -0
- metadata +161 -0
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: loads the bundle and the gem, then drops into IRB.
require 'bundler/setup'
require 'store/digest'

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require 'irb'
IRB.start __FILE__
|
data/bin/setup
ADDED
data/lib/store/digest.rb
ADDED
@@ -0,0 +1,282 @@
|
|
1
|
+
require 'store/digest/version'
|
2
|
+
require 'store/digest/driver'
|
3
|
+
require 'store/digest/object'
|
4
|
+
|
5
|
+
class Store::Digest
|
6
|
+
private
|
7
|
+
|
8
|
+
# Turn the given input into a Store::Digest::Object and apply any
# explicitly supplied metadata on top of it.
#
# @param obj [Store::Digest::Object, URI::NI, IO, String, StringIO, Pathname, #read]
#   the thing to coerce; an existing object is duplicated, a Pathname is
#   opened for binary reading
# @param type [String, nil] the content type
# @param charset [String, nil] the character set
# @param language [String, nil] the language
# @param encoding [String, nil] the encoding
# @param mtime [Time, nil] the modification time
# @param strict [true, false] whether a RuntimeError raised by a metadata
#   setter is propagated (true) or swallowed (false)
# @return [Store::Digest::Object]
# @raise [ArgumentError] if the input is of a type that cannot be coerced
def coerce_object obj, type: nil, charset: nil,
    language: nil, encoding: nil, mtime: nil, strict: true
  # the user-mutable metadata, in the order it is applied further down
  meta = { type: type, charset: charset, language: language,
           encoding: encoding, mtime: mtime }

  # anything that responds to seek, pos and read is treated as a stream
  streamlike = -> x { %i[seek pos read].all? { |m| x.respond_to? m } }

  coerced =
    case obj
    when Store::Digest::Object
      obj.dup
    when URI::NI
      # built from the digest URI alone
      Store::Digest::Object.new digests: obj, **meta
    when IO, String, StringIO, streamlike
      # assume this is going to be scanned later
      Store::Digest::Object.new obj, **meta
    when Pathname
      # actually open pathnames that are handed directly into S::D
      Store::Digest::Object.new obj.expand_path.open('rb'), **meta
    else
      raise ArgumentError,
        "Can't coerce a #{obj.class} to Store::Digest::Object"
    end

  # overwrite the user-mutable metadata with whatever was supplied
  meta.each do |field, value|
    next unless value

    begin
      coerced.send "#{field}=", value
    rescue RuntimeError
      raise if strict
    end
  end

  coerced
end
|
48
|
+
|
49
|
+
public
|
50
|
+
|
51
|
+
# Initialize a storage.
#
# @param options [Hash] `:driver` selects the storage driver, either as a
#   module or as the name of a constant under {Store::Digest::Driver}
#   (defaults to {Store::Digest::Driver::LMDB}); every remaining option
#   is handed to the driver's `setup`.
# @raise [ArgumentError] if the driver cannot be found or is not a
#   {Store::Digest::Driver}
def initialize **options
  driver = options.delete(:driver) || Store::Digest::Driver::LMDB

  unless driver.is_a? Module
    # coerce to symbol
    driver = driver.to_s.to_sym

    known = begin
      Store::Digest::Driver.const_defined? driver
    rescue NameError
      # const_defined? raises on names that are not valid constants
      # (e.g. :lmdb); report that the same way as an unknown driver
      false
    end

    raise ArgumentError,
      "There is no storage driver Store::Digest::Driver::#{driver}" unless
      known

    driver = Store::Digest::Driver.const_get driver
  end

  raise ArgumentError,
    "Driver #{driver} is not a Store::Digest::Driver" unless
    driver.ancestors.include? Store::Digest::Driver

  # mix the driver into this instance only
  extend driver

  # let the driver configure itself with whatever options are left
  setup(**options)
end
|
73
|
+
|
74
|
+
# XXX this is not right; leave it for now
|
75
|
+
# def to_s
|
76
|
+
# '<%s:0x%016x objects=%d deleted=%d bytes=%d>' %
|
77
|
+
# [self.class, self.object_id, objects, deleted, bytes]
|
78
|
+
# end
|
79
|
+
|
80
|
+
# alias_method :inspect, :to_s
|
81
|
+
|
82
|
+
# Add an object to the store. Takes pretty much anything that can be
# coerced into a {Store::Digest::Object} carrying content.
#
# @note Prefabricated {Store::Digest::Object} instances will be
#  rescanned.
#
# @note `:preserve` will cause a noop if object metadata is identical
#  save for `:ctime` and `:mtime` (`:ctime` is always ignored).
#
# @param obj [IO,File,Pathname,String,Store::Digest::Object] the object
# @param type [String] the content type
# @param charset [String] the character set, if applicable
# @param language [String] the language, if applicable
# @param encoding [String] the encoding (eg compression) if applicable
# @param mtime [Time] the modification time, if not "now"
# @param strict [true, false] strict checking on metadata input
# @param preserve [false, true] preserve existing modification time
# @return [Store::Digest::Object, nil] The (potentially pre-existing)
#  entry, or nil if `obj` was nil/false
# @raise [ArgumentError] if the coerced object has no content
def add obj, type: nil, charset: nil, language: nil, encoding: nil,
    mtime: nil, strict: true, preserve: false
  return unless obj

  obj = coerce_object obj, type: type, charset: charset,
    language: language, encoding: encoding, mtime: mtime, strict: strict
  raise ArgumentError, 'We need something to store!' unless obj.content?

  tmp = temp_blob

  begin
    # XXX this is stupid; figure out a better way to do this

    # get our digests, copying the content into the temp blob as we go
    obj.scan(digests: algorithms, blocksize: 2**20, strict: strict,
             type: type, charset: charset, language: language,
             encoding: encoding, mtime: mtime) do |buf|
      tmp << buf
    end

    # if we are scanning an object it is necessarily not deleted
    obj.dtime = nil

    # set_meta will return nil if there is no difference in what is set
    if h = set_meta(obj, preserve: preserve)
      # replace the object
      content = obj.content

      # do this to prevent too many open files
      if content.is_a? File
        path = Pathname(content.path).expand_path
        content = -> { path.open('rb') }
      end

      obj = Store::Digest::Object.new content, fresh: true, **h

      # now settle the blob into storage
      settle_blob obj[primary].digest, tmp, mtime: obj.mtime
    else
      tmp.close
      tmp.unlink

      # eh just do this
      obj = get obj
      obj.fresh? false # object is not fresh since we already have it
    end
  rescue StandardError
    # do not strand the temp blob when anything above fails; the
    # cleanup itself is best-effort so the original error is the one
    # that surfaces
    begin
      tmp.close unless tmp.closed?
      tmp.unlink
    rescue SystemCallError, IOError
      nil
    end
    raise
  end

  obj
end
|
151
|
+
|
152
|
+
# Retrieve an object from the store.
#
# @param obj [Store::Digest::Object, URI::NI, IO, String, Pathname]
#  anything that can be coerced to a {Store::Digest::Object}
# @return [Store::Digest::Object, nil] the result of the lookup as
#  handed back by `transaction`; nil when there is no metadata record
def get obj
  lookup = lambda do
    wanted = coerce_object obj
    meta = get_meta wanted
    # bail if this does not exist (this only leaves the lambda)
    return unless meta

    blob = get_blob meta[:digests][primary].digest # may be nil
    Store::Digest::Object.new blob, **meta
  end

  transaction(&lookup)
end
|
163
|
+
|
164
|
+
# Remove an object from the store, optionally "forgetting" it ever existed.
#
# @param obj [Store::Digest::Object, URI::NI, IO, String, Pathname]
#  anything that can be coerced to a {Store::Digest::Object}
# @param forget [false, true] purge the metadata record instead of
#  marking it deleted
# @return [Store::Digest::Object, nil] the removed object, or nil when
#  no metadata record or no blob was found
# @raise [ArgumentError] if the object is unscanned and has no content
def remove obj, forget: false
  obj = coerce_object obj

  # we need digests to find the record, so scan if that hasn't happened
  unless obj.scanned?
    unless obj.content?
      raise ArgumentError,
        'Cannot scan object because there is no content'
    end
    obj.scan digests: algorithms, blocksize: 2**20
  end

  # mark the metadata entry as deleted (or drop it outright)
  meta = nil
  transaction do
    meta = if forget
             remove_meta(obj)
           else
             mark_meta_deleted(obj)
           end
  end
  return nil unless meta

  # then remove the blob itself
  blob = remove_blob(meta[:digests][primary].digest)
  return nil unless blob

  Store::Digest::Object.new blob, **meta
end
|
187
|
+
|
188
|
+
# Remove an object from the store and "forget" it ever existed,
# i.e., purge it from the metadata. Shorthand for calling #remove
# with `forget: true`.
#
# @param obj the object to forget, as accepted by #remove
# @return whatever #remove returns
def forget obj
  remove(obj, forget: true)
end
|
194
|
+
|
195
|
+
# Return statistics on the store.
#
# @return [Store::Digest::Stats] built from whatever `meta_get_stats`
#  reports
def stats
  figures = meta_get_stats
  Stats.new(**figures)
end
|
199
|
+
|
200
|
+
# A bag of statistics about a store, with helpers to present them.
class Stats
  # i dunno do you wanna come up with funny labels? here's where you put em
  LABELS = {
    charsets: "Character sets",
  }.transform_values(&:freeze).freeze

  # lol, petabytes
  MAGNITUDES = %w[B KiB MiB GiB TiB PiB].freeze

  # The per-category breakdowns reported by #label_struct and #to_s.
  CATEGORIES = %i[types languages charsets encodings].freeze

  attr_reader :ctime, :mtime, :objects, :deleted, :bytes

  # At this juncture the constructor just puts whatever you throw at
  # it into the object. See
  # {Store::Digest::Meta::LMDB#meta_get_stats} for the real magic.
  # @param options [Hash] each key becomes an instance variable of the
  #  same name
  def initialize **options
    # XXX help i am so lazy
    options.each { |k, v| instance_variable_set "@#{k}", v }
  end

  # Return a human-readable byte size.
  #
  # Sizes beyond the largest known unit are expressed in that unit
  # rather than falling off the end of the table, and a missing byte
  # count is treated as zero.
  #
  # @return [String] a representation of the byte size of the store.
  def human_size
    bytes = @bytes.to_i

    # bit_length - 1 is floor(log2(bytes)), computed exactly on the
    # integer; dividing by ten gives the binary magnitude, which also
    # happens to conveniently work as an array index
    mag = bytes > 0 ? (bytes.bit_length - 1) / 10 : 0
    mag = MAGNITUDES.size - 1 if mag >= MAGNITUDES.size

    return "#{bytes} bytes" unless mag > 0

    '%0.2f %s (%d bytes)' % [(bytes.to_f / 2**(mag * 10)).round(2),
                             MAGNITUDES[mag], bytes]
  end

  # Return the non-empty per-category breakdowns as a structure.
  #
  # @return [Hash{Symbol => Array}] keyed by the singular category name
  #  (`:type`, `:language`, `:charset`, `:encoding`); each value is a
  #  pair of label and a hash of the category's entries in sorted key
  #  order. The label is the entry in LABELS if there is one, otherwise
  #  the capitalized category symbol.
  def label_struct
    CATEGORIES.each_with_object({}) do |k, out|
      stats = instance_variable_get("@#{k}")
      next unless stats && !stats.empty?

      # XXX note that all these plurals are just inflected with
      # 's' so clipping off the last character is correct
      singular = k.to_s[0...-1].to_sym

      sorted = stats.keys.sort.each_with_object({}) do |s, h|
        h[s] = stats[s]
      end

      out[singular] = [LABELS.fetch(k, k.capitalize), sorted]
    end
  end

  # Return the stats object as a nicely formatted string.
  # @return [String] no joke.
  def to_s
    out = <<-EOT
#{self.class}
Statistics:
Created: #{@ctime}
Last modified: #{@mtime}
Total objects: #{@objects}
Deleted records: #{@deleted}
Repository size: #{human_size}
    EOT

    CATEGORIES.each do |k|
      stats = instance_variable_get("@#{k}")
      next unless stats && !stats.empty?

      out << " #{LABELS.fetch k, k.capitalize}: #{stats.count}\n"
      stats.keys.sort.each do |s|
        out << " #{s}: #{stats[s]}\n"
      end
    end

    out
  end
end
|
281
|
+
|
282
|
+
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
require 'store/digest/blob'
|
2
|
+
require 'store/digest/trait'
|
3
|
+
|
4
|
+
require 'time'
|
5
|
+
require 'pathname'
|
6
|
+
require 'base32'
|
7
|
+
require 'tempfile'
|
8
|
+
|
9
|
+
module Store::Digest::Blob::FileSystem
|
10
|
+
include Store::Digest::Trait::RootDir
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
STORE = 'store'.freeze
|
15
|
+
TMP = 'tmp'.freeze
|
16
|
+
|
17
|
+
# The location of the store: the STORE subdirectory of the root
# directory.
# @return [Pathname]
def store
  dir / STORE
end
|
22
|
+
|
23
|
+
# The location of the temp directory: the TMP subdirectory of the root
# directory.
# @return [Pathname]
def tmp
  dir / TMP
end
|
28
|
+
|
29
|
+
# Return a hash-pathed location of the blob, suitable for
# case-insensitive file systems.
# @param bin [String] The binary representation of the keying digest
# @return [Pathname] The absolute path for the blob
def path_for bin
  # lower-case base32 with the padding stripped
  encoded = Base32.encode(bin).delete('=').downcase

  # three four-character segments, then whatever is left over
  segments = encoded.unpack('a4a4a4a*')

  store + segments.join('/')
end
|
37
|
+
|
38
|
+
protected
|
39
|
+
|
40
|
+
# Set up the blob storage: after the inherited setup has run, make sure
# the store and temp directories under the root exist and are usable.
#
# @param options [Hash] passed along unchanged to the inherited setup
# @raise [RuntimeError] if either path exists but is not a readable,
#  writable, enterable directory
def setup **options
  super

  [STORE, TMP].each do |name|
    path = dir + name

    unless path.exist?
      # wtf Pathname#mkdir takes no args
      Dir.mkdir(path, (0777 & ~umask) | 02000)
      next
    end

    raise "#{path} exists and is not a directory!" unless path.directory?
    raise "#{path} is not readable!" unless path.readable?
    raise "#{path} is not writable!" unless path.writable?
    raise "#{path} cannot be entered!" unless path.executable?
  end
end
|
56
|
+
|
57
|
+
# Return an open tempfile in the designated temp directory. The file
# is switched to binary mode, since blobs are arbitrary bytes that
# must be written without newline or encoding translation.
# @return [Tempfile]
def temp_blob
  Tempfile.new('blob', tmp).tap(&:binmode)
end
|
62
|
+
|
63
|
+
# Settle a blob from its temporary location to its permanent location.
# The temporary file is consumed either way: it is moved into place,
# or removed when the target already exists and is not overwritten.
# @param bin [String] The binary representation of the keying digest
# @param fh [File] An open filehandle, presumably a temp file
# @param mtime [nil, Time, DateTime, Integer] the modification time
#  (defaults to now)
# @param overwrite [false, true] whether to overwrite the target
# @return [true] a throwaway return value
# @raise [ArgumentError] if mtime is not of a usable type
# @raise [SystemCallError] as we are mucking with the file system
def settle_blob bin, fh, mtime: nil, overwrite: false
  # get the mtimes, normalized to integer epoch seconds
  mtime ||= Time.now
  mtime = case mtime
          when Time then mtime.to_i
          when Integer then mtime
          when -> x { x.respond_to? :to_time }
            mtime.to_time.to_i
          else
            raise ArgumentError,
              "mtime must be a Time, DateTime, or Integer, not #{mtime.class}"
          end

  # get the filenames
  source = fh.path
  target = path_for bin

  # make sure this thing is flushed
  unless fh.closed?
    fh.flush
    fh.close
  end

  # these can all raise, of course
  FileUtils.mkpath(target.dirname, mode: 0777 & ~umask | 02000)

  if !target.exist? || overwrite
    FileUtils.mv source, target
    target.chmod 0444 & ~umask
    target.utime mtime, mtime
  else
    # the blob is already in place, so the temporary copy is redundant;
    # remove it now instead of leaving it in the temp directory
    FileUtils.rm_f source
  end

  true
end
|
105
|
+
|
106
|
+
# Return a blob filehandle (or closure that will return said blob).
# @param bin [String] The binary representation of the keying digest
# @param direct [false, true] whether to open the filehandle directly
# @return [Proc, IO, nil] Either a closure or the blob itself; nil if
#  there is nothing at the blob's path
# @raise [RuntimeError] blows up if the blob is not what is expected
# @raise [SystemCallError] if there's trouble opening the blob
def get_blob bin, direct: false
  path = path_for bin
  return nil unless path.exist?

  # hex form of the digest, only used in the error messages
  hex = bin.unpack1 'H*'
  raise "Blob #{hex} is not a file!" unless path.file?
  raise "Blob #{hex} is not readable!" unless path.readable?

  # return a closure (maybe)
  opener = -> { path.open('rb') }
  return opener.call if direct

  opener
end
|
122
|
+
|
123
|
+
# Remove a blob based on its binary digest value, then prune any
# directories under the store that are left empty along its path.
# @param bin [String] The binary representation of the keying digest
# @return [File, nil] a read handle on the blob, opened before it was
#  unlinked, or nil if there was no blob
# @raise [SystemCallError] since it's mucking with the file system
def remove_blob bin
  # XXX we should really flock the directory stack
  path = path_for bin

  handle = nil
  if path.exist?
    handle = path.open 'rb'
    path.unlink
  end

  # XXX we should really flock the directory stack
  # walk from the blob's directory up to (but not including) the store
  path.dirname.relative_path_from(store).ascend do |rel|
    candidate = store + rel
    candidate.rmdir if candidate.exist? && candidate.empty?
  end

  handle
end
|
145
|
+
|
146
|
+
end
|