store-digest 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/Gemfile +6 -0
- data/LICENSE +202 -0
- data/README.md +231 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/store/digest.rb +282 -0
- data/lib/store/digest/blob.rb +7 -0
- data/lib/store/digest/blob/filesystem.rb +146 -0
- data/lib/store/digest/driver.rb +14 -0
- data/lib/store/digest/driver/lmdb.rb +15 -0
- data/lib/store/digest/meta.rb +7 -0
- data/lib/store/digest/meta/lmdb.rb +621 -0
- data/lib/store/digest/object.rb +497 -0
- data/lib/store/digest/trait.rb +32 -0
- data/lib/store/digest/version.rb +5 -0
- data/store-digest.gemspec +39 -0
- metadata +161 -0
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: load the bundle and the gem, then start IRB.
require 'bundler/setup'
require 'store/digest'

# Fixtures and/or initialization code can be added here to make
# experimenting with the gem easier.

# A different console can be used instead of IRB. For Pry, add pry to
# the Gemfile and swap in the two lines below:
#   require 'pry'
#   Pry.start

require 'irb'
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/store/digest.rb
ADDED
@@ -0,0 +1,282 @@
|
|
1
|
+
require 'store/digest/version'
|
2
|
+
require 'store/digest/driver'
|
3
|
+
require 'store/digest/object'
|
4
|
+
|
5
|
+
class Store::Digest
|
6
|
+
private
|
7
|
+
|
8
|
+
# Coerce an arbitrary input into a Store::Digest::Object.
#
# * A Store::Digest::Object is duplicated.
# * A URI::NI is handed to the new object as its `digests`.
# * An IO, String, StringIO, or anything that responds to `seek`, `pos`
#   and `read` is handed over as content (to be scanned later).
# * A Pathname is expanded and opened for binary reading.
#
# Afterwards every non-nil metadata keyword is assigned through the
# matching setter, overwriting the user-mutable metadata. A RuntimeError
# raised by a setter is re-raised only when `strict` is true; otherwise
# it is ignored.
#
# @raise [ArgumentError] if `obj` is none of the supported kinds
# @return [Store::Digest::Object]
def coerce_object obj, type: nil, charset: nil,
    language: nil, encoding: nil, mtime: nil, strict: true
  # insertion order matters: setters run in exactly this sequence
  meta = { type: type, charset: charset, language: language,
           encoding: encoding, mtime: mtime }
  seekable = -> x { %i[seek pos read].all? { |m| x.respond_to? m } }

  obj =
    case obj
    when Store::Digest::Object
      obj.dup
    when URI::NI
      Store::Digest::Object.new digests: obj, **meta
    when IO, String, StringIO, seekable
      Store::Digest::Object.new obj, **meta
    when Pathname
      # actually open pathnames that are handed directly into S::D
      Store::Digest::Object.new obj.expand_path.open('rb'), **meta
    else
      raise ArgumentError,
        "Can't coerce a #{obj.class} to Store::Digest::Object"
    end

  meta.each do |field, value|
    next unless value

    begin
      obj.send "#{field}=", value
    rescue RuntimeError => e
      raise e if strict
    end
  end

  obj
end
|
48
|
+
|
49
|
+
public
|
50
|
+
|
51
|
+
# Initialize a storage.
#
# The `:driver` option selects the storage driver. It may be a module,
# or a name (anything with `to_s`) that is resolved as a constant under
# Store::Digest::Driver. When omitted, Store::Digest::Driver::LMDB is
# used. The driver is mixed into this instance with `extend`, and every
# remaining option is forwarded to `setup`.
#
# @param options [Hash] `:driver` plus whatever the driver's `setup` takes
# @raise [ArgumentError] if the driver name cannot be resolved or the
#  resolved object is not a Store::Digest::Driver
def initialize **options
  driver = options.delete(:driver) || Store::Digest::Driver::LMDB

  unless driver.is_a? Module
    # coerce to symbol
    name = driver.to_s.to_sym

    # const_defined? raises NameError for strings that are not valid
    # constant names (e.g. "lmdb" or ""); report that the same way as a
    # driver that simply does not exist
    resolved = begin
      Store::Digest::Driver.const_defined?(name) &&
        Store::Digest::Driver.const_get(name)
    rescue NameError
      nil
    end

    raise ArgumentError,
      "There is no storage driver Store::Digest::Driver::#{name}" unless
      resolved

    driver = resolved
  end

  # a constant can hold any value, so make sure we have a module before
  # asking for its ancestors
  raise ArgumentError,
    "Driver #{driver} is not a Store::Digest::Driver" unless
    driver.is_a?(Module) && driver.ancestors.include?(Store::Digest::Driver)

  extend driver

  setup(**options)
end
|
73
|
+
|
74
|
+
# XXX this is not right; leave it for now
|
75
|
+
# def to_s
|
76
|
+
# '<%s:0x%016x objects=%d deleted=%d bytes=%d>' %
|
77
|
+
# [self.class, self.object_id, objects, deleted, bytes]
|
78
|
+
# end
|
79
|
+
|
80
|
+
# alias_method :inspect, :to_s
|
81
|
+
|
82
|
+
# Add an object to the store. Takes pretty much anything that
# `coerce_object` can turn into a {Store::Digest::Object}.
#
# The content is scanned for digests while being copied into a
# temporary blob. If `set_meta` reports a change, the temporary blob is
# settled into permanent storage; otherwise the temporary blob is
# discarded and the existing entry is fetched and marked not fresh.
#
# @note Prefabricated {Store::Digest::Object} instances will be
#  rescanned.
#
# @note `:preserve` will cause a noop if object metadata is identical
#  save for `:ctime` and `:mtime` (`:ctime` is always ignored).
#
# @param obj [IO,File,Pathname,String,Store::Digest::Object] the object
# @param type [String] the content type
# @param charset [String] the character set, if applicable
# @param language [String] the language, if applicable
# @param encoding [String] the encoding (eg compression) if applicable
# @param mtime [Time] the modification time, if not "now"
# @param strict [true, false] strict checking on metadata input
# @param preserve [false, true] preserve existing modification time
# @raise [ArgumentError] if the coerced object has no content
# @return [Store::Digest::Object, nil] The (potentially pre-existing)
#  entry, or nil when `obj` is nil/false
def add obj, type: nil, charset: nil, language: nil, encoding: nil,
    mtime: nil, strict: true, preserve: false
  return unless obj

  obj = coerce_object obj, type: type, charset: charset,
    language: language, encoding: encoding, mtime: mtime, strict: strict
  raise ArgumentError, 'We need something to store!' unless obj.content?

  tmp = temp_blob

  begin
    # get our digests, spooling the content into the temp blob as we go
    obj.scan(digests: algorithms, blocksize: 2**20, strict: strict,
             type: type, charset: charset, language: language,
             encoding: encoding, mtime: mtime) do |buf|
      tmp << buf
    end

    # if we are scanning an object it is necessarily not deleted
    obj.dtime = nil

    # set_meta will return nil if there is no difference in what is set
    if h = set_meta(obj, preserve: preserve)
      content = obj.content

      # swap an open File for a closure that reopens it on demand, to
      # prevent too many open files
      if content.is_a? File
        path = Pathname(content.path).expand_path
        content = -> { path.open('rb') }
      end

      # replace the object
      obj = Store::Digest::Object.new content, fresh: true, **h

      # now settle the blob into storage
      settle_blob obj[primary].digest, tmp, mtime: obj.mtime
    else
      tmp.close
      tmp.unlink

      # eh just do this
      obj = get obj
      obj.fresh? false # object is not fresh since we already have it
    end
  rescue StandardError
    # don't strand an open, half-written temp blob when something above
    # blows up; clean it up and let the original error propagate
    tmp.close unless tmp.closed?
    tmp.unlink
    raise
  end

  obj
end
|
151
|
+
|
152
|
+
# Retrieve an object from the store.
#
# The lookup runs inside `transaction`. The blob handed to the returned
# object may be nil if `get_blob` finds nothing for the primary digest.
#
# @param obj anything `coerce_object` accepts
# @return [Store::Digest::Object, nil] nil if `get_meta` finds no entry
def get obj
  # a lambda (not a plain block) so that `return` only leaves the lookup
  lookup = lambda do
    obj  = coerce_object obj
    meta = get_meta obj
    return unless meta # bail if this does not exist

    blob = get_blob meta[:digests][primary].digest # may be nil
    Store::Digest::Object.new blob, **meta
  end

  transaction(&lookup)
end
|
163
|
+
|
164
|
+
# Remove an object from the store, optionally "forgetting" it ever existed.
#
# An object that has not been scanned yet is scanned first, which
# requires it to have content. The metadata entry is then either removed
# (`forget: true`) or marked deleted inside `transaction`, and finally
# the blob keyed by the primary digest is removed.
#
# @param obj anything `coerce_object` accepts
# @param forget [false, true] remove the metadata entry instead of
#  marking it deleted
# @raise [ArgumentError] if the object is unscanned and has no content
# @return [Store::Digest::Object, nil] the removed object, or nil when
#  there was no metadata entry or no blob to remove
def remove obj, forget: false
  obj = coerce_object obj

  unless obj.scanned?
    raise ArgumentError,
      'Cannot scan object because there is no content' unless obj.content?
    obj.scan digests: algorithms, blocksize: 2**20
  end

  # remove blob and mark metadata entry as deleted
  record = nil
  transaction do
    record = if forget
               remove_meta(obj)
             else
               mark_meta_deleted(obj)
             end
  end
  return unless record

  blob = remove_blob record[:digests][primary].digest
  return unless blob

  Store::Digest::Object.new blob, **record
end
|
187
|
+
|
188
|
+
# Remove an object from the store and "forget" it ever existed,
# i.e., purge it from the metadata. Shorthand for calling `remove`
# with `forget: true`.
#
# @param obj anything `remove` accepts
# @return whatever `remove` returns
def forget obj
  remove(obj, forget: true)
end
|
194
|
+
|
195
|
+
# Return statistics on the store.
#
# @return [Stats] built from the hash returned by `meta_get_stats`
def stats
  raw = meta_get_stats
  Stats.new(**raw)
end
|
199
|
+
|
200
|
+
# A bag of repository statistics with human-readable renderings.
class Stats
  private

  # NOTE(review): `private` does not apply to constants, so these remain
  # publicly readable; the section marker is kept as the author wrote it.

  # Display labels for statistic groups whose name should not simply be
  # the capitalized key.
  LABELS = {
    charsets: "Character sets",
  }.transform_values(&:freeze).freeze

  # Binary unit names, indexed by power of 1024.
  MAGNITUDES = %w[B KiB MiB GiB TiB PiB].freeze

  public

  attr_reader :ctime, :mtime, :objects, :deleted, :bytes

  # At this juncture the constructor just puts whatever you throw at
  # it into the object. See
  # {Store::Digest::Meta::LMDB#meta_get_stats} for the real magic.
  # @param options [Hash] each key becomes an instance variable
  def initialize **options
    options.each { |k, v| instance_variable_set "@#{k}", v }
  end

  # Return a human-readable byte size.
  #
  # Sizes below 1 KiB (including an unset byte count, treated as 0) are
  # printed as plain bytes. Sizes beyond the largest known unit are
  # expressed in that largest unit rather than indexing off the table.
  # @return [String] a representation of the byte size of the store.
  def human_size
    bytes = @bytes.to_i

    # bit_length - 1 is floor(log2(bytes)) computed exactly on the
    # integer, so exact powers of two cannot be misfiled by float error;
    # dividing by ten gives the power of 1024, which doubles as the
    # index into MAGNITUDES
    mag = bytes > 0 ? (bytes.bit_length - 1) / 10 : 0
    mag = [mag, MAGNITUDES.length - 1].min

    return "#{bytes} bytes" unless mag > 0

    '%0.2f %s (%d bytes)' % [(bytes.to_f / 2**(mag * 10)).round(2),
                             MAGNITUDES[mag], bytes]
  end

  # Return the non-empty statistic groups as a structure keyed by the
  # singular group name. Each value is a pair of display label and a
  # hash of the group's entries in sorted key order.
  # @return [Hash{Symbol => Array}]
  def label_struct
    %i[types languages charsets encodings].each_with_object({}) do |k, out|
      group = instance_variable_get("@#{k}")
      next unless group and !group.empty?

      # XXX note that all these plurals are just inflected with
      # 's' so clipping off the last character is correct
      singular = k.to_s[0, k.to_s.length - 1].to_sym
      pair = out[singular] ||= [LABELS.fetch(k, k.capitalize), {}]
      group.keys.sort.each { |s| pair.last[s] = group[s] }
    end
  end

  # Return the stats object as a nicely formatted string.
  # @return [String] no joke.
  def to_s
    out = <<~EOT
      #{self.class}
      Statistics:
      Created: #{@ctime}
      Last modified: #{@mtime}
      Total objects: #{@objects}
      Deleted records: #{@deleted}
      Repository size: #{human_size}
    EOT

    %i[types languages charsets encodings].each do |k|
      group = instance_variable_get("@#{k}")
      next unless group and !group.empty?

      out << " #{LABELS.fetch k, k.capitalize}: #{group.count}\n"
      group.keys.sort.each do |s|
        out << " #{s}: #{group[s]}\n"
      end
    end

    out
  end
end
|
281
|
+
|
282
|
+
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
require 'store/digest/blob'
|
2
|
+
require 'store/digest/trait'
|
3
|
+
|
4
|
+
require 'time'
|
5
|
+
require 'pathname'
|
6
|
+
require 'base32'
|
7
|
+
require 'tempfile'
|
8
|
+
|
9
|
+
module Store::Digest::Blob::FileSystem
|
10
|
+
include Store::Digest::Trait::RootDir
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
STORE = 'store'.freeze
|
15
|
+
TMP = 'tmp'.freeze
|
16
|
+
|
17
|
+
# The location of the store: the STORE subdirectory of `dir`.
# @return [Pathname]
def store
  dir / STORE
end
|
22
|
+
|
23
|
+
# The location of the temp directory: the TMP subdirectory of `dir`.
# @return [Pathname]
def tmp
  dir / TMP
end
|
28
|
+
|
29
|
+
# Return a hash-pathed location of the blob, suitable for
# case-insensitive file systems.
#
# The digest is Base32-encoded, stripped of `=` padding and lowercased,
# then cut into three four-character directory names followed by the
# remainder as the file name.
# @param bin [String] The binary representation of the keying digest
# @return [Pathname] The absolute path for the blob
def path_for bin
  encoded  = Base32.encode(bin).delete('=').downcase
  segments = encoded.unpack 'a4a4a4a*'
  store + segments.join('/')
end
|
37
|
+
|
38
|
+
protected
|
39
|
+
|
40
|
+
# Run the inherited setup, then make sure the STORE and TMP
# subdirectories of `dir` exist and are usable. A missing subdirectory
# is created with mode `0777 & ~umask | 02000`.
# @param options [Hash] passed on unchanged to `super`
# @raise [RuntimeError] if a subdirectory exists but is not a directory,
#  or is not readable, writable, or enterable
def setup **options
  super

  [STORE, TMP].each do |name|
    subdir = dir + name

    unless subdir.exist?
      # wtf Pathname#mkdir takes no args
      Dir.mkdir subdir, 0777 & ~umask | 02000
      next
    end

    raise "#{subdir} exists and is not a directory!" unless subdir.directory?
    raise "#{subdir} is not readable!" unless subdir.readable?
    raise "#{subdir} is not writable!" unless subdir.writable?
    raise "#{subdir} cannot be entered!" unless subdir.executable?
  end
end
|
56
|
+
|
57
|
+
# Return an open tempfile, named with the prefix "blob", in the
# designated temp directory.
# @return [Tempfile]
def temp_blob
  scratch_dir = tmp
  Tempfile.new('blob', scratch_dir)
end
|
62
|
+
|
63
|
+
# Settle a blob from its temporary location to its permanent location.
#
# The filehandle is flushed and closed, the target's parent directories
# are created, and the file is moved into place, made read-only, and
# stamped with `mtime`. If the target already exists and `overwrite` is
# false, the existing blob is left untouched and the now-redundant
# source file is deleted instead of being left behind.
# @param bin [String] The binary representation of the keying digest
# @param fh [File] An open filehandle, presumably a temp file
# @param mtime [nil, Time, DateTime, Integer] the modification time
#  (defaults to now)
# @param overwrite [false, true] whether to overwrite the target
# @return [true] a throwaway return value
# @raise [ArgumentError] if `mtime` is none of the accepted types
# @raise [SystemCallError] as we are mucking with the file system
def settle_blob bin, fh, mtime: nil, overwrite: false
  # normalize the mtime to integer epoch seconds
  mtime ||= Time.now
  mtime = case mtime
          when Time then mtime.to_i
          when Integer then mtime
          when -> x { x.respond_to? :to_time }
            mtime.to_time.to_i
          else
            raise ArgumentError,
              "mtime must be a Time, DateTime, or Integer, not #{mtime.class}"
          end

  # get the filenames (grab the source path before closing the handle)
  source = fh.path
  target = path_for bin

  # make sure this thing is flushed
  unless fh.closed?
    fh.flush
    fh.close
  end

  # these can all raise, of course
  FileUtils.mkpath(target.dirname, mode: 0777 & ~umask | 02000)

  if !target.exist? || overwrite
    FileUtils.mv source, target
    target.chmod 0444 & ~umask
    target.utime mtime, mtime
  elsif source && !File.identical?(source, target.to_s)
    # the blob is already settled, so the source is a redundant copy;
    # remove it now rather than leave it in the temp directory (guarding
    # against the degenerate case where source *is* the target)
    FileUtils.rm_f source
  end

  true
end
|
105
|
+
|
106
|
+
# Return a blob filehandle (or closure that will return said blob).
#
# By default a closure is returned so the file is only opened when it is
# actually wanted; with `direct: true` it is opened immediately.
# @param bin [String] The binary representation of the keying digest
# @param direct [false, true] whether to open the filehandle directly
# @return [Proc, IO, nil] Either a closure or the blob itself, or nil if
#  no blob exists at the digest's path
# @raise [RuntimeError] blows up if the blob is not what is expected
# @raise [SystemCallError] if there's trouble opening the blob
def get_blob bin, direct: false
  path = path_for bin
  return unless path.exist?

  hex = bin.unpack1 'H*'
  raise "Blob #{hex} is not a file!" unless path.file?
  raise "Blob #{hex} is not readable!" unless path.readable?

  opener = -> { path.open('rb') }
  if direct
    opener.call
  else
    opener
  end
end
|
122
|
+
|
123
|
+
# Remove a blob based on its binary digest value.
#
# The blob is opened before it is unlinked so the caller still gets a
# readable handle. Afterwards each directory between the blob and the
# store root is removed, deepest first, if it exists and is empty.
# @param bin [String] The binary representation of the keying digest
# @return [File, nil] an open handle on the removed blob, or nil if
#  there was no blob to remove
# @raise [SystemCallError] since it's mucking with the file system
def remove_blob bin
  # XXX we should really flock the directory stack
  path = path_for bin

  handle = nil
  if path.exist?
    handle = path.open 'rb'
    path.unlink
  end

  # XXX we should really flock the directory stack
  segments = path.dirname.relative_path_from(store).to_s.split ?/
  (segments.length - 1).downto(0) do |last|
    candidate = store + segments[0..last].join(?/)
    candidate.rmdir if candidate.exist? && candidate.empty?
  end

  handle
end
|
145
|
+
|
146
|
+
end
|