store-digest 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
# Rake entry point: loads Bundler's gem packaging tasks and makes the
# RSpec suite the default task.
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Defines the `spec` task.
RSpec::Core::RakeTask.new :spec

# A bare `rake` invocation runs `spec`.
task default: :spec
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Opens an interactive IRB session with the bundle and the gem loaded.
require "bundler/setup"
require "store/digest"

# Fixtures and/or initialization code can go here to make experimenting
# with the gem easier.
#
# To use Pry instead of IRB, add pry to the Gemfile and swap in:
#   require "pry"
#   Pry.start

require "irb"
IRB.start __FILE__
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Development bootstrap: installs the gem's dependencies through Bundler.

# Abort on a failed command, an unset variable, or a failed pipeline stage.
set -o errexit -o nounset -o pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'
# Echo every line as it is read and every command as it is executed.
set -o verbose -o xtrace

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,282 @@
1
+ require 'store/digest/version'
2
+ require 'store/digest/driver'
3
+ require 'store/digest/object'
4
+
5
+ class Store::Digest
6
+ private
7
+
8
# Normalize a caller-supplied value into a {Store::Digest::Object}.
#
# * a {Store::Digest::Object} is duplicated;
# * a `URI::NI` becomes an object carrying only that digest;
# * an `IO`, `String`, `StringIO`, or anything responding to `seek`,
#   `pos` and `read` is wrapped as content to be scanned later;
# * a `Pathname` is expanded, opened in binary mode and wrapped.
#
# Any non-nil metadata keyword is then assigned through the matching
# setter on the result, overwriting whatever was there.
#
# @param obj [Store::Digest::Object, URI::NI, IO, String, StringIO,
#   Pathname, #read] the thing to coerce
# @param type [String, nil] the content type
# @param charset [String, nil] the character set
# @param language [String, nil] the language
# @param encoding [String, nil] the encoding
# @param mtime [Time, nil] the modification time
# @param strict [true, false] whether a `RuntimeError` raised by a
#   metadata setter is propagated (`true`) or swallowed (`false`)
# @return [Store::Digest::Object]
# @raise [ArgumentError] if `obj` is none of the supported kinds
def coerce_object obj, type: nil, charset: nil,
    language: nil, encoding: nil, mtime: nil, strict: true
  meta = { type: type, charset: charset, language: language,
           encoding: encoding, mtime: mtime }
  io_like = -> x { %i[seek pos read].all? { |m| x.respond_to? m } }

  obj =
    case obj
    when Store::Digest::Object
      obj.dup
    when URI::NI
      # only the digest URI is known at this point
      Store::Digest::Object.new digests: obj, **meta
    when IO, String, StringIO, io_like
      # content that will be scanned later
      Store::Digest::Object.new obj, **meta
    when Pathname
      # pathnames handed in directly are actually opened
      Store::Digest::Object.new obj.expand_path.open('rb'), **meta
    else
      raise ArgumentError,
        "Can't coerce a #{obj.class} to Store::Digest::Object"
    end

  # overwrite the user-mutable metadata
  meta.each do |field, value|
    next unless value
    begin
      obj.send "#{field}=", value
    rescue RuntimeError
      raise if strict
    end
  end

  obj
end
48
+
49
+ public
50
+
51
# Initialize a storage.
#
# The `:driver` option selects the storage driver and is removed from
# the options; every remaining option is handed to the driver's
# `setup`. The driver may be given as a module, or as a name (String,
# Symbol, …) that is looked up under {Store::Digest::Driver}. When
# omitted, {Store::Digest::Driver::LMDB} is used.
#
# @param options [Hash] `:driver` plus driver-specific setup options
# @raise [ArgumentError] if the named driver does not exist (including
#   names that are not valid constant names, such as `:lmdb`), or if
#   the resolved value is not a module descending from
#   {Store::Digest::Driver}
def initialize **options
  driver = options.delete(:driver) || Store::Digest::Driver::LMDB

  unless driver.is_a? Module
    # coerce to symbol
    name = driver.to_s.to_sym

    # const_defined? raises NameError for strings that are not valid
    # constant names; treat that the same as "no such driver"
    known = begin
      Store::Digest::Driver.const_defined? name
    rescue NameError
      false
    end

    raise ArgumentError,
      "There is no storage driver Store::Digest::Driver::#{name}" unless known

    driver = Store::Digest::Driver.const_get name
  end

  # the looked-up constant is not necessarily a module, so check that
  # before asking about its ancestry
  raise ArgumentError,
    "Driver #{driver} is not a Store::Digest::Driver" unless
    driver.is_a?(Module) && driver <= Store::Digest::Driver

  # mix the driver into this one instance only
  extend driver

  # hand the remaining options to the driver
  setup(**options)
end
73
+
74
+ # XXX this is not right; leave it for now
75
+ # def to_s
76
+ # '<%s:0x%016x objects=%d deleted=%d bytes=%d>' %
77
+ # [self.class, self.object_id, objects, deleted, bytes]
78
+ # end
79
+
80
+ # alias_method :inspect, :to_s
81
+
82
# Add an object to the store. Takes pretty much anything that
# {#coerce_object} can turn into a {Store::Digest::Object}.
#
# The content is scanned for digests while being copied into a
# temporary blob; the metadata is then recorded and, if that changed
# anything, the blob is settled into permanent storage.
#
# @note Prefabricated {Store::Digest::Object} instances will be
#  rescanned.
#
# @note `:preserve` will cause a noop if object metadata is identical
#  save for `:ctime` and `:mtime` (`:ctime` is always ignored).
#
# @param obj [IO,File,Pathname,String,Store::Digest::Object] the object
# @param type [String] the content type
# @param charset [String] the character set, if applicable
# @param language [String] the language, if applicable
# @param encoding [String] the encoding (eg compression) if applicable
# @param mtime [Time] the modification time, if not "now"
# @param strict [true, false] strict checking on metadata input
# @param preserve [false, true] preserve existing modification time
# @return [Store::Digest::Object, nil] The (potentially pre-existing)
#  entry, or `nil` when `obj` is `nil` or `false`
# @raise [ArgumentError] if the coerced object has no content
def add obj, type: nil, charset: nil, language: nil, encoding: nil,
    mtime: nil, strict: true, preserve: false
  return unless obj

  meta = { type: type, charset: charset, language: language,
           encoding: encoding, mtime: mtime }

  obj = coerce_object obj, **meta, strict: strict
  raise ArgumentError, 'We need something to store!' unless obj.content?

  scratch = temp_blob

  # compute the digests, copying each scanned chunk into the temp blob
  obj.scan(digests: algorithms, blocksize: 2**20, strict: strict,
           **meta) { |chunk| scratch << chunk }

  # an object that has just been scanned is necessarily not deleted
  obj.dtime = nil

  # set_meta returns nil if there is no difference in what is set
  stored = set_meta(obj, preserve: preserve)

  unless stored
    # nothing changed: throw the temp blob away and hand back the
    # existing entry, flagged as not fresh
    scratch.close
    scratch.unlink

    obj = get obj
    obj.fresh? false
    return obj
  end

  source = obj.content

  # swap an open File for a closure that reopens it on demand, to
  # prevent too many open files
  if source.is_a? File
    location = Pathname(source.path).expand_path
    source   = -> { location.open('rb') }
  end

  obj = Store::Digest::Object.new source, fresh: true, **stored

  # now settle the blob into storage
  settle_blob obj[primary].digest, scratch, mtime: obj.mtime

  obj
end
151
+
152
# Retrieve an object from the store.
#
# The lookup runs inside `transaction`: the argument is coerced, its
# metadata fetched, and the blob keyed by the primary digest attached
# (the blob may be nil).
#
# @param obj [Store::Digest::Object, URI::NI, IO, String, Pathname]
#  anything {#coerce_object} accepts
# @return [Store::Digest::Object, nil] whatever `transaction` returns
#  for the lookup block -- presumably the block's own value, which is
#  nil when no metadata exists for `obj`
def get obj
  lookup = lambda do
    obj  = coerce_object obj
    meta = get_meta obj
    return unless meta # bail if this does not exist

    blob = get_blob meta[:digests][primary].digest # may be nil
    Store::Digest::Object.new blob, **meta
  end

  transaction(&lookup)
end
163
+
164
# Remove an object from the store, optionally "forgetting" it ever existed.
#
# An object that has not been scanned yet is scanned first so its
# digests are known. The metadata is then either purged (`forget`) or
# marked as deleted inside a transaction, and the blob keyed by the
# primary digest is removed.
#
# @param obj [Store::Digest::Object, URI::NI, IO, String, Pathname]
#  anything {#coerce_object} accepts
# @param forget [false, true] purge the metadata instead of marking it
#  as deleted
# @return [Store::Digest::Object, nil] the removed object, or nil when
#  there was no metadata or no blob to remove
# @raise [ArgumentError] if the object is unscanned and has no content
def remove obj, forget: false
  obj = coerce_object obj

  unless obj.scanned?
    unless obj.content?
      raise ArgumentError, 'Cannot scan object because there is no content'
    end
    obj.scan digests: algorithms, blocksize: 2**20
  end

  # update the metadata first; the blob goes afterwards
  meta = nil
  transaction do
    meta = if forget then remove_meta(obj) else mark_meta_deleted(obj) end
  end
  return unless meta

  blob = remove_blob meta[:digests][primary].digest
  return unless blob

  Store::Digest::Object.new blob, **meta
end
187
+
188
# Remove an object from the store and "forget" it ever existed,
# i.e., purge it from the metadata. Shorthand for {#remove} with
# `forget: true`.
#
# @param obj the object to forget, as accepted by {#remove}
# @return [Store::Digest::Object, nil] whatever {#remove} returns
def forget obj
  remove(obj, forget: true)
end
194
+
195
# Return statistics on the store.
#
# @return [Store::Digest::Stats] built from the hash that
#  `meta_get_stats` returns
def stats
  figures = meta_get_stats
  Stats.new(**figures)
end
199
+
200
# A snapshot of store-wide statistics, with helpers to render it for
# humans.
class Stats
  # Display labels for the per-category breakdowns; a category missing
  # here falls back to its capitalized key.
  LABELS = {
    charsets: "Character sets",
  }.transform_values(&:freeze).freeze

  # Binary unit suffixes, indexed by power of 1024.
  MAGNITUDES = %w[B KiB MiB GiB TiB PiB].freeze

  # The per-category breakdowns that may be present, in display order.
  BREAKDOWNS = %i[types languages charsets encodings].freeze

  attr_reader :ctime, :mtime, :objects, :deleted, :bytes

  # At this juncture the constructor just puts whatever you throw at
  # it into the object: every option becomes an instance variable of
  # the same name. See {Store::Digest::Meta::LMDB#meta_get_stats} for
  # the real magic.
  # @param options [Hash]
  def initialize **options
    options.each { |k, v| instance_variable_set "@#{k}", v }
  end

  # Return a human-readable byte size.
  #
  # Sizes below 1 KiB (and a missing size, which counts as zero) are
  # rendered as plain bytes. Sizes beyond the largest known unit are
  # expressed in that largest unit rather than with a blank suffix.
  #
  # @return [String] a representation of the byte size of the store.
  def human_size
    bytes = @bytes.to_i

    # bit_length - 1 is floor(log2(bytes)), computed exactly on the
    # integer; dividing by ten gives the power of 1024, which also
    # happens to conveniently work as an index into MAGNITUDES
    mag = bytes > 0 ? (bytes.bit_length - 1) / 10 : 0
    mag = MAGNITUDES.length - 1 if mag >= MAGNITUDES.length

    return "#{bytes} bytes" unless mag > 0

    '%0.2f %s (%d bytes)' %
      [(bytes.to_f / 2**(mag * 10)).round(2), MAGNITUDES[mag], bytes]
  end

  # Return the non-empty breakdowns as a structure keyed by the
  # singular category name.
  #
  # @return [Hash{Symbol => Array}] each value is a pair of the display
  #  label and a hash of the category's counts, ordered by key
  def label_struct
    BREAKDOWNS.each_with_object({}) do |k, out|
      stats = instance_variable_get("@#{k}")
      next unless stats && !stats.empty?

      # all these plurals are just inflected with 's', so clipping off
      # the last character is correct
      singular = k.to_s[0...-1].to_sym
      ordered  = stats.keys.sort.each_with_object({}) do |s, h|
        h[s] = stats[s]
      end

      out[singular] = [LABELS.fetch(k, k.capitalize), ordered]
    end
  end

  # Return the stats object as a nicely formatted string.
  # @return [String] no joke.
  def to_s
    out = <<~EOT
      #{self.class}
      Statistics:
      Created: #{@ctime}
      Last modified: #{@mtime}
      Total objects: #{@objects}
      Deleted records: #{@deleted}
      Repository size: #{human_size}
    EOT

    BREAKDOWNS.each do |k|
      stats = instance_variable_get("@#{k}")
      next unless stats && !stats.empty?

      out << " #{LABELS.fetch k, k.capitalize}: #{stats.count}\n"
      stats.keys.sort.each { |s| out << " #{s}: #{stats[s]}\n" }
    end

    out
  end
end
281
+
282
+ end
@@ -0,0 +1,7 @@
1
module Store
  class Digest
    # Abstract namespace for blob operations. It defines no behaviour
    # of its own; implementations live in modules nested beneath it.
    module Blob
    end
  end
end
@@ -0,0 +1,146 @@
1
+ require 'store/digest/blob'
2
+ require 'store/digest/trait'
3
+
4
+ require 'time'
5
+ require 'pathname'
6
+ require 'base32'
7
+ require 'tempfile'
8
+
9
+ module Store::Digest::Blob::FileSystem
10
+ include Store::Digest::Trait::RootDir
11
+
12
+ private
13
+
14
+ STORE = 'store'.freeze
15
+ TMP = 'tmp'.freeze
16
+
17
# The directory holding the settled blobs: the `STORE` subdirectory
# of the root `dir`.
# @return [Pathname]
def store
  dir.join STORE
end
22
+
23
# The directory for blobs in flight: the `TMP` subdirectory of the
# root `dir`.
# @return [Pathname]
def tmp
  dir.join TMP
end
28
+
29
# Return a hash-pathed location of the blob, suitable for
# case-insensitive file systems.
#
# The digest is Base32-encoded, stripped of `=` padding and
# lower-cased; the first three groups of four characters become
# nested directories and the remainder the file name.
#
# @param bin [String] The binary representation of the keying digest
# @return [Pathname] The path for the blob beneath {#store}
def path_for bin
  encoded  = Base32.encode(bin).delete('=').downcase
  segments = encoded.unpack('a4a4a4a*')
  store + segments.join('/')
end
37
+
38
+ protected
39
+
40
# Prepare the on-disk layout after the inherited setup has run.
#
# Ensures both the `STORE` and `TMP` subdirectories of `dir` exist.
# A missing one is created group-sticky (setgid), with permissions
# derived from `umask`; an existing one must be a readable, writable
# and enterable directory.
#
# @param options [Hash] passed through unchanged to `super`
# @raise [RuntimeError] if an existing entry fails one of the checks
# @raise [SystemCallError] if a directory cannot be created
def setup **options
  super

  [STORE, TMP].each do |name|
    subdir = dir + name

    unless subdir.exist?
      # Dir.mkdir is used because Pathname#mkdir takes no mode argument
      Dir.mkdir subdir, 0777 & ~umask | 02000
      next
    end

    raise "#{subdir} exists and is not a directory!" unless subdir.directory?
    raise "#{subdir} is not readable!" unless subdir.readable?
    raise "#{subdir} is not writable!" unless subdir.writable?
    raise "#{subdir} cannot be entered!" unless subdir.executable?
  end
end
56
+
57
# Return an open tempfile in the designated temp directory ({#tmp}),
# its name prefixed with `blob`.
# @return [Tempfile]
def temp_blob
  Tempfile.new('blob', tmp)
end
62
+
63
# Settle a blob from its temporary location to its permanent location.
#
# The handle is flushed and closed, the hashed directory path is
# created as needed, and the file is moved into place, made read-only
# and stamped with `mtime`. If the target already exists and
# `overwrite` is false, the target is left untouched and the now
# redundant source file is deleted, so the source never lingers in the
# temp directory.
#
# @param bin [String] The binary representation of the keying digest
# @param fh [File] An open filehandle, presumably a temp file
# @param mtime [nil, Time, DateTime, Integer] the modification time
#  (defaults to now)
# @param overwrite [false, true] whether to overwrite the target
# @return [true] a throwaway return value
# @raise [ArgumentError] if `mtime` is of an unsupported type
# @raise [SystemCallError] as we are mucking with the file system
def settle_blob bin, fh, mtime: nil, overwrite: false
  # reduce the mtime to integer epoch seconds
  mtime ||= Time.now
  stamp =
    case mtime
    when Time    then mtime.to_i
    when Integer then mtime
    when -> x { x.respond_to? :to_time }
      mtime.to_time.to_i
    else
      raise ArgumentError,
        "mtime must be a Time, DateTime, or Integer, not #{mtime.class}"
    end

  # get the filenames
  source = fh.path
  target = path_for bin

  # make sure this thing is flushed
  unless fh.closed?
    fh.flush
    fh.close
  end

  # these can all raise, of course
  FileUtils.mkpath(target.dirname, mode: 0777 & ~umask | 02000)

  if overwrite || !target.exist?
    FileUtils.mv source, target
    target.chmod 0444 & ~umask
    target.utime stamp, stamp
  elsif source && File.exist?(source)
    # the blob is already in place; drop the redundant copy rather
    # than leaving it behind in the temp directory
    File.unlink source
  end

  true
end
105
+
106
# Return a blob filehandle (or closure that will return said blob).
#
# @param bin [String] The binary representation of the keying digest
# @param direct [false, true] whether to open the filehandle directly
#  instead of returning a closure that opens it on demand
# @return [Proc, IO, nil] Either a closure or the blob itself, or nil
#  if no blob exists for the digest
# @raise [RuntimeError] blows up if the blob is not a readable file
# @raise [SystemCallError] if there's trouble opening the blob
def get_blob bin, direct: false
  location = path_for bin
  return unless location.exist?

  hex = bin.unpack('H*').first
  raise "Blob #{hex} is not a file!" unless location.file?
  raise "Blob #{hex} is not readable!" unless location.readable?

  # each call opens a fresh read-only binary handle
  opener = -> { location.open('rb') }
  direct ? opener.call : opener
end
122
+
123
# Remove a blob based on its binary digest value, then prune any
# directories under {#store} on its path that are left empty.
#
# @param bin [String] The binary representation of the keying digest
# @return [File, nil] a read handle on the unlinked blob, opened just
#  before removal, or nil if there was no such blob
# @raise [SystemCallError] since it's mucking with the file system
def remove_blob bin
  # XXX we should really flock the directory stack
  target = path_for bin

  handle = nil
  if target.exist?
    handle = target.open 'rb'
    target.unlink
  end

  # walk from the deepest containing directory up towards the store
  # root, removing each level that is now empty
  levels = target.dirname.relative_path_from(store).to_s.split('/')
  levels.length.downto(1) do |depth|
    branch = store + levels.first(depth).join('/')
    branch.rmdir if branch.exist? && branch.empty?
  end

  handle
end
145
+
146
+ end