store-digest 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Register the `spec` task that runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation runs the specs.
task default: :spec
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Interactive console with the gem and its bundle already loaded.
require "bundler/setup"
require "store/digest"

# Fixtures and/or initialization code can go here to make experimenting
# with the gem easier.
#
# To use Pry instead of IRB, add pry to the Gemfile and swap the two
# lines at the bottom for:
#
#   require "pry"
#   Pry.start

require "irb"
IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Development bootstrap: installs the gem's dependencies with Bundler.
# Abort on any error, on use of an unset variable, and on a failure
# anywhere in a pipeline.
set -o errexit -o nounset -o pipefail
IFS=$'\n\t'
set -o verbose -o xtrace

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,282 @@
1
+ require 'store/digest/version'
2
+ require 'store/digest/driver'
3
+ require 'store/digest/object'
4
+
5
+ class Store::Digest
6
+ private
7
+
8
# Turn whatever the caller supplied into a Store::Digest::Object.
#
# An existing Store::Digest::Object is duplicated; a URI::NI becomes
# the new object's digests; IO-like things and strings become its
# content; a Pathname is expanded and opened for binary reading.
# Any metadata argument that is set is then (re)applied through the
# object's own setters.
#
# @param obj [Store::Digest::Object, URI::NI, IO, String, StringIO,
#  Pathname, #read] the thing to coerce; duck-typed inputs must
#  respond to `seek`, `pos` and `read`
# @param type [String, nil] the content type
# @param charset [String, nil] the character set
# @param language [String, nil] the language
# @param encoding [String, nil] the content encoding
# @param mtime [Time, nil] the modification time
# @param strict [true, false] whether a RuntimeError raised by a
#  metadata setter is re-raised (true) or swallowed (false)
# @return [Store::Digest::Object]
# @raise [ArgumentError] if `obj` is none of the supported kinds
def coerce_object obj, type: nil, charset: nil,
    language: nil, encoding: nil, mtime: nil, strict: true
  # the user-mutable metadata, in the order it gets applied
  meta = { type: type, charset: charset, language: language,
           encoding: encoding, mtime: mtime }

  # anything that quacks like a seekable stream
  streamlike = ->(x) { %i[seek pos read].all? { |m| x.respond_to? m } }

  coerced =
    case obj
    when Store::Digest::Object
      obj.dup
    when URI::NI
      # nothing to scan, just carry the digest URI
      Store::Digest::Object.new digests: obj, **meta
    when IO, String, StringIO, streamlike
      # assume this is going to be scanned later
      Store::Digest::Object.new obj, **meta
    when Pathname
      # pathnames handed directly to us actually get opened
      Store::Digest::Object.new obj.expand_path.open('rb'), **meta
    else
      raise ArgumentError,
        "Can't coerce a #{obj.class} to Store::Digest::Object"
    end

  # overwrite the user-mutable metadata with whatever was supplied
  meta.each do |field, value|
    next unless value

    begin
      coerced.send "#{field}=", value
    rescue RuntimeError
      raise if strict
    end
  end

  coerced
end
48
+
49
+ public
50
+
51
# Initialize a storage.
#
# The `:driver` option selects the storage driver; it may be a module
# or the name of a constant under Store::Digest::Driver, and defaults
# to Store::Digest::Driver::LMDB. The driver is mixed into this
# instance and the remaining options are handed to `setup`.
#
# @param options [Hash] `:driver` plus whatever `setup` accepts
# @raise [ArgumentError] if the named driver cannot be found, or if
#  the driver does not have Store::Digest::Driver among its ancestors
def initialize **options
  driver = options.delete(:driver) || Store::Digest::Driver::LMDB

  unless driver.is_a? Module
    # coerce to symbol
    name = driver.to_s.to_sym

    known = begin
      Store::Digest::Driver.const_defined? name
    rescue NameError
      # const_defined? raises rather than returning false when the
      # name is not a legal constant name (e.g. "lmdb"); report that
      # as an unknown driver like any other bad name
      false
    end

    raise ArgumentError,
      "There is no storage driver Store::Digest::Driver::#{name}" unless known

    driver = Store::Digest::Driver.const_get name
  end

  raise ArgumentError,
    "Driver #{driver} is not a Store::Digest::Driver" unless
    driver.ancestors.include? Store::Digest::Driver

  # the driver's methods land on this one instance only
  extend driver

  setup(**options)
end
73
+
74
+ # XXX this is not right; leave it for now
75
+ # def to_s
76
+ # '<%s:0x%016x objects=%d deleted=%d bytes=%d>' %
77
+ # [self.class, self.object_id, objects, deleted, bytes]
78
+ # end
79
+
80
+ # alias_method :inspect, :to_s
81
+
82
# Add an object to the store. Takes pretty much anything that can be
# coerced into a {Store::Digest::Object}: an IO or IO-like stream, a
# String, a Pathname, or an existing object.
#
# @note Prefabricated {Store::Digest::Object} instances will be
#  rescanned.
#
# @note `:preserve` will cause a noop if object metadata is identical
#  save for `:ctime` and `:mtime` (`:ctime` is always ignored).
#
# @param obj [IO,File,Pathname,String,Store::Digest::Object] the object
# @param type [String] the content type
# @param charset [String] the character set, if applicable
# @param language [String] the language, if applicable
# @param encoding [String] the encoding (eg compression) if applicable
# @param mtime [Time] the modification time, if not "now"
# @param strict [true, false] strict checking on metadata input
# @param preserve [false, true] preserve existing modification time
# @return [Store::Digest::Object, nil] The (potentially pre-existing)
#  entry, or nil if `obj` was nil or false
# @raise [ArgumentError] if the coerced object has no content
def add obj, type: nil, charset: nil, language: nil, encoding: nil,
    mtime: nil, strict: true, preserve: false
  return unless obj

  # NOTE: this whole method is deliberately not wrapped in a
  # transaction for now
  obj = coerce_object obj, type: type, charset: charset,
    language: language, encoding: encoding, mtime: mtime, strict: strict
  raise ArgumentError, 'We need something to store!' unless obj.content?

  tmp  = temp_blob
  meta = nil

  begin
    # XXX this is stupid; figure out a better way to do this

    # get our digests, copying the content into the temp blob as we go
    obj.scan(digests: algorithms, blocksize: 2**20, strict: strict,
      type: type, charset: charset, language: language,
      encoding: encoding, mtime: mtime) do |buf|
      tmp << buf
    end

    # if we are scanning an object it is necessarily not deleted
    obj.dtime = nil

    # set_meta will return nil if there is no difference in what is set
    if meta = set_meta(obj, preserve: preserve)
      content = obj.content

      # do this to prevent too many open files
      if content.is_a? File
        path = Pathname(content.path).expand_path
        content = -> { path.open('rb') }
      end

      # replace the object
      obj = Store::Digest::Object.new content, fresh: true, **meta

      # now settle the blob into storage
      settle_blob obj[primary].digest, tmp, mtime: obj.mtime
    end
  rescue StandardError
    # don't leave an orphaned temp blob lying around when scanning,
    # recording metadata, or settling fails
    tmp.close
    tmp.unlink
    raise
  end

  unless meta
    # nothing changed, so the copy we just made is redundant
    tmp.close
    tmp.unlink

    # eh just do this
    obj = get obj
    obj.fresh? false # object is not fresh since we already have it
  end

  obj
end
151
+
152
# Retrieve an object from the store.
#
# The lookup runs inside `transaction`.
#
# @param obj [Store::Digest::Object, URI::NI, IO, String, Pathname]
#  anything that can be coerced into a Store::Digest::Object
# @return [Store::Digest::Object, nil] the stored entry, or nil when
#  there is no metadata for it; the entry's content may be nil if the
#  blob itself is absent
def get obj
  # a lambda rather than a bare block so that `return` only leaves
  # the lookup itself
  lookup = lambda do
    wanted = coerce_object obj
    meta   = get_meta(wanted)
    return unless meta # bail if this does not exist

    blob = get_blob meta[:digests][primary].digest # may be nil
    Store::Digest::Object.new blob, **meta
  end

  transaction(&lookup)
end
163
+
164
# Remove an object from the store, optionally "forgetting" it ever
# existed.
#
# An object that has not been scanned yet is scanned first so that
# its digests are known.
#
# @param obj [Store::Digest::Object, URI::NI, IO, String, Pathname]
#  anything that can be coerced into a Store::Digest::Object
# @param forget [false, true] remove the metadata record outright
#  instead of marking it as deleted
# @return [Store::Digest::Object, nil] the removed entry, or nil when
#  there was no metadata record or no blob to remove
# @raise [ArgumentError] if the object is unscanned and has no content
def remove obj, forget: false
  target = coerce_object obj

  if !target.scanned?
    raise ArgumentError,
      'Cannot scan object because there is no content' unless target.content?
    target.scan digests: algorithms, blocksize: 2**20
  end

  # drop (or flag) the metadata record first
  meta = nil
  transaction do
    meta = if forget
             remove_meta(target)
           else
             mark_meta_deleted(target)
           end
  end
  return unless meta

  # then take the blob out of storage
  blob = remove_blob(meta[:digests][primary].digest)
  return unless blob

  Store::Digest::Object.new blob, **meta
end
187
+
188
# Remove an object from the store and purge its metadata record as
# well, so that no trace of it remains. Shorthand for calling
# {#remove} with `forget: true`.
#
# @param obj the object to forget, as accepted by {#remove}
# @return whatever {#remove} returns
def forget obj
  remove(obj, forget: true)
end
194
+
195
# Return statistics on the store.
#
# @return [Store::Digest::Stats] built from whatever the driver's
#  `meta_get_stats` reports
def stats
  raw = meta_get_stats
  Stats.new(**raw)
end
199
+
200
# A snapshot of store-wide statistics: timestamps, record counts,
# total size, and per-facet breakdowns (types, languages, charsets,
# encodings).
class Stats
  # i dunno do you wanna come up with funny labels? here's where you put em
  # (facets without an entry fall back to their capitalized key)
  LABELS = {
    charsets: "Character sets",
  }.transform_values(&:freeze).freeze

  # lol, petabytes
  MAGNITUDES = %w[B KiB MiB GiB TiB PiB].freeze

  # The breakdowns that may be present, in display order.
  FACETS = %i[types languages charsets encodings].freeze
  private_constant :FACETS

  attr_reader :ctime, :mtime, :objects, :deleted, :bytes

  # At this juncture the constructor just puts whatever you throw at
  # it into the object. See
  # {Store::Digest::Meta::LMDB#meta_get_stats} for the real magic.
  # @param options [Hash] each key becomes an instance variable of
  #  the same name
  def initialize **options
    # XXX help i am so lazy
    options.each { |k, v| instance_variable_set "@#{k}", v }
  end

  # Return a human-readable byte size.
  #
  # Sizes below 1 KiB are reported as plain bytes; larger sizes are
  # scaled to the biggest fitting unit in MAGNITUDES, topping out at
  # PiB. A missing byte count is treated as zero.
  #
  # @return [String] a representation of the byte size of the store.
  def human_size
    bytes = (@bytes || 0).to_i

    # each unit is ten more bits, so the position of the highest set
    # bit gives the magnitude exactly (no floating-point logarithm);
    # clamping keeps zero at "bytes" and anything past the last unit
    # on the last unit instead of running off the end of the array
    mag = ((bytes.bit_length - 1) / 10).clamp(0, MAGNITUDES.size - 1)
    return "#{bytes} bytes" unless mag > 0

    '%0.2f %s (%d bytes)' % [(bytes.to_f / 2**(mag * 10)).round(2),
      MAGNITUDES[mag], bytes]
  end

  # Return the non-empty breakdowns as a structure keyed by the
  # singular facet name (`:type`, `:language`, `:charset`,
  # `:encoding`).
  #
  # @return [Hash{Symbol => Array}] each value is a pair of the
  #  facet's label and a hash of its counts, sorted by key
  def label_struct
    FACETS.each_with_object({}) do |facet, out|
      counts = instance_variable_get("@#{facet}")
      next unless counts && !counts.empty?

      # XXX note that all these plurals are just inflected with
      # 's' so clipping off the last character is correct
      singular = facet.to_s[0...-1].to_sym
      label    = LABELS.fetch(facet, facet.capitalize)

      out[singular] = [label, counts.keys.sort.to_h { |k| [k, counts[k]] }]
    end
  end

  # Return the stats object as a nicely formatted string.
  # @return [String] no joke.
  def to_s
    out = <<~EOT
      #{self.class}
      Statistics:
      Created: #{@ctime}
      Last modified: #{@mtime}
      Total objects: #{@objects}
      Deleted records: #{@deleted}
      Repository size: #{human_size}
    EOT

    FACETS.each do |facet|
      counts = instance_variable_get("@#{facet}")
      next unless counts && !counts.empty?

      out << " #{LABELS.fetch facet, facet.capitalize}: #{counts.count}\n"
      counts.keys.sort.each do |k|
        out << " #{k}: #{counts[k]}\n"
      end
    end

    out
  end
end
281
+
282
+ end
@@ -0,0 +1,7 @@
1
module Store
  class Digest
    # Abstract namespace for blob operations. It defines nothing by
    # itself; concrete blob back ends are declared beneath it.
    module Blob
    end
  end
end
@@ -0,0 +1,146 @@
1
require 'store/digest/blob'
require 'store/digest/trait'

require 'time'
require 'pathname'
require 'base32'
require 'fileutils'
require 'tempfile'
8
+
9
+ module Store::Digest::Blob::FileSystem
10
+ include Store::Digest::Trait::RootDir
11
+
12
+ private
13
+
14
+ STORE = 'store'.freeze
15
+ TMP = 'tmp'.freeze
16
+
17
# Where settled blobs live: the STORE subdirectory of the root.
# @return [Pathname]
def store
  dir.join(STORE)
end
22
+
23
# Where in-flight blobs are staged: the TMP subdirectory of the root.
# @return [Pathname]
def tmp
  dir.join(TMP)
end
28
+
29
# Return a hash-pathed location of the blob, suitable for
# case-insensitive file systems: the digest is Base32-encoded,
# stripped of padding, lowercased, and split into three
# four-character directory levels plus the remainder.
# @param bin [String] The binary representation of the keying digest
# @return [Pathname] The path for the blob beneath the store
def path_for bin
  encoded  = Base32.encode(bin).delete('=').downcase
  segments = encoded.unpack('a4a4a4a*')
  store + segments.join('/')
end
37
+
38
+ protected
39
+
40
# Prepare the blob directories after the rest of the setup chain has
# run: each of STORE and TMP is created beneath the root if missing,
# otherwise checked for being a usable directory.
# @param options [Hash] passed up the `super` chain untouched
# @raise [RuntimeError] if an existing entry is not a directory, or
#  is not readable, writable and enterable
def setup **options
  super

  [STORE, TMP].each do |name|
    subdir = dir + name

    unless subdir.exist?
      # group-sticky, and otherwise as open as the umask allows
      Dir.mkdir subdir, 0777 & ~umask | 02000
      next
    end

    raise "#{subdir} exists and is not a directory!" unless subdir.directory?
    raise "#{subdir} is not readable!" unless subdir.readable?
    raise "#{subdir} is not writable!" unless subdir.writable?
    raise "#{subdir} cannot be entered!" unless subdir.executable?
  end
end
56
+
57
# Open a fresh tempfile inside the designated temp directory, so it
# can later be settled into the store.
# @return [Tempfile]
def temp_blob
  Tempfile.new('blob', tmp)
end
62
+
63
# Settle a blob from its temporary location to its permanent location.
#
# The filehandle is flushed and closed, then its file is moved into
# place, made read-only, and stamped with the given modification
# time. If the target already exists and `overwrite` is off, the
# target is left alone and the now-redundant source file is deleted.
#
# @param bin [String] The binary representation of the keying digest
# @param fh [File] An open filehandle, presumably a temp file
# @param mtime [nil, Time, DateTime, Integer] the modification time
#  (defaults to now)
# @param overwrite [false, true] whether to overwrite the target
# @return [true] a throwaway return value
# @raise [ArgumentError] if `mtime` is not something time-like
# @raise [SystemCallError] as we are mucking with the file system
def settle_blob bin, fh, mtime: nil, overwrite: false
  # normalize the mtime to epoch seconds
  mtime ||= Time.now
  mtime = case mtime
          when Time then mtime.to_i
          when Integer then mtime
          when ->(x) { x.respond_to? :to_time }
            mtime.to_time.to_i
          else
            raise ArgumentError,
              "mtime must be a Time, DateTime, or Integer, not #{mtime.class}"
          end

  # get the filenames
  source = fh.path
  target = path_for bin

  # make sure this thing is flushed
  unless fh.closed?
    fh.flush
    fh.close
  end

  # these can all raise, of course
  FileUtils.mkpath(target.dirname, mode: 0777 & ~umask | 02000)

  if !target.exist? || overwrite
    FileUtils.mv source, target
    target.chmod 0444 & ~umask
    target.utime mtime, mtime
  elsif source
    # the blob is already in place; don't strand the duplicate in the
    # temp directory
    FileUtils.rm_f source
  end

  true
end
105
+
106
# Return a blob filehandle (or closure that will return said blob).
# Deferring the open behind a closure avoids holding a descriptor
# until the content is actually wanted.
# @param bin [String] The binary representation of the keying digest
# @param direct [false, true] whether to open the filehandle directly
# @return [Proc, IO, nil] Either a closure or the blob itself, or nil
#  if there is no blob at the digest's path
# @raise [RuntimeError] blows up if the blob is not what is expected
# @raise [SystemCallError] if there's trouble opening the blob
def get_blob bin, direct: false
  path = path_for bin
  return unless path.exist?

  hex = bin.unpack1 'H*'
  raise "Blob #{hex} is not a file!" unless path.file?
  raise "Blob #{hex} is not readable!" unless path.readable?

  opener = -> { path.open('rb') }
  if direct
    opener.call
  else
    opener
  end
end
122
+
123
# Remove a blob based on its binary digest value, then prune any
# directories beneath the store that the removal left empty.
# @param bin [String] The binary representation of the keying digest
# @return [File, nil] a read handle on the blob, opened before it was
#  unlinked, or nil if there was no blob
# @raise [SystemCallError] since it's mucking with the file system
def remove_blob bin
  # XXX we should really flock the directory stack
  path   = path_for bin
  handle = nil

  if path.exist?
    # grab a handle first so the content can still be read afterwards
    handle = path.open 'rb'
    path.unlink
  end

  # XXX we should really flock the directory stack
  segments = path.dirname.relative_path_from(store).to_s.split '/'

  # walk from the deepest directory back up towards the store
  segments.length.downto(1) do |depth|
    subpath = store + segments.first(depth).join('/')
    subpath.rmdir if subpath.exist? && subpath.empty?
  end

  handle
end
145
+
146
+ end