dis 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +20 -0
- data/README.md +99 -66
- data/lib/dis/errors.rb +8 -0
- data/lib/dis/jobs/change_type.rb +9 -2
- data/lib/dis/jobs/delete.rb +6 -1
- data/lib/dis/jobs/evict.rb +24 -0
- data/lib/dis/jobs/store.rb +8 -1
- data/lib/dis/jobs.rb +1 -0
- data/lib/dis/layer.rb +174 -55
- data/lib/dis/layers.rb +56 -5
- data/lib/dis/model/class_methods.rb +24 -7
- data/lib/dis/model/data.rb +41 -11
- data/lib/dis/model.rb +43 -17
- data/lib/dis/storage.rb +209 -42
- data/lib/dis/validations/data_presence.rb +9 -1
- data/lib/dis/version.rb +1 -1
- data/lib/dis.rb +14 -1
- data/lib/rails/generators/dis/install/templates/initializer.rb +11 -0
- data/lib/tasks/dis.rake +43 -38
- metadata +27 -11
data/lib/dis/model.rb
CHANGED
|
@@ -7,7 +7,7 @@ module Dis
|
|
|
7
7
|
# = Dis Model
|
|
8
8
|
#
|
|
9
9
|
# ActiveModel extension for the model holding your data. To use it,
|
|
10
|
-
#
|
|
10
|
+
# include the module in your model:
|
|
11
11
|
#
|
|
12
12
|
# class Document < ActiveRecord::Base
|
|
13
13
|
# include Dis::Model
|
|
@@ -36,24 +36,24 @@ module Dis
|
|
|
36
36
|
#
|
|
37
37
|
# == Usage
|
|
38
38
|
#
|
|
39
|
-
# To save a file,
|
|
39
|
+
# To save a file, assign to the <tt>file</tt> attribute.
|
|
40
40
|
#
|
|
41
41
|
# document = Document.create(file: params.permit(:file))
|
|
42
42
|
#
|
|
43
43
|
# <tt>content_type</tt> and <tt>filename</tt> will automatically be set if
|
|
44
|
-
# the supplied object quacks like a file. <tt>content_length</tt>
|
|
45
|
-
#
|
|
44
|
+
# the supplied object quacks like a file. <tt>content_length</tt> and
|
|
45
|
+
# <tt>content_hash</tt> will always be set.
|
|
46
46
|
#
|
|
47
|
-
#
|
|
48
|
-
#
|
|
47
|
+
# To store a binary blob without filenames or content types, set the
|
|
48
|
+
# <tt>data</tt> attribute directly.
|
|
49
49
|
#
|
|
50
50
|
# my_data = File.read('document.pdf')
|
|
51
51
|
# document.update(data: my_data)
|
|
52
52
|
#
|
|
53
|
-
# The data won't be stored until the record is saved, and
|
|
53
|
+
# The data won't be stored until the record is saved, and only if
|
|
54
54
|
# the record is valid.
|
|
55
55
|
#
|
|
56
|
-
# To retrieve your data,
|
|
56
|
+
# To retrieve your data, read the <tt>data</tt> attribute. The file
|
|
57
57
|
# will be lazily loaded from the store on demand and cached in memory as long
|
|
58
58
|
# as the record stays in scope.
|
|
59
59
|
#
|
|
@@ -73,7 +73,7 @@ module Dis
|
|
|
73
73
|
# validates_data_presence
|
|
74
74
|
# end
|
|
75
75
|
#
|
|
76
|
-
# If you want to validate content types, size or similar,
|
|
76
|
+
# If you want to validate content types, size or similar, use standard
|
|
77
77
|
# Rails validations on the metadata attributes:
|
|
78
78
|
#
|
|
79
79
|
# validates :content_type, presence: true, format: /\Aapplication\/pdf\z/
|
|
@@ -89,18 +89,26 @@ module Dis
|
|
|
89
89
|
attribute :data, :binary
|
|
90
90
|
end
|
|
91
91
|
|
|
92
|
-
# Returns the data as a binary string, or nil if no data has
|
|
92
|
+
# Returns the data as a binary string, or nil if no data has
|
|
93
|
+
# been set.
|
|
94
|
+
#
|
|
95
|
+
# @return [String, nil]
|
|
93
96
|
def data
|
|
94
97
|
dis_data.read
|
|
95
98
|
end
|
|
96
99
|
|
|
97
100
|
# Returns true if data is set.
|
|
101
|
+
#
|
|
102
|
+
# @return [Boolean]
|
|
98
103
|
def data?
|
|
99
104
|
dis_data.any?
|
|
100
105
|
end
|
|
101
106
|
|
|
102
|
-
# Assigns new data. This also sets
|
|
103
|
-
#
|
|
107
|
+
# Assigns new data. This also sets +content_length+ and
|
|
108
|
+
# +content_hash+.
|
|
109
|
+
#
|
|
110
|
+
# @param raw_data [File, IO, String, nil] the content to store
|
|
111
|
+
# @return [void]
|
|
104
112
|
def data=(raw_data)
|
|
105
113
|
new_data = Dis::Model::Data.new(self, raw_data)
|
|
106
114
|
attribute_will_change!("data") unless new_data == dis_data
|
|
@@ -113,30 +121,48 @@ module Dis
|
|
|
113
121
|
dis_set :content_length, dis_data.content_length
|
|
114
122
|
end
|
|
115
123
|
|
|
116
|
-
# Returns true if the data has been changed since the object
|
|
124
|
+
# Returns true if the data has been changed since the object
|
|
125
|
+
# was last saved.
|
|
126
|
+
#
|
|
127
|
+
# @return [Boolean]
|
|
117
128
|
def data_changed?
|
|
118
129
|
changes.include?("data")
|
|
119
130
|
end
|
|
120
131
|
|
|
132
|
+
# Returns true if the record has been persisted and its data
|
|
133
|
+
# has not been changed since the last save.
|
|
134
|
+
#
|
|
135
|
+
# @return [Boolean]
|
|
121
136
|
def dis_stored?
|
|
122
137
|
!(new_record? || data_changed?)
|
|
123
138
|
end
|
|
124
139
|
|
|
125
|
-
# Assigns new data from an uploaded file. In addition to the
|
|
126
|
-
# performed by
|
|
127
|
-
#
|
|
140
|
+
# Assigns new data from an uploaded file. In addition to the
|
|
141
|
+
# actions performed by {#data=}, this will set +content_type+
|
|
142
|
+
# and +filename+.
|
|
143
|
+
#
|
|
144
|
+
# @param file [ActionDispatch::Http::UploadedFile,
|
|
145
|
+
# Rack::Test::UploadedFile] an uploaded file that responds to
|
|
146
|
+
# +content_type+ and +original_filename+
|
|
147
|
+
# @return [void]
|
|
128
148
|
def file=(file)
|
|
129
149
|
self.data = file
|
|
130
150
|
dis_set :content_type, file.content_type
|
|
131
151
|
dis_set :filename, file.original_filename
|
|
132
152
|
end
|
|
133
153
|
|
|
134
|
-
# Returns a file path to the data, preferring local storage
|
|
154
|
+
# Returns a file path to the data, preferring local storage
|
|
155
|
+
# paths. Falls back to a tempfile path if no local layer has
|
|
156
|
+
# the file.
|
|
157
|
+
#
|
|
158
|
+
# @return [String]
|
|
135
159
|
def data_file_path
|
|
136
160
|
dis_data.file_path
|
|
137
161
|
end
|
|
138
162
|
|
|
139
163
|
# Returns the data as a temporary file.
|
|
164
|
+
#
|
|
165
|
+
# @return [Tempfile]
|
|
140
166
|
delegate :tempfile, to: :dis_data
|
|
141
167
|
|
|
142
168
|
private
|
data/lib/dis/storage.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module Dis
|
|
4
4
|
# = Dis Storage
|
|
5
5
|
#
|
|
6
|
-
#
|
|
6
|
+
# Interface for interacting with the storage layers.
|
|
7
7
|
#
|
|
8
8
|
# All queries are scoped by object type, which will default to the table
|
|
9
9
|
# name of the model. Take care to use your own scope if you interact with
|
|
@@ -17,8 +17,13 @@ module Dis
|
|
|
17
17
|
# one writeable, non-delayed layer must exist.
|
|
18
18
|
class Storage
|
|
19
19
|
class << self
|
|
20
|
-
# Returns a hex digest for a given binary. Accepts
|
|
21
|
-
# and Fog models.
|
|
20
|
+
# Returns a hex digest for a given binary. Accepts File/IO objects,
|
|
21
|
+
# strings, and Fog models.
|
|
22
|
+
#
|
|
23
|
+
# @param file [File, IO, String, Fog::Model] the content to digest
|
|
24
|
+
# @yield [hash] if a block is given, yields the hex digest
|
|
25
|
+
# @yieldparam hash [String] the computed SHA1 hex digest
|
|
26
|
+
# @return [String] the SHA1 hex digest
|
|
22
27
|
def file_digest(file)
|
|
23
28
|
hash = case file
|
|
24
29
|
when Fog::Model
|
|
@@ -32,15 +37,25 @@ module Dis
|
|
|
32
37
|
hash
|
|
33
38
|
end
|
|
34
39
|
|
|
35
|
-
# Exposes the layer set
|
|
36
|
-
#
|
|
40
|
+
# Exposes the layer set.
|
|
41
|
+
#
|
|
42
|
+
# @return [Dis::Layers]
|
|
37
43
|
def layers
|
|
38
44
|
@layers ||= Dis::Layers.new
|
|
39
45
|
end
|
|
40
46
|
|
|
41
47
|
# Changes the type of an object. Kicks off a
|
|
42
|
-
#
|
|
48
|
+
# {Dis::Jobs::ChangeType} job if any delayed layers are defined.
|
|
43
49
|
#
|
|
50
|
+
# @param prev_type [String] the current type scope
|
|
51
|
+
# @param new_type [String] the new type scope
|
|
52
|
+
# @param key [String] the content hash
|
|
53
|
+
# @return [String] the content hash
|
|
54
|
+
# @raise [Dis::Errors::NoLayersError] if no writeable immediate
|
|
55
|
+
# layers exist
|
|
56
|
+
# @raise [Dis::Errors::NotFoundError] if the file is not found
|
|
57
|
+
#
|
|
58
|
+
# @example
|
|
44
59
|
# Dis::Storage.change_type("old_things", "new_things", key)
|
|
45
60
|
def change_type(prev_type, new_type, key)
|
|
46
61
|
require_writeable_layers!
|
|
@@ -49,29 +64,37 @@ module Dis
|
|
|
49
64
|
layers.immediate.writeable.each do |layer|
|
|
50
65
|
layer.delete(prev_type, key)
|
|
51
66
|
end
|
|
52
|
-
|
|
53
|
-
Dis::Jobs::ChangeType.perform_later(prev_type, new_type, key)
|
|
54
|
-
end
|
|
67
|
+
enqueue_delayed_jobs(prev_type, new_type, key)
|
|
55
68
|
key
|
|
56
69
|
end
|
|
57
70
|
|
|
58
|
-
# Stores a file and returns a
|
|
59
|
-
#
|
|
71
|
+
# Stores a file and returns a content hash. Kicks off a
|
|
72
|
+
# {Dis::Jobs::Store} job if any delayed layers are defined.
|
|
73
|
+
#
|
|
74
|
+
# @param type [String] the type scope (e.g. table name)
|
|
75
|
+
# @param file [File, IO, String, Fog::Model] the content to store
|
|
76
|
+
# @return [String] the SHA1 content hash
|
|
77
|
+
# @raise [Dis::Errors::NoLayersError] if no writeable immediate
|
|
78
|
+
# layers exist
|
|
60
79
|
#
|
|
61
|
-
#
|
|
80
|
+
# @example
|
|
81
|
+
# hash = Dis::Storage.store("things", File.open("foo.bin"))
|
|
62
82
|
# # => "8843d7f92416211de9ebb963ff4ce28125932878"
|
|
63
83
|
def store(type, file)
|
|
64
84
|
require_writeable_layers!
|
|
65
85
|
hash = store_immediately!(type, file)
|
|
66
|
-
if layers.delayed.writeable.any?
|
|
67
|
-
|
|
68
|
-
end
|
|
86
|
+
Dis::Jobs::Store.perform_later(type, hash) if layers.delayed.writeable.any?
|
|
87
|
+
Dis::Jobs::Evict.perform_later if layers.cache?
|
|
69
88
|
hash
|
|
70
89
|
end
|
|
71
90
|
|
|
72
91
|
# Transfers files from immediate layers to all delayed layers.
|
|
92
|
+
# Called internally by {Dis::Jobs::Store}.
|
|
73
93
|
#
|
|
74
|
-
#
|
|
94
|
+
# @param type [String] the type scope
|
|
95
|
+
# @param hash [String] the content hash
|
|
96
|
+
# @return [void]
|
|
97
|
+
# @raise [Dis::Errors::NotFoundError] if the file is not found
|
|
75
98
|
def delayed_store(type, hash)
|
|
76
99
|
file = get(type, hash)
|
|
77
100
|
layers.delayed.writeable.each do |layer|
|
|
@@ -81,75 +104,148 @@ module Dis
|
|
|
81
104
|
|
|
82
105
|
# Returns true if the file exists in any layer.
|
|
83
106
|
#
|
|
107
|
+
# @param type [String] the type scope
|
|
108
|
+
# @param key [String] the content hash
|
|
109
|
+
# @return [Boolean]
|
|
110
|
+
# @raise [Dis::Errors::NoLayersError] if no layers are configured
|
|
111
|
+
#
|
|
112
|
+
# @example
|
|
84
113
|
# Dis::Storage.exists?("things", key) # => true
|
|
85
114
|
def exists?(type, key)
|
|
86
115
|
require_layers!
|
|
87
116
|
layers.each do |layer|
|
|
88
117
|
return true if layer.exists?(type, key)
|
|
118
|
+
rescue StandardError => e
|
|
119
|
+
report_layer_error(e, layer:, type:, key:)
|
|
89
120
|
end
|
|
90
121
|
false
|
|
91
122
|
end
|
|
92
123
|
|
|
93
|
-
# Retrieves a file from the store.
|
|
94
|
-
#
|
|
95
|
-
#
|
|
124
|
+
# Retrieves a file from the store. If the first layer misses,
|
|
125
|
+
# the file is fetched from the next available layer and
|
|
126
|
+
# backfilled to all immediate layers.
|
|
96
127
|
#
|
|
97
|
-
#
|
|
98
|
-
#
|
|
128
|
+
# @param type [String] the type scope
|
|
129
|
+
# @param key [String] the content hash
|
|
130
|
+
# @return [Fog::Model] the stored file
|
|
131
|
+
# @raise [Dis::Errors::NoLayersError] if no layers are configured
|
|
132
|
+
# @raise [Dis::Errors::NotFoundError] if the file is not found
|
|
133
|
+
# in any layer
|
|
99
134
|
#
|
|
100
|
-
#
|
|
135
|
+
# @example
|
|
136
|
+
# file = Dis::Storage.get("things", hash)
|
|
137
|
+
# file.body # => "file contents..."
|
|
101
138
|
def get(type, key)
|
|
102
139
|
require_layers!
|
|
103
|
-
|
|
104
140
|
fetch_count = 0
|
|
105
141
|
result = layers.inject(nil) do |res, layer|
|
|
106
|
-
res
|
|
107
|
-
fetch_count += 1
|
|
108
|
-
layer.get(type, key)
|
|
109
|
-
end.call
|
|
110
|
-
end || raise(Dis::Errors::NotFoundError)
|
|
142
|
+
next res if res
|
|
111
143
|
|
|
112
|
-
|
|
144
|
+
fetch_count += 1
|
|
145
|
+
fetch_from_layer(layer, type, key)
|
|
146
|
+
end || raise(Dis::Errors::NotFoundError)
|
|
147
|
+
backfill!(type, result) if fetch_count > 1
|
|
113
148
|
result
|
|
114
149
|
end
|
|
115
150
|
|
|
116
151
|
# Returns the absolute file path from the first layer that has a
|
|
117
152
|
# local copy, or nil if no layer stores files locally.
|
|
118
153
|
#
|
|
119
|
-
#
|
|
154
|
+
# @param type [String] the type scope
|
|
155
|
+
# @param key [String] the content hash
|
|
156
|
+
# @return [String, nil] the absolute file path, or nil
|
|
157
|
+
# @raise [Dis::Errors::NoLayersError] if no layers are configured
|
|
120
158
|
def file_path(type, key)
|
|
121
159
|
require_layers!
|
|
122
160
|
layers.each do |layer|
|
|
123
161
|
path = layer.file_path(type, key)
|
|
124
162
|
return path if path
|
|
163
|
+
rescue StandardError => e
|
|
164
|
+
report_layer_error(e, layer:, type:, key:)
|
|
125
165
|
end
|
|
126
166
|
nil
|
|
127
167
|
end
|
|
128
168
|
|
|
129
169
|
# Deletes a file from all layers. Kicks off a
|
|
130
|
-
#
|
|
131
|
-
#
|
|
132
|
-
#
|
|
133
|
-
#
|
|
134
|
-
#
|
|
135
|
-
#
|
|
136
|
-
#
|
|
137
|
-
#
|
|
170
|
+
# {Dis::Jobs::Delete} job if any delayed layers are defined.
|
|
171
|
+
#
|
|
172
|
+
# @param type [String] the type scope
|
|
173
|
+
# @param key [String] the content hash
|
|
174
|
+
# @return [Boolean] true if the file existed in any immediate
|
|
175
|
+
# layer
|
|
176
|
+
# @raise [Dis::Errors::NoLayersError] if no writeable immediate
|
|
177
|
+
# layers exist
|
|
178
|
+
#
|
|
179
|
+
# @example
|
|
180
|
+
# Dis::Storage.delete("things", key) # => true
|
|
181
|
+
# Dis::Storage.delete("things", key) # => false
|
|
138
182
|
def delete(type, key)
|
|
139
183
|
require_writeable_layers!
|
|
140
184
|
deleted = false
|
|
141
185
|
layers.immediate.writeable.each do |layer|
|
|
142
186
|
deleted = true if layer.delete(type, key)
|
|
143
187
|
end
|
|
144
|
-
if layers.delayed.writeable.any?
|
|
145
|
-
Dis::Jobs::Delete.perform_later(type, key)
|
|
146
|
-
end
|
|
188
|
+
Dis::Jobs::Delete.perform_later(type, key) if layers.delayed.writeable.any?
|
|
147
189
|
deleted
|
|
148
190
|
end
|
|
149
191
|
|
|
192
|
+
# Evicts cached files from all cache layers that exceed
|
|
193
|
+
# their size limit. Only evicts files that have been
|
|
194
|
+
# replicated to a non-cache writeable layer.
|
|
195
|
+
#
|
|
196
|
+
# @return [void]
|
|
197
|
+
def evict_caches
|
|
198
|
+
layers.cache.each { |layer| evict_cache(layer) }
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
# Returns content hashes from the model's table that exist in
|
|
202
|
+
# no non-cache layer.
|
|
203
|
+
#
|
|
204
|
+
# @param model [Class] an ActiveRecord model that includes
|
|
205
|
+
# {Dis::Model}
|
|
206
|
+
# @yield [batch_size] called after each batch is checked
|
|
207
|
+
# @yieldparam batch_size [Integer] the number of keys in the
|
|
208
|
+
# batch
|
|
209
|
+
# @return [Array<String>] content hashes with no backing file
|
|
210
|
+
#
|
|
211
|
+
# @example
|
|
212
|
+
# Dis::Storage.missing_keys(Image)
|
|
213
|
+
def missing_keys(model)
|
|
214
|
+
attr = model.dis_attributes[:content_hash]
|
|
215
|
+
missing = []
|
|
216
|
+
|
|
217
|
+
model.where.not(attr => nil).in_batches(of: 200) do |batch|
|
|
218
|
+
keys = batch.pluck(attr)
|
|
219
|
+
missing.concat(uncovered_keys(keys.uniq, model.dis_type))
|
|
220
|
+
yield keys.size if block_given?
|
|
221
|
+
end
|
|
222
|
+
missing.uniq
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
# Returns a hash of layer => orphaned content hashes for files
|
|
226
|
+
# that exist in storage but have no matching database record.
|
|
227
|
+
#
|
|
228
|
+
# @param model [Class] an ActiveRecord model that includes
|
|
229
|
+
# {Dis::Model}
|
|
230
|
+
# @return [Hash{Dis::Layer => Array<String>}] orphaned content
|
|
231
|
+
# hashes per layer
|
|
232
|
+
#
|
|
233
|
+
# @example
|
|
234
|
+
# Dis::Storage.orphaned_keys(Image)
|
|
235
|
+
def orphaned_keys(model)
|
|
236
|
+
layers.non_cache.each_with_object({}) do |layer, result|
|
|
237
|
+
orphans = layer_orphans(layer, model.dis_type, model,
|
|
238
|
+
model.dis_attributes[:content_hash])
|
|
239
|
+
result[layer] = orphans if orphans.any?
|
|
240
|
+
end
|
|
241
|
+
end
|
|
242
|
+
|
|
150
243
|
# Deletes content from all delayed layers.
|
|
244
|
+
# Called internally by {Dis::Jobs::Delete}.
|
|
151
245
|
#
|
|
152
|
-
#
|
|
246
|
+
# @param type [String] the type scope
|
|
247
|
+
# @param key [String] the content hash
|
|
248
|
+
# @return [void]
|
|
153
249
|
def delayed_delete(type, key)
|
|
154
250
|
layers.delayed.writeable.each do |layer|
|
|
155
251
|
layer.delete(type, key)
|
|
@@ -158,6 +254,69 @@ module Dis
|
|
|
158
254
|
|
|
159
255
|
private
|
|
160
256
|
|
|
257
|
+
def enqueue_delayed_jobs(prev_type, new_type, key)
|
|
258
|
+
if layers.delayed.writeable.any?
|
|
259
|
+
Dis::Jobs::ChangeType.perform_later(
|
|
260
|
+
prev_type, new_type, key
|
|
261
|
+
)
|
|
262
|
+
end
|
|
263
|
+
Dis::Jobs::Evict.perform_later if layers.cache?
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
def uncovered_keys(keys, type)
|
|
267
|
+
remaining = keys.dup
|
|
268
|
+
layers.non_cache.each do |layer|
|
|
269
|
+
break if remaining.empty?
|
|
270
|
+
|
|
271
|
+
remaining -= layer.existing(type, remaining)
|
|
272
|
+
end
|
|
273
|
+
remaining
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
def layer_orphans(layer, type, model, attr)
|
|
277
|
+
stored = layer.stored_keys(type)
|
|
278
|
+
return [] if stored.empty?
|
|
279
|
+
|
|
280
|
+
referenced = model.where(attr => stored).pluck(attr)
|
|
281
|
+
stored - referenced
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
def evict_cache(layer)
|
|
285
|
+
return if layer.size <= layer.max_size
|
|
286
|
+
|
|
287
|
+
current_size = layer.size
|
|
288
|
+
layer.cached_files.each do |entry|
|
|
289
|
+
break if current_size <= layer.max_size
|
|
290
|
+
|
|
291
|
+
next unless replicated?(entry[:type], entry[:key])
|
|
292
|
+
|
|
293
|
+
layer.delete(entry[:type], entry[:key])
|
|
294
|
+
current_size -= entry[:size]
|
|
295
|
+
end
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
def replicated?(type, key)
|
|
299
|
+
layers.non_cache.writeable.any? do |l|
|
|
300
|
+
l.exists?(type, key)
|
|
301
|
+
rescue StandardError => e
|
|
302
|
+
report_layer_error(e, layer: l, type:, key:)
|
|
303
|
+
false
|
|
304
|
+
end
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
def fetch_from_layer(layer, type, key)
|
|
308
|
+
layer.get(type, key)
|
|
309
|
+
rescue StandardError => e
|
|
310
|
+
report_layer_error(e, layer:, type:, key:)
|
|
311
|
+
nil
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
def backfill!(type, file)
|
|
315
|
+
store_immediately!(type, file)
|
|
316
|
+
rescue StandardError => e
|
|
317
|
+
report_layer_error(e, type:)
|
|
318
|
+
end
|
|
319
|
+
|
|
161
320
|
def store_immediately!(type, file)
|
|
162
321
|
file_digest(file) do |hash|
|
|
163
322
|
layers.immediate.writeable.each do |layer|
|
|
@@ -174,6 +333,14 @@ module Dis
|
|
|
174
333
|
raise Dis::Errors::NoLayersError unless layers.immediate.writeable.any?
|
|
175
334
|
end
|
|
176
335
|
|
|
336
|
+
def report_layer_error(err, layer: nil, type: nil, key: nil)
|
|
337
|
+
Rails.error.report(
|
|
338
|
+
err, handled: true,
|
|
339
|
+
severity: :warning,
|
|
340
|
+
context: { layer: layer&.name, type:, key: }
|
|
341
|
+
)
|
|
342
|
+
end
|
|
343
|
+
|
|
177
344
|
def digest
|
|
178
345
|
Digest::SHA1
|
|
179
346
|
end
|
|
@@ -4,9 +4,17 @@ module Dis
|
|
|
4
4
|
module Validations
|
|
5
5
|
# = Dis Data Presence Validation
|
|
6
6
|
#
|
|
7
|
+
# Validates that data has been assigned to a {Dis::Model} record.
|
|
8
|
+
# Empty strings are treated as missing data.
|
|
9
|
+
#
|
|
10
|
+
# @see Dis::Model::ClassMethods#validates_data_presence
|
|
7
11
|
class DataPresence < ActiveModel::Validator
|
|
8
12
|
# Validates that a record has data, either freshly assigned or
|
|
9
|
-
# persisted in the storage. Adds a
|
|
13
|
+
# persisted in the storage. Adds a +:blank+ error on +:data+
|
|
14
|
+
# if not.
|
|
15
|
+
#
|
|
16
|
+
# @param record [ActiveRecord::Base]
|
|
17
|
+
# @return [void]
|
|
10
18
|
def validate(record)
|
|
11
19
|
return if record.data? && record.content_hash != self.class.empty_hash
|
|
12
20
|
|
data/lib/dis/version.rb
CHANGED
data/lib/dis.rb
CHANGED
|
@@ -5,7 +5,7 @@ require "digest/sha1"
|
|
|
5
5
|
require "fog/core"
|
|
6
6
|
require "fog/local"
|
|
7
7
|
require "active_job"
|
|
8
|
-
require "
|
|
8
|
+
require "concurrent"
|
|
9
9
|
require "dis/engine"
|
|
10
10
|
require "dis/errors"
|
|
11
11
|
require "dis/jobs"
|
|
@@ -16,5 +16,18 @@ require "dis/model"
|
|
|
16
16
|
require "dis/storage"
|
|
17
17
|
require "dis/validations"
|
|
18
18
|
|
|
19
|
+
# Dis is a content-addressable store for file uploads in Rails.
|
|
20
|
+
#
|
|
21
|
+
# Files are stored as binary blobs keyed by the SHA1 digest of their
|
|
22
|
+
# contents, enabling automatic deduplication. Storage is organized in
|
|
23
|
+
# layers (see {Dis::Layer}) that can target local disk or any cloud
|
|
24
|
+
# provider supported by Fog.
|
|
25
|
+
#
|
|
26
|
+
# Include {Dis::Model} in an ActiveRecord model to get started, and
|
|
27
|
+
# configure layers via {Dis::Storage.layers}.
|
|
28
|
+
#
|
|
29
|
+
# @see Dis::Model
|
|
30
|
+
# @see Dis::Storage
|
|
31
|
+
# @see Dis::Layer
|
|
19
32
|
module Dis
|
|
20
33
|
end
|
|
@@ -9,6 +9,17 @@ Dis::Storage.layers << Dis::Layer.new(
|
|
|
9
9
|
path: Rails.env
|
|
10
10
|
)
|
|
11
11
|
|
|
12
|
+
# You can also use a cache layer with bounded storage and LRU eviction:
|
|
13
|
+
|
|
14
|
+
# Dis::Storage.layers << Dis::Layer.new(
|
|
15
|
+
# Fog::Storage.new(
|
|
16
|
+
# provider: "Local",
|
|
17
|
+
# local_root: Rails.root.join("tmp/dis")
|
|
18
|
+
# ),
|
|
19
|
+
# path: Rails.env,
|
|
20
|
+
# cache: 1.gigabyte
|
|
21
|
+
# )
|
|
22
|
+
|
|
12
23
|
# You can also add cloud storage:
|
|
13
24
|
|
|
14
25
|
# require 'fog/aws/storage'
|
data/lib/tasks/dis.rake
CHANGED
|
@@ -1,58 +1,63 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "ruby-progressbar"
|
|
4
|
+
|
|
3
5
|
namespace :dis do
|
|
4
|
-
desc "
|
|
5
|
-
task
|
|
6
|
+
desc "List records with no backing file in any storage layer"
|
|
7
|
+
task missing: :environment do
|
|
6
8
|
unless ENV["MODELS"]
|
|
7
|
-
puts "Usage: #{$PROGRAM_NAME} dis:
|
|
8
|
-
"MODELS=Avatar,Document"
|
|
9
|
+
puts "Usage: #{$PROGRAM_NAME} dis:missing MODELS=Avatar,Document"
|
|
9
10
|
exit
|
|
10
11
|
end
|
|
11
12
|
|
|
12
13
|
models = ENV["MODELS"].split(",").map(&:strip).map(&:constantize)
|
|
13
14
|
|
|
14
|
-
jobs = Set.new
|
|
15
|
-
|
|
16
15
|
models.each do |model|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
16
|
+
bar = ProgressBar.create(
|
|
17
|
+
title: model.name,
|
|
18
|
+
total: model.where.not(
|
|
19
|
+
model.dis_attributes[:content_hash] => nil
|
|
20
|
+
).count,
|
|
21
|
+
format: "%t: |%B| %c/%C records"
|
|
22
|
+
)
|
|
24
23
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
missing = objects - existing
|
|
30
|
-
global_missing -= existing
|
|
31
|
-
puts "#{existing.length} existing, #{missing.length} missing" +
|
|
32
|
-
(layer.readonly? ? " (read-only)" : "")
|
|
33
|
-
|
|
34
|
-
next unless layer.delayed? && !layer.readonly?
|
|
35
|
-
|
|
36
|
-
jobs += (missing - global_missing).pmap do |hash|
|
|
37
|
-
[model.dis_type, hash]
|
|
38
|
-
end.compact
|
|
24
|
+
missing = ActiveRecord::Base.logger.silence do
|
|
25
|
+
Dis::Storage.missing_keys(model) do |count|
|
|
26
|
+
bar.progress += count
|
|
27
|
+
end
|
|
39
28
|
end
|
|
29
|
+
bar.finish
|
|
40
30
|
|
|
41
|
-
if
|
|
42
|
-
puts "
|
|
43
|
-
|
|
31
|
+
if missing.any?
|
|
32
|
+
puts "#{missing.length} missing:"
|
|
33
|
+
missing.each { |key| puts " #{key}" }
|
|
34
|
+
else
|
|
35
|
+
puts "0 missing"
|
|
44
36
|
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
45
39
|
|
|
46
|
-
|
|
40
|
+
desc "List stored files with no matching database record"
|
|
41
|
+
task orphaned: :environment do
|
|
42
|
+
unless ENV["MODELS"]
|
|
43
|
+
puts "Usage: #{$PROGRAM_NAME} dis:orphaned MODELS=Avatar,Document"
|
|
44
|
+
exit
|
|
47
45
|
end
|
|
48
46
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
47
|
+
models = ENV["MODELS"].split(",").map(&:strip).map(&:constantize)
|
|
48
|
+
|
|
49
|
+
models.each do |model|
|
|
50
|
+
orphans = ActiveRecord::Base.logger.silence do
|
|
51
|
+
Dis::Storage.orphaned_keys(model)
|
|
52
|
+
end
|
|
53
|
+
if orphans.any?
|
|
54
|
+
orphans.each do |layer, keys|
|
|
55
|
+
puts "#{model.name} (#{layer.name}): " \
|
|
56
|
+
"#{keys.length} orphaned"
|
|
57
|
+
keys.each { |key| puts " #{key}" }
|
|
58
|
+
end
|
|
59
|
+
else
|
|
60
|
+
puts "#{model.name}: 0 orphaned"
|
|
56
61
|
end
|
|
57
62
|
end
|
|
58
63
|
end
|