dis 1.1.21 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/dis/layer.rb CHANGED
@@ -3,52 +3,47 @@
3
3
  module Dis
4
4
  # = Dis Layer
5
5
  #
6
- # Represents a layer of storage. It's a wrapper around
7
- # <tt>Fog::Storage</tt>, any provider supported by Fog should be usable.
8
- #
9
- # ==== Options
10
- #
11
- # * <tt>:delayed</tt> - Delayed layers will be processed outside of
12
- # the request cycle by ActiveJob.
13
- # * <tt>:readonly</tt> - Readonly layers can only be read from,
14
- # not written to.
15
- # * <tt>:public</tt> - Objects stored in public layers will have the
16
- # public readable flag set if supported by the storage provider.
17
- # * <tt>:path</tt> - Directory name to use for the store. For Amazon S3,
18
- # this will be the name of the bucket.
19
- #
20
- # ==== Examples
21
- #
22
- # This creates a local storage layer. It's a good idea to have a local layer
23
- # first, this provides you with a cache on disk that will be faster than
24
- # reading from the cloud.
6
+ # Represents a layer of storage. Wraps a +Fog::Storage+ connection;
7
+ # any provider supported by Fog should be usable.
25
8
  #
9
+ # @example Local storage layer
26
10
  # Dis::Layer.new(
27
- # Fog::Storage.new({
28
- # provider: 'Local',
29
- # local_root: Rails.root.join('db', 'dis')
30
- # }),
11
+ # Fog::Storage.new(
12
+ # provider: "Local",
13
+ # local_root: Rails.root.join("db/dis")
14
+ # ),
31
15
  # path: Rails.env
32
16
  # )
33
17
  #
34
- # This creates a delayed layer on Amazon S3. ActiveJob will kick in and
35
- # and transfer content from one of the immediate layers later at it's
36
- # leisure.
37
- #
18
+ # @example Delayed layer on Amazon S3
38
19
  # Dis::Layer.new(
39
- # Fog::Storage.new({
40
- # provider: 'AWS',
41
- # aws_access_key_id: YOUR_AWS_ACCESS_KEY_ID,
20
+ # Fog::Storage.new(
21
+ # provider: "AWS",
22
+ # aws_access_key_id: YOUR_AWS_ACCESS_KEY_ID,
42
23
  # aws_secret_access_key: YOUR_AWS_SECRET_ACCESS_KEY
43
- # }),
24
+ # ),
44
25
  # path: "my_bucket",
45
26
  # delayed: true
46
27
  # )
47
28
  class Layer
48
29
  include Dis::Logging
49
30
 
31
+ # @return [Fog::Storage] the underlying Fog connection
50
32
  attr_reader :connection
51
33
 
34
+ # @param connection [Fog::Storage] a Fog storage connection
35
+ # @param options [Hash] layer configuration options
36
+ # @option options [Boolean] :delayed (false) process writes
37
+ # asynchronously via ActiveJob
38
+ # @option options [Boolean] :readonly (false) only allow reads
39
+ # @option options [Boolean] :public (false) set the public
40
+ # readable flag on stored objects (provider-dependent)
41
+ # @option options [String] :path (nil) directory or bucket name
42
+ # @option options [Integer, false] :cache (false) enable bounded
43
+ # cache with this soft size limit in bytes. Cannot be combined
44
+ # with +:delayed+ or +:readonly+
45
+ # @raise [ArgumentError] if +:cache+ is combined with +:delayed+
46
+ # or +:readonly+
52
47
  def initialize(connection, options = {})
53
48
  options = default_options.merge(options)
54
49
  @connection = connection
@@ -56,44 +51,102 @@ module Dis
56
51
  @readonly = options[:readonly]
57
52
  @public = options[:public]
58
53
  @path = options[:path]
54
+ @cache = options[:cache]
55
+ validate_cache_options!
59
56
  end
60
57
 
61
58
  # Returns true if the layer is a delayed layer.
59
+ #
60
+ # @return [Boolean]
62
61
  def delayed?
63
62
  @delayed
64
63
  end
65
64
 
66
65
  # Returns true if the layer isn't a delayed layer.
66
+ #
67
+ # @return [Boolean]
67
68
  def immediate?
68
69
  !delayed?
69
70
  end
70
71
 
71
72
  # Returns true if the layer is public.
73
+ #
74
+ # @return [Boolean]
72
75
  def public?
73
76
  @public
74
77
  end
75
78
 
76
79
  # Returns true if the layer is read only.
80
+ #
81
+ # @return [Boolean]
77
82
  def readonly?
78
83
  @readonly
79
84
  end
80
85
 
81
86
  # Returns true if the layer is writeable.
87
+ #
88
+ # @return [Boolean]
82
89
  def writeable?
83
90
  !readonly?
84
91
  end
85
92
 
86
- # Stores a file.
93
+ # Returns true if the layer is a cache layer.
87
94
  #
88
- # key = Digest::SHA1.file(file.path).hexdigest
89
- # layer.store("documents", key, path)
95
+ # @return [Boolean]
96
+ def cache?
97
+ !!@cache
98
+ end
99
+
100
+ # Returns the cache size limit in bytes, or nil if not a cache.
90
101
  #
91
- # The key must be a hex digest of the file content. If an object with the
92
- # supplied hash already exists, no action will be performed. In other
93
- # words, no data will be overwritten if a hash collision occurs.
102
+ # @return [Integer, nil]
103
+ def max_size
104
+ @cache if cache?
105
+ end
106
+
107
+ # Returns the total size in bytes of all files stored locally.
108
+ # Returns 0 for non-local providers.
94
109
  #
95
- # Returns an instance of Fog::Model, or raises an error if the layer
96
- # is readonly.
110
+ # @return [Integer]
111
+ def size
112
+ return 0 unless connection.respond_to?(:local_root)
113
+
114
+ root = local_root_path
115
+ return 0 unless root.exist?
116
+
117
+ root.glob("**/*").sum { |f| f.file? ? f.size : 0 }
118
+ end
119
+
120
+ # Returns cached file entries sorted by mtime ascending
121
+ # (oldest first).
122
+ #
123
+ # @return [Array<Hash>] each entry has keys +:path+
124
+ # (Pathname), +:type+ (String), +:key+ (String), +:mtime+
125
+ # (Time), +:size+ (Integer)
126
+ def cached_files
127
+ return [] unless connection.respond_to?(:local_root)
128
+
129
+ root = local_root_path
130
+ return [] unless root.exist?
131
+
132
+ entries = root.glob("**/*").select(&:file?)
133
+ entries.filter_map { |f| cached_file_entry(f, root) }
134
+ .sort_by { |e| e[:mtime] }
135
+ end
136
+
137
+ # Stores a file. The key must be a hex digest of the file
138
+ # content. If an object with the supplied hash already exists,
139
+ # no action will be performed.
140
+ #
141
+ # @param type [String] the type scope
142
+ # @param key [String] the content hash
143
+ # @param file [File, IO, String, Fog::Model] the content
144
+ # @return [Fog::Model] the stored file
145
+ # @raise [Dis::Errors::ReadOnlyError] if the layer is readonly
146
+ #
147
+ # @example
148
+ # key = Digest::SHA1.file(file.path).hexdigest
149
+ # layer.store("documents", key, file)
97
150
  def store(type, key, file)
98
151
  raise Dis::Errors::ReadOnlyError if readonly?
99
152
 
@@ -104,18 +157,42 @@ module Dis
104
157
 
105
158
  # Returns all the given keys that exist in the layer.
106
159
  #
107
- # layer.existing("documents", keys)
160
+ # @param type [String] the type scope
161
+ # @param keys [Array<String>] content hashes to check
162
+ # @return [Array<String>] the subset of keys that exist
108
163
  def existing(type, keys)
109
164
  return [] if keys.empty?
110
165
 
111
- list = directory(type, keys.first).files.map(&:key)
166
+ futures = keys.map do |key|
167
+ Concurrent::Promises.future { key if exists?(type, key) }
168
+ end
169
+ futures.filter_map(&:value!)
170
+ end
171
+
172
+ # Returns all content hashes stored under the given type.
173
+ #
174
+ # @param type [String] the type scope
175
+ # @return [Array<String>] content hashes
176
+ def stored_keys(type)
177
+ dir = connection.directories.get(path || "")
178
+ return [] unless dir
112
179
 
113
- keys.select { |key| list.include?(key_component(type, key)) }
180
+ prefix = "#{type}/"
181
+ dir.files.filter_map do |file|
182
+ next unless file.key.start_with?(prefix)
183
+
184
+ parts = file.key.delete_prefix(prefix).split("/")
185
+ next unless parts.length == 2
186
+
187
+ "#{parts[0]}#{parts[1]}"
188
+ end
114
189
  end
115
190
 
116
- # Returns true if a object with the given key exists.
191
+ # Returns true if an object with the given key exists.
117
192
  #
118
- # layer.exists?("documents", key)
193
+ # @param type [String] the type scope
194
+ # @param key [String] the content hash
195
+ # @return [Boolean]
119
196
  def exists?(type, key)
120
197
  if directory(type, key)&.files&.head(key_component(type, key))
121
198
  true
@@ -126,22 +203,44 @@ module Dis
126
203
 
127
204
  # Retrieves a file from the store.
128
205
  #
129
- # layer.get("documents", key)
206
+ # @param type [String] the type scope
207
+ # @param key [String] the content hash
208
+ # @return [Fog::Model, nil] the file, or nil if not found
130
209
  def get(type, key)
131
210
  dir = directory(type, key)
132
211
  return unless dir
133
212
 
134
- debug_log("Get #{type}/#{key} from #{name}") do
213
+ result = debug_log("Get #{type}/#{key} from #{name}") do
135
214
  dir.files.get(key_component(type, key))
136
215
  end
216
+ touch_file(type, key) if result && cache?
217
+ result
137
218
  end
138
219
 
139
- # Deletes a file from the store.
220
+ # Returns the absolute file path for a locally stored file, or
221
+ # nil if the provider is not local or the file does not exist.
140
222
  #
141
- # layer.delete("documents", key)
223
+ # @param type [String] the type scope
224
+ # @param key [String] the content hash
225
+ # @return [String, nil]
226
+ def file_path(type, key)
227
+ return unless connection.respond_to?(:local_root)
228
+ return unless exists?(type, key)
229
+
230
+ File.join(
231
+ connection.local_root,
232
+ directory_component(type, key),
233
+ key_component(type, key)
234
+ )
235
+ end
236
+
237
+ # Deletes a file from the store.
142
238
  #
143
- # Returns true if the file was deleted, or false if it could not be found.
144
- # Raises an error if the layer is readonly.
239
+ # @param type [String] the type scope
240
+ # @param key [String] the content hash
241
+ # @return [Boolean] true if the file was deleted, false if not
242
+ # found
243
+ # @raise [Dis::Errors::ReadOnlyError] if the layer is readonly
145
244
  def delete(type, key)
146
245
  raise Dis::Errors::ReadOnlyError if readonly?
147
246
 
@@ -152,6 +251,9 @@ module Dis
152
251
 
153
252
  # Returns a name for the layer.
154
253
  #
254
+ # @return [String]
255
+ #
256
+ # @example
155
257
  # layer.name # => "Fog::Storage::Local::Real/development"
156
258
  def name
157
259
  "#{connection.class.name}/#{path}"
@@ -160,7 +262,39 @@ module Dis
160
262
  private
161
263
 
162
264
  def default_options
163
- { delayed: false, readonly: false, public: false, path: nil }
265
+ { delayed: false, readonly: false, public: false,
266
+ path: nil, cache: false }
267
+ end
268
+
269
+ def validate_cache_options!
270
+ return unless cache?
271
+
272
+ if delayed?
273
+ raise ArgumentError,
274
+ "cache layers cannot be delayed"
275
+ end
276
+ return unless readonly?
277
+
278
+ raise ArgumentError,
279
+ "cache layers cannot be readonly"
280
+ end
281
+
282
+ def local_root_path
283
+ root = Pathname.new(connection.local_root)
284
+ path ? root.join(path) : root
285
+ end
286
+
287
+ def cached_file_entry(file, root)
288
+ parts = file.relative_path_from(root).to_s.split("/")
289
+ return unless parts.length == 3
290
+
291
+ { path: file, type: parts[0], key: parts[1] + parts[2],
292
+ mtime: file.mtime, size: file.size }
293
+ end
294
+
295
+ def touch_file(type, key)
296
+ fp = file_path(type, key)
297
+ FileUtils.touch(fp) if fp
164
298
  end
165
299
 
166
300
  def directory_component(_type, _key)
@@ -168,7 +302,7 @@ module Dis
168
302
  end
169
303
 
170
304
  def key_component(type, key)
171
- [type, key[0...2], key[2..key.length]].compact.join("/")
305
+ [type, key[0...2], key[2..]].compact.join("/")
172
306
  end
173
307
 
174
308
  def delete!(type, key)
data/lib/dis/layers.rb CHANGED
@@ -3,10 +3,15 @@
3
3
  module Dis
4
4
  # = Dis Layers
5
5
  #
6
- # Represents a collection of layers.
6
+ # Represents a filterable collection of {Dis::Layer} instances.
7
+ # Supports chained filtering by layer properties.
8
+ #
9
+ # @example
10
+ # Dis::Storage.layers.delayed.writeable.each { |l| ... }
7
11
  class Layers
8
12
  include Enumerable
9
13
 
14
+ # @param layers [Array<Dis::Layer>] initial layers
10
15
  def initialize(layers = [])
11
16
  @layers = layers
12
17
  end
@@ -15,6 +20,8 @@ module Dis
15
20
  delegate :<<, to: :@layers
16
21
 
17
22
  # Clears all layers from the collection.
23
+ #
24
+ # @return [void]
18
25
  def clear!
19
26
  @layers = []
20
27
  end
@@ -25,43 +32,87 @@ module Dis
25
32
  end
26
33
 
27
34
  # Returns a new instance containing only the delayed layers.
35
+ #
36
+ # @return [Dis::Layers]
28
37
  def delayed
29
38
  self.class.new select(&:delayed?)
30
39
  end
31
40
 
32
41
  # Returns true if one or more delayed layers exist.
42
+ #
43
+ # @return [Boolean]
33
44
  def delayed?
34
- delayed.any?
45
+ any?(&:delayed?)
35
46
  end
36
47
 
37
48
  # Returns a new instance containing only the immediate layers.
49
+ #
50
+ # @return [Dis::Layers]
38
51
  def immediate
39
52
  self.class.new select(&:immediate?)
40
53
  end
41
54
 
42
55
  # Returns true if one or more immediate layers exist.
56
+ #
57
+ # @return [Boolean]
43
58
  def immediate?
44
- immediate.any?
59
+ any?(&:immediate?)
45
60
  end
46
61
 
47
62
  # Returns a new instance containing only the readonly layers.
63
+ #
64
+ # @return [Dis::Layers]
48
65
  def readonly
49
66
  self.class.new select(&:readonly?)
50
67
  end
51
68
 
52
69
  # Returns true if one or more readonly layers exist.
70
+ #
71
+ # @return [Boolean]
53
72
  def readonly?
54
- readonly.any?
73
+ any?(&:readonly?)
55
74
  end
56
75
 
57
76
  # Returns a new instance containing only the writeable layers.
77
+ #
78
+ # @return [Dis::Layers]
58
79
  def writeable
59
80
  self.class.new select(&:writeable?)
60
81
  end
61
82
 
62
83
  # Returns true if one or more writeable layers exist.
84
+ #
85
+ # @return [Boolean]
63
86
  def writeable?
64
- writeable.any?
87
+ any?(&:writeable?)
88
+ end
89
+
90
+ # Returns a new instance containing only the cache layers.
91
+ #
92
+ # @return [Dis::Layers]
93
+ def cache
94
+ self.class.new select(&:cache?)
95
+ end
96
+
97
+ # Returns true if one or more cache layers exist.
98
+ #
99
+ # @return [Boolean]
100
+ def cache?
101
+ any?(&:cache?)
102
+ end
103
+
104
+ # Returns a new instance containing only the non-cache layers.
105
+ #
106
+ # @return [Dis::Layers]
107
+ def non_cache
108
+ self.class.new reject(&:cache?)
109
+ end
110
+
111
+ # Returns true if one or more non-cache layers exist.
112
+ #
113
+ # @return [Boolean]
114
+ def non_cache?
115
+ any? { |l| !l.cache? }
65
116
  end
66
117
  end
67
118
  end
@@ -4,13 +4,21 @@ module Dis
4
4
  module Model
5
5
  module ClassMethods
6
6
  # Returns the mapping of attribute names.
7
+ #
8
+ # @return [Hash{Symbol => Symbol}]
7
9
  def dis_attributes
8
10
  default_dis_attributes.merge(@dis_attributes ||= {})
9
11
  end
10
12
 
11
- # Sets the current mapping of attribute names. Use this if you want to
12
- # override the attributes and database columns that Dis will use.
13
+ # Sets the current mapping of attribute names. Use this if you
14
+ # want to override the attributes and database columns that
15
+ # Dis will use. Valid keys: +:content_hash+, +:content_type+,
16
+ # +:content_length+, +:filename+.
17
+ #
18
+ # @param new_attributes [Hash{Symbol => Symbol}] attribute
19
+ # name overrides
13
20
  #
21
+ # @example
14
22
  # class Document < ActiveRecord::Base
15
23
  # include Dis::Model
16
24
  # self.dis_attributes = { filename: :my_custom_filename }
@@ -20,8 +28,11 @@ module Dis
20
28
  end
21
29
 
22
30
  # Returns the storage type name, which Dis will use for
23
- # directory scoping. Defaults to the name of the database table.
31
+ # directory scoping. Defaults to the table name.
32
+ #
33
+ # @return [String]
24
34
  #
35
+ # @example
25
36
  # class Document < ActiveRecord::Base; end
26
37
  # Document.dis_type # => "documents"
27
38
  def dis_type
@@ -30,17 +41,23 @@ module Dis
30
41
 
31
42
  # Sets the storage type name.
32
43
  #
33
- # Take care not to set the same name for multiple models, this will
34
- # cause data loss when a record is destroyed.
44
+ # Take care not to set the same name for multiple models,
45
+ # this will cause data loss when a record is destroyed.
46
+ #
47
+ # @param new_type [String] the new type scope
48
+ # @return [void]
35
49
  def dis_type=(new_type)
36
50
  @dis_type = new_type
37
51
  end
38
52
 
39
53
  # Adds a presence validation on the +data+ attribute.
40
54
  #
41
- # This is better than using `validates :data, presence: true`, since
42
- # that would cause it to load the data from storage on each save.
55
+ # This is preferred over +validates :data, presence: true+,
56
+ # which would load the data from storage on each save.
57
+ #
58
+ # @return [void]
43
59
  #
60
+ # @example
44
61
  # class Document < ActiveRecord::Base
45
62
  # include Dis::Model
46
63
  # validates_data_presence
@@ -7,39 +7,56 @@ module Dis
7
7
  # Facilitates communication between the model and the storage,
8
8
  # and holds any newly assigned data before the record is saved.
9
9
  class Data
10
+ # @param record [ActiveRecord::Base] the model instance
11
+ # @param raw [File, IO, String, nil] newly assigned data
10
12
  def initialize(record, raw = nil)
11
13
  @record = record
12
14
  @raw = raw
13
15
  end
14
16
 
15
17
  # Returns true if two Data objects represent the same data.
18
+ #
19
+ # @param other [Dis::Model::Data, #read, Object] the object to
20
+ # compare
21
+ # @return [Boolean]
16
22
  def ==(other)
17
- return false unless other.respond_to?(:read)
18
-
19
- # TODO: This can be made faster by
20
- # comparing hashes for stored objects.
21
- other.read == read
23
+ if !raw? && other.is_a?(self.class) && !other.changed?
24
+ content_hash == other.content_hash
25
+ elsif other.respond_to?(:read)
26
+ other.read == read
27
+ else
28
+ false
29
+ end
22
30
  end
23
31
 
24
32
  # Returns true if data exists either in memory or in storage.
33
+ #
34
+ # @return [Boolean]
25
35
  def any?
26
36
  raw? || stored?
27
37
  end
28
38
 
29
39
  # Returns the data as a binary string.
40
+ #
41
+ # @return [String, nil]
30
42
  def read
31
43
  @read ||= read_from(closest)
32
44
  end
33
45
 
34
- # Will be true if data has been explicitely set.
46
+ # Will be true if data has been explicitly set.
35
47
  #
48
+ # @return [Boolean]
49
+ #
50
+ # @example
36
51
  # Dis::Model::Data.new(record).changed? # => false
37
- # Dis::Model::Data.new(record, new_file).changed? # => true
52
+ # Dis::Model::Data.new(record, file).changed? # => true
38
53
  def changed?
39
54
  raw?
40
55
  end
41
56
 
42
- # Returns the length of the data.
57
+ # Returns the length of the data in bytes.
58
+ #
59
+ # @return [Integer]
43
60
  def content_length
44
61
  if raw? && raw.respond_to?(:length)
45
62
  raw.length
@@ -48,9 +65,13 @@ module Dis
48
65
  end
49
66
  end
50
67
 
51
- # Expires a data object from the storage if it's no longer being used
52
- # by existing records. This is triggered from callbacks on the record
53
- # whenever they are changed or destroyed.
68
+ # Expires a data object from the storage if it's no longer
69
+ # being used by existing records. This is triggered from
70
+ # callbacks on the record whenever they are changed or
71
+ # destroyed.
72
+ #
73
+ # @param hash [String] the content hash to expire
74
+ # @return [void]
54
75
  def expire(hash)
55
76
  return if hash.blank?
56
77
 
@@ -61,14 +82,41 @@ module Dis
61
82
  end
62
83
  end
63
84
 
64
- # Stores the data. Returns a hash of the content for reference.
85
+ # Stores the data and returns the content hash.
86
+ #
87
+ # @return [String] the SHA1 content hash
88
+ # @raise [Dis::Errors::NoDataError] if no data has been
89
+ # assigned
65
90
  def store!
66
91
  raise Dis::Errors::NoDataError unless raw?
67
92
 
68
93
  Dis::Storage.store(storage_type, raw)
69
94
  end
70
95
 
96
+ # Clears cached data and tempfiles, allowing them to be
97
+ # garbage collected. Subsequent calls to +read+ or +tempfile+
98
+ # will re-fetch from storage.
99
+ #
100
+ # @return [void]
101
+ def reset_read_cache!
102
+ @read = nil
103
+ return unless @tempfile
104
+
105
+ @tempfile.close!
106
+ @tempfile = nil
107
+ end
108
+
109
+ # Returns the file path to the data. Prefers a local storage
110
+ # path to avoid unnecessary copies, falls back to a tempfile.
111
+ #
112
+ # @return [String]
113
+ def file_path
114
+ local_path || tempfile.path
115
+ end
116
+
71
117
  # Writes the data to a temporary file.
118
+ #
119
+ # @return [Tempfile]
72
120
  def tempfile
73
121
  unless @tempfile
74
122
  @tempfile = Tempfile.new(binmode: true)
@@ -78,6 +126,12 @@ module Dis
78
126
  @tempfile
79
127
  end
80
128
 
129
+ protected
130
+
131
+ def content_hash
132
+ @record[@record.class.dis_attributes[:content_hash]]
133
+ end
134
+
81
135
  private
82
136
 
83
137
  def closest
@@ -88,10 +142,6 @@ module Dis
88
142
  end
89
143
  end
90
144
 
91
- def content_hash
92
- @record[@record.class.dis_attributes[:content_hash]]
93
- end
94
-
95
145
  def raw?
96
146
  raw ? true : false
97
147
  end
@@ -129,6 +179,12 @@ module Dis
129
179
  Dis::Storage.get(storage_type, content_hash)
130
180
  end
131
181
 
182
+ def local_path
183
+ return if raw?
184
+
185
+ Dis::Storage.file_path(storage_type, content_hash)
186
+ end
187
+
132
188
  attr_reader :raw
133
189
  end
134
190
  end