dis 1.2.0 → 1.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
data/lib/dis/layer.rb CHANGED
@@ -3,52 +3,47 @@
3
3
  module Dis
4
4
  # = Dis Layer
5
5
  #
6
- # Represents a layer of storage. It's a wrapper around
7
- # <tt>Fog::Storage</tt>, any provider supported by Fog should be usable.
8
- #
9
- # ==== Options
10
- #
11
- # * <tt>:delayed</tt> - Delayed layers will be processed outside of
12
- # the request cycle by ActiveJob.
13
- # * <tt>:readonly</tt> - Readonly layers can only be read from,
14
- # not written to.
15
- # * <tt>:public</tt> - Objects stored in public layers will have the
16
- # public readable flag set if supported by the storage provider.
17
- # * <tt>:path</tt> - Directory name to use for the store. For Amazon S3,
18
- # this will be the name of the bucket.
19
- #
20
- # ==== Examples
21
- #
22
- # This creates a local storage layer. It's a good idea to have a local layer
23
- # first, this provides you with a cache on disk that will be faster than
24
- # reading from the cloud.
6
+ # Represents a layer of storage. Wraps a +Fog::Storage+ connection;
7
+ # any provider supported by Fog should be usable.
25
8
  #
9
+ # @example Local storage layer
26
10
  # Dis::Layer.new(
27
- # Fog::Storage.new({
28
- # provider: 'Local',
29
- # local_root: Rails.root.join('db', 'dis')
30
- # }),
11
+ # Fog::Storage.new(
12
+ # provider: "Local",
13
+ # local_root: Rails.root.join("db/dis")
14
+ # ),
31
15
  # path: Rails.env
32
16
  # )
33
17
  #
34
- # This creates a delayed layer on Amazon S3. ActiveJob will kick in and
35
- # and transfer content from one of the immediate layers later at it's
36
- # leisure.
37
- #
18
+ # @example Delayed layer on Amazon S3
38
19
  # Dis::Layer.new(
39
- # Fog::Storage.new({
40
- # provider: 'AWS',
41
- # aws_access_key_id: YOUR_AWS_ACCESS_KEY_ID,
20
+ # Fog::Storage.new(
21
+ # provider: "AWS",
22
+ # aws_access_key_id: YOUR_AWS_ACCESS_KEY_ID,
42
23
  # aws_secret_access_key: YOUR_AWS_SECRET_ACCESS_KEY
43
- # }),
24
+ # ),
44
25
  # path: "my_bucket",
45
26
  # delayed: true
46
27
  # )
47
28
  class Layer
48
29
  include Dis::Logging
49
30
 
31
+ # @return [Fog::Storage] the underlying Fog connection
50
32
  attr_reader :connection
51
33
 
34
+ # @param connection [Fog::Storage] a Fog storage connection
35
+ # @param options [Hash] layer configuration options
36
+ # @option options [Boolean] :delayed (false) process writes
37
+ # asynchronously via ActiveJob
38
+ # @option options [Boolean] :readonly (false) only allow reads
39
+ # @option options [Boolean] :public (false) set the public
40
+ # readable flag on stored objects (provider-dependent)
41
+ # @option options [String] :path (nil) directory or bucket name
42
+ # @option options [Integer, false] :cache (false) enable bounded
43
+ # cache with this soft size limit in bytes. Cannot be combined
44
+ # with +:delayed+ or +:readonly+
45
+ # @raise [ArgumentError] if +:cache+ is combined with +:delayed+
46
+ # or +:readonly+
52
47
  def initialize(connection, options = {})
53
48
  options = default_options.merge(options)
54
49
  @connection = connection
@@ -56,44 +51,102 @@ module Dis
56
51
  @readonly = options[:readonly]
57
52
  @public = options[:public]
58
53
  @path = options[:path]
54
+ @cache = options[:cache]
55
+ validate_cache_options!
59
56
  end
60
57
 
61
58
  # Returns true if the layer is a delayed layer.
59
+ #
60
+ # @return [Boolean]
62
61
  def delayed?
63
62
  @delayed
64
63
  end
65
64
 
66
65
  # Returns true if the layer isn't a delayed layer.
66
+ #
67
+ # @return [Boolean]
67
68
  def immediate?
68
69
  !delayed?
69
70
  end
70
71
 
71
72
  # Returns true if the layer is public.
73
+ #
74
+ # @return [Boolean]
72
75
  def public?
73
76
  @public
74
77
  end
75
78
 
76
79
  # Returns true if the layer is read only.
80
+ #
81
+ # @return [Boolean]
77
82
  def readonly?
78
83
  @readonly
79
84
  end
80
85
 
81
86
  # Returns true if the layer is writeable.
87
+ #
88
+ # @return [Boolean]
82
89
  def writeable?
83
90
  !readonly?
84
91
  end
85
92
 
86
- # Stores a file.
93
+ # Returns true if the layer is a cache layer.
87
94
  #
88
- # key = Digest::SHA1.file(file.path).hexdigest
89
- # layer.store("documents", key, path)
95
+ # @return [Boolean]
96
+ def cache?
97
+ !!@cache
98
+ end
99
+
100
+ # Returns the cache size limit in bytes, or nil if not a cache.
101
+ #
102
+ # @return [Integer, nil]
103
+ def max_size
104
+ @cache if cache?
105
+ end
106
+
107
+ # Returns the total size in bytes of all files stored locally.
108
+ # Returns 0 for non-local providers.
109
+ #
110
+ # @return [Integer]
111
+ def size
112
+ return 0 unless connection.respond_to?(:local_root)
113
+
114
+ root = local_root_path
115
+ return 0 unless root.exist?
116
+
117
+ root.glob("**/*").sum { |f| f.file? ? f.size : 0 }
118
+ end
119
+
120
+ # Returns cached file entries sorted by mtime ascending
121
+ # (oldest first).
122
+ #
123
+ # @return [Array<Hash>] each entry has keys +:path+
124
+ # (Pathname), +:type+ (String), +:key+ (String), +:mtime+
125
+ # (Time), +:size+ (Integer)
126
+ def cached_files
127
+ return [] unless connection.respond_to?(:local_root)
128
+
129
+ root = local_root_path
130
+ return [] unless root.exist?
131
+
132
+ entries = root.glob("**/*").select(&:file?)
133
+ entries.filter_map { |f| cached_file_entry(f, root) }
134
+ .sort_by { |e| e[:mtime] }
135
+ end
136
+
137
+ # Stores a file. The key must be a hex digest of the file
138
+ # content. If an object with the supplied hash already exists,
139
+ # no action will be performed.
90
140
  #
91
- # The key must be a hex digest of the file content. If an object with the
92
- # supplied hash already exists, no action will be performed. In other
93
- # words, no data will be overwritten if a hash collision occurs.
141
+ # @param type [String] the type scope
142
+ # @param key [String] the content hash
143
+ # @param file [File, IO, String, Fog::Model] the content
144
+ # @return [Fog::Model] the stored file
145
+ # @raise [Dis::Errors::ReadOnlyError] if the layer is readonly
94
146
  #
95
- # Returns an instance of Fog::Model, or raises an error if the layer
96
- # is readonly.
147
+ # @example
148
+ # key = Digest::SHA1.file(file.path).hexdigest
149
+ # layer.store("documents", key, file)
97
150
  def store(type, key, file)
98
151
  raise Dis::Errors::ReadOnlyError if readonly?
99
152
 
@@ -104,18 +157,42 @@ module Dis
104
157
 
105
158
  # Returns all the given keys that exist in the layer.
106
159
  #
107
- # layer.existing("documents", keys)
160
+ # @param type [String] the type scope
161
+ # @param keys [Array<String>] content hashes to check
162
+ # @return [Array<String>] the subset of keys that exist
108
163
  def existing(type, keys)
109
164
  return [] if keys.empty?
110
165
 
111
- list = directory(type, keys.first).files.map(&:key)
166
+ futures = keys.map do |key|
167
+ Concurrent::Promises.future { key if exists?(type, key) }
168
+ end
169
+ futures.filter_map(&:value!)
170
+ end
171
+
172
+ # Returns all content hashes stored under the given type.
173
+ #
174
+ # @param type [String] the type scope
175
+ # @return [Array<String>] content hashes
176
+ def stored_keys(type)
177
+ dir = connection.directories.get(path || "")
178
+ return [] unless dir
179
+
180
+ prefix = "#{type}/"
181
+ dir.files.filter_map do |file|
182
+ next unless file.key.start_with?(prefix)
183
+
184
+ parts = file.key.delete_prefix(prefix).split("/")
185
+ next unless parts.length == 2
112
186
 
113
- keys.select { |key| list.include?(key_component(type, key)) }
187
+ "#{parts[0]}#{parts[1]}"
188
+ end
114
189
  end
115
190
 
116
- # Returns true if a object with the given key exists.
191
+ # Returns true if an object with the given key exists.
117
192
  #
118
- # layer.exists?("documents", key)
193
+ # @param type [String] the type scope
194
+ # @param key [String] the content hash
195
+ # @return [Boolean]
119
196
  def exists?(type, key)
120
197
  if directory(type, key)&.files&.head(key_component(type, key))
121
198
  true
@@ -126,20 +203,26 @@ module Dis
126
203
 
127
204
  # Retrieves a file from the store.
128
205
  #
129
- # layer.get("documents", key)
206
+ # @param type [String] the type scope
207
+ # @param key [String] the content hash
208
+ # @return [Fog::Model, nil] the file, or nil if not found
130
209
  def get(type, key)
131
210
  dir = directory(type, key)
132
211
  return unless dir
133
212
 
134
- debug_log("Get #{type}/#{key} from #{name}") do
213
+ result = debug_log("Get #{type}/#{key} from #{name}") do
135
214
  dir.files.get(key_component(type, key))
136
215
  end
216
+ touch_file(type, key) if result && cache?
217
+ result
137
218
  end
138
219
 
139
- # Returns the absolute file path for a locally stored file, or nil
140
- # if the provider is not local or the file does not exist.
220
+ # Returns the absolute file path for a locally stored file, or
221
+ # nil if the provider is not local or the file does not exist.
141
222
  #
142
- # layer.file_path("documents", key)
223
+ # @param type [String] the type scope
224
+ # @param key [String] the content hash
225
+ # @return [String, nil]
143
226
  def file_path(type, key)
144
227
  return unless connection.respond_to?(:local_root)
145
228
  return unless exists?(type, key)
@@ -153,10 +236,11 @@ module Dis
153
236
 
154
237
  # Deletes a file from the store.
155
238
  #
156
- # layer.delete("documents", key)
157
- #
158
- # Returns true if the file was deleted, or false if it could not be found.
159
- # Raises an error if the layer is readonly.
239
+ # @param type [String] the type scope
240
+ # @param key [String] the content hash
241
+ # @return [Boolean] true if the file was deleted, false if not
242
+ # found
243
+ # @raise [Dis::Errors::ReadOnlyError] if the layer is readonly
160
244
  def delete(type, key)
161
245
  raise Dis::Errors::ReadOnlyError if readonly?
162
246
 
@@ -167,6 +251,9 @@ module Dis
167
251
 
168
252
  # Returns a name for the layer.
169
253
  #
254
+ # @return [String]
255
+ #
256
+ # @example
170
257
  # layer.name # => "Fog::Storage::Local::Real/development"
171
258
  def name
172
259
  "#{connection.class.name}/#{path}"
@@ -175,7 +262,39 @@ module Dis
175
262
  private
176
263
 
177
264
  def default_options
178
- { delayed: false, readonly: false, public: false, path: nil }
265
+ { delayed: false, readonly: false, public: false,
266
+ path: nil, cache: false }
267
+ end
268
+
269
+ def validate_cache_options!
270
+ return unless cache?
271
+
272
+ if delayed?
273
+ raise ArgumentError,
274
+ "cache layers cannot be delayed"
275
+ end
276
+ return unless readonly?
277
+
278
+ raise ArgumentError,
279
+ "cache layers cannot be readonly"
280
+ end
281
+
282
+ def local_root_path
283
+ root = Pathname.new(connection.local_root)
284
+ path ? root.join(path) : root
285
+ end
286
+
287
+ def cached_file_entry(file, root)
288
+ parts = file.relative_path_from(root).to_s.split("/")
289
+ return unless parts.length == 3
290
+
291
+ { path: file, type: parts[0], key: parts[1] + parts[2],
292
+ mtime: file.mtime, size: file.size }
293
+ end
294
+
295
+ def touch_file(type, key)
296
+ fp = file_path(type, key)
297
+ FileUtils.touch(fp) if fp
179
298
  end
180
299
 
181
300
  def directory_component(_type, _key)
@@ -183,7 +302,7 @@ module Dis
183
302
  end
184
303
 
185
304
  def key_component(type, key)
186
- [type, key[0...2], key[2..key.length]].compact.join("/")
305
+ [type, key[0...2], key[2..]].compact.join("/")
187
306
  end
188
307
 
189
308
  def delete!(type, key)
data/lib/dis/layers.rb CHANGED
@@ -3,10 +3,15 @@
3
3
  module Dis
4
4
  # = Dis Layers
5
5
  #
6
- # Represents a collection of layers.
6
+ # Represents a filterable collection of {Dis::Layer} instances.
7
+ # Supports chained filtering by layer properties.
8
+ #
9
+ # @example
10
+ # Dis::Storage.layers.delayed.writeable.each { |l| ... }
7
11
  class Layers
8
12
  include Enumerable
9
13
 
14
+ # @param layers [Array<Dis::Layer>] initial layers
10
15
  def initialize(layers = [])
11
16
  @layers = layers
12
17
  end
@@ -15,6 +20,8 @@ module Dis
15
20
  delegate :<<, to: :@layers
16
21
 
17
22
  # Clears all layers from the collection.
23
+ #
24
+ # @return [void]
18
25
  def clear!
19
26
  @layers = []
20
27
  end
@@ -25,43 +32,87 @@ module Dis
25
32
  end
26
33
 
27
34
  # Returns a new instance containing only the delayed layers.
35
+ #
36
+ # @return [Dis::Layers]
28
37
  def delayed
29
38
  self.class.new select(&:delayed?)
30
39
  end
31
40
 
32
41
  # Returns true if one or more delayed layers exist.
42
+ #
43
+ # @return [Boolean]
33
44
  def delayed?
34
- delayed.any?
45
+ any?(&:delayed?)
35
46
  end
36
47
 
37
48
  # Returns a new instance containing only the immediate layers.
49
+ #
50
+ # @return [Dis::Layers]
38
51
  def immediate
39
52
  self.class.new select(&:immediate?)
40
53
  end
41
54
 
42
55
  # Returns true if one or more immediate layers exist.
56
+ #
57
+ # @return [Boolean]
43
58
  def immediate?
44
- immediate.any?
59
+ any?(&:immediate?)
45
60
  end
46
61
 
47
62
  # Returns a new instance containing only the readonly layers.
63
+ #
64
+ # @return [Dis::Layers]
48
65
  def readonly
49
66
  self.class.new select(&:readonly?)
50
67
  end
51
68
 
52
69
  # Returns true if one or more readonly layers exist.
70
+ #
71
+ # @return [Boolean]
53
72
  def readonly?
54
- readonly.any?
73
+ any?(&:readonly?)
55
74
  end
56
75
 
57
76
  # Returns a new instance containing only the writeable layers.
77
+ #
78
+ # @return [Dis::Layers]
58
79
  def writeable
59
80
  self.class.new select(&:writeable?)
60
81
  end
61
82
 
62
83
  # Returns true if one or more writeable layers exist.
84
+ #
85
+ # @return [Boolean]
63
86
  def writeable?
64
- writeable.any?
87
+ any?(&:writeable?)
88
+ end
89
+
90
+ # Returns a new instance containing only the cache layers.
91
+ #
92
+ # @return [Dis::Layers]
93
+ def cache
94
+ self.class.new select(&:cache?)
95
+ end
96
+
97
+ # Returns true if one or more cache layers exist.
98
+ #
99
+ # @return [Boolean]
100
+ def cache?
101
+ any?(&:cache?)
102
+ end
103
+
104
+ # Returns a new instance containing only the non-cache layers.
105
+ #
106
+ # @return [Dis::Layers]
107
+ def non_cache
108
+ self.class.new reject(&:cache?)
109
+ end
110
+
111
+ # Returns true if one or more non-cache layers exist.
112
+ #
113
+ # @return [Boolean]
114
+ def non_cache?
115
+ any? { |l| !l.cache? }
65
116
  end
66
117
  end
67
118
  end
@@ -4,13 +4,21 @@ module Dis
4
4
  module Model
5
5
  module ClassMethods
6
6
  # Returns the mapping of attribute names.
7
+ #
8
+ # @return [Hash{Symbol => Symbol}]
7
9
  def dis_attributes
8
10
  default_dis_attributes.merge(@dis_attributes ||= {})
9
11
  end
10
12
 
11
- # Sets the current mapping of attribute names. Use this if you want to
12
- # override the attributes and database columns that Dis will use.
13
+ # Sets the current mapping of attribute names. Use this if you
14
+ # want to override the attributes and database columns that
15
+ # Dis will use. Valid keys: +:content_hash+, +:content_type+,
16
+ # +:content_length+, +:filename+.
17
+ #
18
+ # @param new_attributes [Hash{Symbol => Symbol}] attribute
19
+ # name overrides
13
20
  #
21
+ # @example
14
22
  # class Document < ActiveRecord::Base
15
23
  # include Dis::Model
16
24
  # self.dis_attributes = { filename: :my_custom_filename }
@@ -20,8 +28,11 @@ module Dis
20
28
  end
21
29
 
22
30
  # Returns the storage type name, which Dis will use for
23
- # directory scoping. Defaults to the name of the database table.
31
+ # directory scoping. Defaults to the table name.
32
+ #
33
+ # @return [String]
24
34
  #
35
+ # @example
25
36
  # class Document < ActiveRecord::Base; end
26
37
  # Document.dis_type # => "documents"
27
38
  def dis_type
@@ -30,17 +41,23 @@ module Dis
30
41
 
31
42
  # Sets the storage type name.
32
43
  #
33
- # Take care not to set the same name for multiple models, this will
34
- # cause data loss when a record is destroyed.
44
+ # Take care not to set the same name for multiple models,
45
+ # this will cause data loss when a record is destroyed.
46
+ #
47
+ # @param new_type [String] the new type scope
48
+ # @return [void]
35
49
  def dis_type=(new_type)
36
50
  @dis_type = new_type
37
51
  end
38
52
 
39
53
  # Adds a presence validation on the +data+ attribute.
40
54
  #
41
- # This is better than using `validates :data, presence: true`, since
42
- # that would cause it to load the data from storage on each save.
55
+ # This is preferred over +validates :data, presence: true+,
56
+ # which would load the data from storage on each save.
57
+ #
58
+ # @return [void]
43
59
  #
60
+ # @example
44
61
  # class Document < ActiveRecord::Base
45
62
  # include Dis::Model
46
63
  # validates_data_presence
@@ -7,12 +7,18 @@ module Dis
7
7
  # Facilitates communication between the model and the storage,
8
8
  # and holds any newly assigned data before the record is saved.
9
9
  class Data
10
+ # @param record [ActiveRecord::Base] the model instance
11
+ # @param raw [File, IO, String, nil] newly assigned data
10
12
  def initialize(record, raw = nil)
11
13
  @record = record
12
14
  @raw = raw
13
15
  end
14
16
 
15
17
  # Returns true if two Data objects represent the same data.
18
+ #
19
+ # @param other [Dis::Model::Data, #read, Object] the object to
20
+ # compare
21
+ # @return [Boolean]
16
22
  def ==(other)
17
23
  if !raw? && other.is_a?(self.class) && !other.changed?
18
24
  content_hash == other.content_hash
@@ -24,24 +30,33 @@ module Dis
24
30
  end
25
31
 
26
32
  # Returns true if data exists either in memory or in storage.
33
+ #
34
+ # @return [Boolean]
27
35
  def any?
28
36
  raw? || stored?
29
37
  end
30
38
 
31
39
  # Returns the data as a binary string.
40
+ #
41
+ # @return [String, nil]
32
42
  def read
33
43
  @read ||= read_from(closest)
34
44
  end
35
45
 
36
- # Will be true if data has been explicitely set.
46
+ # Will be true if data has been explicitly set.
37
47
  #
48
+ # @return [Boolean]
49
+ #
50
+ # @example
38
51
  # Dis::Model::Data.new(record).changed? # => false
39
- # Dis::Model::Data.new(record, new_file).changed? # => true
52
+ # Dis::Model::Data.new(record, file).changed? # => true
40
53
  def changed?
41
54
  raw?
42
55
  end
43
56
 
44
- # Returns the length of the data.
57
+ # Returns the length of the data in bytes.
58
+ #
59
+ # @return [Integer]
45
60
  def content_length
46
61
  if raw? && raw.respond_to?(:length)
47
62
  raw.length
@@ -50,9 +65,13 @@ module Dis
50
65
  end
51
66
  end
52
67
 
53
- # Expires a data object from the storage if it's no longer being used
54
- # by existing records. This is triggered from callbacks on the record
55
- # whenever they are changed or destroyed.
68
+ # Expires a data object from the storage if it's no longer
69
+ # being used by existing records. This is triggered from
70
+ # callbacks on the record whenever they are changed or
71
+ # destroyed.
72
+ #
73
+ # @param hash [String] the content hash to expire
74
+ # @return [void]
56
75
  def expire(hash)
57
76
  return if hash.blank?
58
77
 
@@ -63,15 +82,22 @@ module Dis
63
82
  end
64
83
  end
65
84
 
66
- # Stores the data. Returns a hash of the content for reference.
85
+ # Stores the data and returns the content hash.
86
+ #
87
+ # @return [String] the SHA1 content hash
88
+ # @raise [Dis::Errors::NoDataError] if no data has been
89
+ # assigned
67
90
  def store!
68
91
  raise Dis::Errors::NoDataError unless raw?
69
92
 
70
93
  Dis::Storage.store(storage_type, raw)
71
94
  end
72
95
 
73
- # Clears cached data and tempfiles, allowing them to be garbage
74
- # collected. Subsequent calls to read or tempfile will re-fetch.
96
+ # Clears cached data and tempfiles, allowing them to be
97
+ # garbage collected. Subsequent calls to +read+ or +tempfile+
98
+ # will re-fetch from storage.
99
+ #
100
+ # @return [void]
75
101
  def reset_read_cache!
76
102
  @read = nil
77
103
  return unless @tempfile
@@ -80,13 +106,17 @@ module Dis
80
106
  @tempfile = nil
81
107
  end
82
108
 
83
- # Returns the file path to the data. Prefers a local storage path
84
- # to avoid unnecessary copies, falls back to a tempfile.
109
+ # Returns the file path to the data. Prefers a local storage
110
+ # path to avoid unnecessary copies, falls back to a tempfile.
111
+ #
112
+ # @return [String]
85
113
  def file_path
86
114
  local_path || tempfile.path
87
115
  end
88
116
 
89
117
  # Writes the data to a temporary file.
118
+ #
119
+ # @return [Tempfile]
90
120
  def tempfile
91
121
  unless @tempfile
92
122
  @tempfile = Tempfile.new(binmode: true)