fs_cache 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c7307ee577d076235b7d885087638f0576b59d408339a5c02150eeeaec12bc00
4
+ data.tar.gz: 8f66ac733ff6144a2fdcfc0f1fd052996daa24d78d8d64fc4e11415f46352d3a
5
+ SHA512:
6
+ metadata.gz: 9889b8e9d5371facd30e07d7cf46c6d84ddb8bbc8b5059bf7a702bb5bf122c86bba5b01f0f2b37581ffbfaba77ebba7ec80be43ab4d219d02fa15e1de1b25fad
7
+ data.tar.gz: b660da8740076a449a15a8dca09080b5b89e3819181ebce93b335de544dc365bc51043eafdae1dffdca6a9286082206af3594390042779355cc82aedcc4f0741
data/lib/fs_cache.rb ADDED
@@ -0,0 +1,364 @@
1
+ require 'progressbar'
2
+ require 'fs_cache/attribute'
3
+
4
+ # Implement a cache of the file system: directories and files presence.
5
+ # Plugins can be used to also cache attributes of the files, like crc, size...
6
+ class FsCache
7
+
8
+ ATTRIBUTE_PLUGINS_MODULE = FsCache::Attributes
9
+
10
+ # Constructor
11
+ #
12
+ # Parameters::
13
+ # * *attribute_plugins_dirs* (Array<String>): List of directories containing possible attribute plugins [default = []]
14
+ def initialize(attribute_plugins_dirs: [])
15
+ # List of possible attribute plugins, per attribute name
16
+ # Hash<Symbol, Attribute>
17
+ @attribute_plugins = {}
18
+ # Tree of dependent attributes: for each attribute in this tree, the list of attributes to be invalidated if this attribute changes.
19
+ # Hash<Symbol, Array<Symbol> >
20
+ @dependent_attributes = {}
21
+ # Big database of files information
22
+ # Hash<String, Hash<Symbol,Object> >: For each file name, the file info (can be incomplete if it was never fetched):
23
+ # * *exist* (Boolean): Does the file exist?
24
+ # * *size* (Integer): File size.
25
+ # * *crc* (String): File CRC.
26
+ # * *corruption* (false or Object): Info about this file's corruption, or false if sane.
27
+ @files = Hash.new { |h, k| h[k] = {} }
28
+ # Directories information
29
+ # Hash<String, Hash<Symbol,Object> >: For each directory name, the dir info (can be incomplete if it was never fetched):
30
+ # * *files* (Hash<String,nil>): Set of files (base names)
31
+ # * *dirs* (Hash<String,nil>): Set of directories (base names)
32
+ # * *recursive_dirs* (Hash<String,nil>): Set of recursive sub-directories (full paths)
33
+ # * *recursive_files* (Hash<String,nil>): Set of recursive files (full paths)
34
+ @dirs = Hash.new { |h, k| h[k] = {} }
35
+
36
+ # Automatically register attributes from the plugins dirs
37
+ (["#{__dir__}/fs_cache/attributes"] + attribute_plugins_dirs).each do |attribute_plugins_dir|
38
+ Dir.glob("#{attribute_plugins_dir}/*.rb") do |attribute_plugin_file|
39
+ attribute = File.basename(attribute_plugin_file)[0..-4].to_sym
40
+ require attribute_plugin_file
41
+ class_name = attribute.to_s.split('_').collect(&:capitalize).join.to_sym
42
+ if ATTRIBUTE_PLUGINS_MODULE.const_defined?(class_name)
43
+ plugin_class = ATTRIBUTE_PLUGINS_MODULE.const_get(class_name)
44
+ register_attribute_plugin(attribute, plugin_class.new)
45
+ else
46
+ raise "Attributes plugin #{attribute_plugin_file} does not define the class #{class_name} inside #{ATTRIBUTE_PLUGINS_MODULE}" if plugin_class.nil?
47
+ end
48
+ end
49
+ end
50
+ end
51
+
52
+ # Register a new attributes' plugin.
53
+ # The constructor already registers all plugins found in the plugins directories.
54
+ # This method exists in order to register plugins that could be dynamically instantiated.
55
+ #
56
+ # Parameters::
57
+ # * *attribute* (Symbol): The attribute
58
+ # * *plugin* (Attribute): The attribute plugin
59
+ def register_attribute_plugin(attribute, plugin)
60
+ puts "Registering attribute plugin #{attribute}..."
61
+ raise "Attributes plugin #{attribute} is already defined (by class #{@attribute_plugins[attribute].class.name})." if @attribute_plugins.key?(attribute)
62
+ @attribute_plugins[attribute] = plugin
63
+ # Define the getter methods for this attribute, directly in the base class for performance purposes
64
+
65
+ # Get the attribute for a given file.
66
+ # Use the cache if possible.
67
+ #
68
+ # Parameters::
69
+ # * *file* (String): File path for which we look for the attribute
70
+ # Result::
71
+ # * Object: Corresponding attribute value, or nil if the file does not exist
72
+ define_singleton_method("#{attribute}_for".to_sym) do |file|
73
+ @files[file][attribute] = plugin.attribute_for(file) if !@files[file].key?(attribute) && exist?(file)
74
+ @files[file][attribute]
75
+ end
76
+
77
+ # If there are some helpers, register them too
78
+ if plugin.class.const_defined?(:Helpers)
79
+ helpers_module = plugin.class.const_get(:Helpers)
80
+ self.class.include helpers_module unless helpers_module.nil?
81
+ end
82
+ # If this attribute is dependent on others, remember it too
83
+ plugin.invalidated_on_change_of.each do |parent_attribute|
84
+ @dependent_attributes[parent_attribute] = [] unless @dependent_attributes.key?(parent_attribute)
85
+ @dependent_attributes[parent_attribute] << attribute
86
+ end
87
+ end
88
+
89
+ # Is a file existing?
90
+ #
91
+ # Parameters::
92
+ # * *file* (String): File name
93
+ # Result::
94
+ # * String: Is the file existing?
95
+ def exist?(file)
96
+ # If there is at least 1 attribute for this file it means that it exists
97
+ unless @files[file].key?(:exist)
98
+ @files[file][:exist] =
99
+ # If we have an attribute for this file, it means it exist
100
+ if @files[file].size > 0
101
+ true
102
+ else
103
+ dir = File.dirname(file)
104
+ if @dirs.key?(dir)
105
+ # We know about its directory, so we should know if it is there
106
+ @dirs[dir][:files].key?(File.basename(file))
107
+ else
108
+ File.exist?(file)
109
+ end
110
+ end
111
+ end
112
+ @files[file][:exist]
113
+ end
114
+
115
+ # Get list of files from a directory (base names)
116
+ #
117
+ # Parameters::
118
+ # * *dir* (String): The directory to get files from
119
+ # Result::
120
+ # * Array<String>: List of file base names
121
+ def files_in(dir)
122
+ ensure_dir_data(dir)
123
+ @dirs[dir][:files].keys
124
+ end
125
+
126
+ # Get recursive list of directories from a directory
127
+ #
128
+ # Parameters::
129
+ # * *dir* (String): The directory to get other directories from
130
+ # Result::
131
+ # * Array<String>: List of directories
132
+ def dirs_from(dir)
133
+ unless @dirs[dir].key?(:recursive_dirs)
134
+ ensure_dir_data(dir)
135
+ recursive_dirs = {}
136
+ @dirs[dir][:dirs].keys.each do |subdir|
137
+ full_subdir = "#{dir}/#{subdir}"
138
+ recursive_dirs[full_subdir] = nil
139
+ recursive_dirs.merge!(Hash[dirs_from(full_subdir).map { |subsubdir| [subsubdir, nil] }])
140
+ end
141
+ @dirs[dir][:recursive_dirs] = recursive_dirs
142
+ end
143
+ @dirs[dir][:recursive_dirs].keys
144
+ end
145
+
146
+ # Get recursive list of files from a directory
147
+ #
148
+ # Parameters::
149
+ # * *dir* (String): The directory to get other directories from
150
+ # Result::
151
+ # * Array<String>: List of files
152
+ def files_from(dir)
153
+ unless @dirs[dir].key?(:recursive_files)
154
+ ensure_dir_data(dir)
155
+ recursive_files = Hash[@dirs[dir][:files].keys.map { |file| ["#{dir}/#{file}", nil] }]
156
+ @dirs[dir][:dirs].keys.each do |subdir|
157
+ recursive_files.merge!(Hash[files_from("#{dir}/#{subdir}").map { |file| [file, nil] }])
158
+ end
159
+ @dirs[dir][:recursive_files] = recursive_files
160
+ end
161
+ @dirs[dir][:recursive_files].keys
162
+ end
163
+
164
+ # Scan files and directories from a list of directories.
165
+ # Use a progress bar.
166
+ #
167
+ # Parameters::
168
+ # * *dirs* (Array<String>): List of directories to preload
169
+ # * *include_attributes* (Array<Symbol> or nil): List of attributes to scan, or nil for all [default = nil]
170
+ # * *exclude_attributes* (Array<Symbol>): List of attributes to ignore while scanning [default = []]
171
+ def scan(dirs, include_attributes: nil, exclude_attributes: [])
172
+ progressbar = ProgressBar.create(title: 'Indexing files')
173
+ attributes_to_scan = (include_attributes.nil? ? @attribute_plugins.keys : include_attributes) - exclude_attributes
174
+ files = dirs.
175
+ map do |dir|
176
+ dirs_from(dir)
177
+ files_from(dir)
178
+ end.
179
+ flatten
180
+ progressbar.total = files.size
181
+ files.each do |file|
182
+ exist?(file)
183
+ attributes_to_scan.each do |attribute|
184
+ self.send "#{attribute}_for", file
185
+ end
186
+ progressbar.increment
187
+ end
188
+ end
189
+
190
+ # Serialize into JSON.
191
+ #
192
+ # Result::
193
+ # * Object: JSON object
194
+ def to_json
195
+ {
196
+ files: @files,
197
+ dirs: @dirs
198
+ }
199
+ end
200
+
201
+ # Get data from JSON.
202
+ #
203
+ # Parameters::
204
+ # * *json* (Object): JSON object
205
+ def from_json(json)
206
+ json = json.transform_keys(&:to_sym)
207
+ @files = Hash[json[:files].map { |file, file_info| [file, file_info.transform_keys(&:to_sym)] }]
208
+ @files.default_proc = proc { |h, k| h[k] = {} }
209
+ @dirs = Hash[json[:dirs].map { |dir, dir_info| [dir, dir_info.transform_keys(&:to_sym)] }]
210
+ @dirs.default_proc = proc { |h, k| h[k] = {} }
211
+ end
212
+
213
+ # Notify the file system that a given file has been deleted
214
+ #
215
+ # Parameters::
216
+ # * *file* (String): File being deleted
217
+ def notify_file_rm(file)
218
+ @files[file] = { exist: false }
219
+ unregister_file_from_dirs(file)
220
+ end
221
+
222
+ # Notify the file system of a file copy
223
+ #
224
+ # Parameters::
225
+ # * *src* (String): Origin file
226
+ # * *dst* (String): Destination file
227
+ def notify_file_cp(src, dst)
228
+ if @files.key?(src)
229
+ @files[dst] = @files[src].clone
230
+ else
231
+ @files[src] = { exist: true }
232
+ @files[dst] = { exist: true }
233
+ end
234
+ register_file_in_dirs(dst)
235
+ end
236
+
237
+ # Notify the file system of a file move
238
+ #
239
+ # Parameters::
240
+ # * *src* (String): Origin file
241
+ # * *dst* (String): Destination file
242
+ def notify_file_mv(src, dst)
243
+ notify_file_cp(src, dst)
244
+ notify_file_rm(src)
245
+ end
246
+
247
+ # Check our info against file system changes.
248
+ # This detects
249
+ # * files that have been deleted,
250
+ # * any change in the directories structure,
251
+ # * any change in the attributes that are already part of the cache and that are not ignored explicitely.
252
+ #
253
+ # Parameters::
254
+ # * *include_attributes* (Array<Symbol> or nil): List of attributes to scan, or nil for all [default = nil]
255
+ # * *exclude_attributes* (Array<Symbol>): List of attributes to ignore while scanning [default = []]
256
+ def check(include_attributes: nil, exclude_attributes: [])
257
+ progressbar = ProgressBar.create(title: 'Refreshing files info')
258
+ attributes_to_scan = (include_attributes.nil? ? @attribute_plugins.keys : include_attributes) - exclude_attributes
259
+ progressbar.total = @files.size
260
+ @files.each do |file, file_info|
261
+ if File.exist?(file)
262
+ if file_info.key?(:exist) && !file_info[:exist]
263
+ # This file has been added when we thought it was missing
264
+ file_info.replace(exist: true)
265
+ else
266
+ # Check attributes that are already present
267
+ (file_info.keys & attributes_to_scan).each do |attribute|
268
+ current_attribute = file_info[attribute]
269
+ new_attribute = @attribute_plugins[attribute].attribute_for(file)
270
+ if current_attribute != new_attribute
271
+ # Attribute has changed
272
+ file_info[attribute] = new_attribute
273
+ # If some other attributes were depending on this one, invalidate them
274
+ if @dependent_attributes.key?(attribute)
275
+ @dependent_attributes[attribute].each do |dependent_attribute|
276
+ file_info.delete(dependent_attribute)
277
+ end
278
+ end
279
+ end
280
+ end
281
+ end
282
+ elsif !file_info.key?(:exist) || file_info[:exist]
283
+ # This file has been removed when we thought it was there
284
+ file_info.replace(exist: false)
285
+ end
286
+ progressbar.increment
287
+ end
288
+ # Rebuilding @dirs structure needs to make the Dir.glob commands once again. Therefore there is no need to check it. Removing it will rebuild it anyway.
289
+ @dirs.clear
290
+ end
291
+
292
+ # Remove attributes for a list of files
293
+ #
294
+ # Parameters::
295
+ # * *files* (Array<String>): The list of files to invalidate attributes for
296
+ # * *include_attributes* (Array<Symbol> or nil): List of attributes to scan, or nil for all [default = nil]
297
+ # * *exclude_attributes* (Array<Symbol>): List of attributes to ignore while scanning [default = []]
298
+ def invalidate(files, include_attributes: nil, exclude_attributes: [])
299
+ attributes_to_invalidate = ((include_attributes.nil? ? @attribute_plugins.keys : include_attributes) - exclude_attributes)
300
+ files.each do |file|
301
+ if @files.key?(file)
302
+ attributes_to_invalidate.each do |attribute|
303
+ @files[file].delete(attribute)
304
+ end
305
+ end
306
+ end
307
+ end
308
+
309
+ private
310
+
311
+ # Register a file in the @dirs structure
312
+ #
313
+ # Parameters::
314
+ # * *file* (String): File to register in @dirs
315
+ def register_file_in_dirs(file)
316
+ file_dir = File.dirname(file)
317
+ split_dir = file_dir.split('/')
318
+ split_dir.size.times do |idx|
319
+ dir = split_dir[0..idx].join('/')
320
+ @dirs[dir][:recursive_files][file] = nil if @dirs.key?(dir) && @dirs[dir].key?(:recursive_files) && !@dirs[dir][:recursive_files].key?(file)
321
+ end
322
+ base_name = File.basename(file)
323
+ @dirs[file_dir][:files][base_name] = nil if @dirs.key?(file_dir) && @dirs[file_dir].key?(:files) && !@dirs[file_dir][:files].key?(base_name)
324
+ end
325
+
326
+ # Unregister a file in the @dirs structure
327
+ #
328
+ # Parameters::
329
+ # * *file* (String): File to unregister from @dirs
330
+ def unregister_file_from_dirs(file)
331
+ file_dir = File.dirname(file)
332
+ split_dir = file_dir.split('/')
333
+ split_dir.size.times do |idx|
334
+ dir = split_dir[0..idx].join('/')
335
+ # Remove any reference of our file to this dir info
336
+ @dirs[dir][:recursive_files].delete(file) if @dirs.key?(dir) && @dirs[dir].key?(:recursive_files)
337
+ end
338
+ @dirs[file_dir][:files].delete(File.basename(file)) if @dirs.key?(file_dir) && @dirs[file_dir].key?(:files)
339
+ end
340
+
341
+ # Populate a given directory data (files and dirs)
342
+ #
343
+ # Parameters::
344
+ # * *dir* (String): Directory to get data from
345
+ def ensure_dir_data(dir)
346
+ unless @dirs[dir].key?(:files)
347
+ files = {}
348
+ dirs = {}
349
+ Dir.glob("#{dir}/*", File::FNM_DOTMATCH).each do |file|
350
+ base_name = File.basename(file)
351
+ if File.directory?(file)
352
+ dirs[base_name] = nil if base_name != '.' && base_name != '..'
353
+ else
354
+ files[base_name] = nil
355
+ end
356
+ end
357
+ @dirs[dir] = {
358
+ files: files,
359
+ dirs: dirs
360
+ }
361
+ end
362
+ end
363
+
364
+ end
@@ -0,0 +1,20 @@
1
class FsCache

  # Namespace in which the attribute plugins define their classes
  module Attributes
  end

  # Common ancestor for all attributes
  class Attribute

    # Get the list of other attributes that invalidate this one.
    # If any of those attributes is changing on a file, then our attribute is reset for the file.
    # By default an attribute does not depend on any other one.
    #
    # Result::
    # * Array<Symbol>: List of attributes this one depends on
    def invalidated_on_change_of
      []
    end

  end

end
@@ -0,0 +1,105 @@
1
+ require 'zlib'
2
+
3
+ class FsCache
4
+
5
+ module Attributes
6
+
7
+ # CRC attribute. Can be:
8
+ # * String: Internal CRC computed from files by blocks
9
+ class Crc < Attribute
10
+
11
+ # Size of blocks to compute CRCs in bytes. Changing this value will invalidate previously computed CRCs.
12
+ CRC_BLOCK_SIZE = 32 * 1024 * 1024 # 32 MB
13
+
14
+ # Get the attribute for a given file
15
+ #
16
+ # Parameters::
17
+ # * *file* (String): File to get the attribute for
18
+ # Result::
19
+ # * Object: Corresponding attribute value
20
+ def attribute_for(file)
21
+ blocks_crc = ''
22
+ File.open(file, 'rb') do |file_io|
23
+ buffer = nil
24
+ while (buffer = file_io.read(CRC_BLOCK_SIZE))
25
+ blocks_crc << Zlib.crc32(buffer, 0).to_s(16).upcase
26
+ end
27
+ end
28
+ Zlib.crc32(blocks_crc, 0).to_s(16).upcase
29
+ end
30
+
31
+ # Get the list of other attributes that invalidate this one.
32
+ # If any of those attributes is chaning on a file, then reset our attribute for the file.
33
+ #
34
+ # Result::
35
+ # * Array<Symbol>: List of dependent attributes
36
+ def invalidated_on_change_of
37
+ [:size]
38
+ end
39
+
40
+ # Add helpers to the cache
41
+ module Helpers
42
+
43
+ # Provide info on the differences between 2 directories.
44
+ #
45
+ # Parameters::
46
+ # * *dir1* (String): First directory
47
+ # * *dir2* (String): Second directory
48
+ # Result::
49
+ # * Hash<Symbol,Object>: Difference between the 2 directories (dir2 - dir1):
50
+ # * *same* (Array<String>): Same files
51
+ # * *renamed* (Array<[String,String]>): Renamed files (from dir1 to dir2: [file_base1, file_base2])
52
+ # * *added* (Array<String>): Added files
53
+ # * *deleted* (Array<String>): Deleted files
54
+ # * *different* (Array<String>): Different files
55
+ def diff_dirs(dir1, dir2)
56
+ files1 = Hash[files_in(dir1).map { |file| [file, "#{dir1}/#{file}"] }]
57
+ files2 = Hash[files_in(dir2).map { |file| [file, "#{dir2}/#{file}"] }]
58
+ same = []
59
+ different = []
60
+ renamed = []
61
+ # First process files having the same names
62
+ files1.delete_if do |file_base1, file1|
63
+ if files2.key?(file_base1)
64
+ # A file with same name exists in dir2
65
+ if crc_for(files2[file_base1]) == crc_for(file1)
66
+ same << file_base1
67
+ else
68
+ different << file_base1
69
+ end
70
+ files2.delete(file_base1)
71
+ true
72
+ else
73
+ false
74
+ end
75
+ end
76
+ # Then process files having the same CRC among the remaining ones
77
+ files1.delete_if do |file_base1, file1|
78
+ crc1 = crc_for(file1)
79
+ found_file_base2, _found_file2 = files2.find { |_file_base2, file2| crc_for(file2) == crc1 }
80
+ if found_file_base2.nil?
81
+ false
82
+ else
83
+ renamed << [file_base1, found_file_base2]
84
+ files2.delete(found_file_base2)
85
+ true
86
+ end
87
+ end
88
+ remaining_files1 = files1.keys
89
+ remaining_files2 = files2.keys
90
+ {
91
+ same: same,
92
+ renamed: renamed,
93
+ added: remaining_files2 - remaining_files1,
94
+ deleted: remaining_files1 - remaining_files2,
95
+ different: different
96
+ }
97
+ end
98
+
99
+ end
100
+
101
+ end
102
+
103
+ end
104
+
105
+ end
@@ -0,0 +1,38 @@
1
+ class FsCache
2
+
3
+ module Attributes
4
+
5
+ # Size attribute. Can be:
6
+ # * Integer: File size
7
+ class Size < Attribute
8
+
9
+ # Get the attribute for a given file
10
+ #
11
+ # Parameters::
12
+ # * *file* (String): File to get the attribute for
13
+ # Result::
14
+ # * Object: Corresponding attribute value
15
+ def attribute_for(file)
16
+ File.stat(file).size
17
+ end
18
+
19
+ # Add helpers to the cache
20
+ module Helpers
21
+
22
+ # Is a file empty?
23
+ #
24
+ # Parameters::
25
+ # * *file* (String): File name
26
+ # Result::
27
+ # * String: Is the file empty?
28
+ def empty?(file)
29
+ size_for(file) == 0
30
+ end
31
+
32
+ end
33
+
34
+ end
35
+
36
+ end
37
+
38
+ end
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fs_cache
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Muriel Salvan
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-07-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: progressbar
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
27
+ description:
28
+ email:
29
+ - muriel@x-aeon.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - lib/fs_cache.rb
35
+ - lib/fs_cache/attribute.rb
36
+ - lib/fs_cache/attributes/crc.rb
37
+ - lib/fs_cache/attributes/size.rb
38
+ homepage: http://x-aeon.com
39
+ licenses:
40
+ - BSD-3-Clause
41
+ metadata:
42
+ homepage_uri: http://x-aeon.com
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 2.7.6
60
+ signing_key:
61
+ specification_version: 4
62
+ summary: Simple file system caching to perform huge and repetitive accesses to files,
63
+ directories and various files' content analysis
64
+ test_files: []