fs_cache 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c7307ee577d076235b7d885087638f0576b59d408339a5c02150eeeaec12bc00
4
+ data.tar.gz: 8f66ac733ff6144a2fdcfc0f1fd052996daa24d78d8d64fc4e11415f46352d3a
5
+ SHA512:
6
+ metadata.gz: 9889b8e9d5371facd30e07d7cf46c6d84ddb8bbc8b5059bf7a702bb5bf122c86bba5b01f0f2b37581ffbfaba77ebba7ec80be43ab4d219d02fa15e1de1b25fad
7
+ data.tar.gz: b660da8740076a449a15a8dca09080b5b89e3819181ebce93b335de544dc365bc51043eafdae1dffdca6a9286082206af3594390042779355cc82aedcc4f0741
data/lib/fs_cache.rb ADDED
@@ -0,0 +1,364 @@
1
+ require 'progressbar'
2
+ require 'fs_cache/attribute'
3
+
4
+ # Implement a cache of the file system: directories and files presence.
5
+ # Plugins can be used to also cache attributes of the files, like crc, size...
6
+ class FsCache
7
+
8
+ ATTRIBUTE_PLUGINS_MODULE = FsCache::Attributes
9
+
10
+ # Constructor
11
+ #
12
+ # Parameters::
13
+ # * *attribute_plugins_dirs* (Array<String>): List of directories containing possible attribute plugins [default = []]
14
def initialize(attribute_plugins_dirs: [])
  # Registered attribute plugins, per attribute name
  # Hash<Symbol, Attribute>
  @attribute_plugins = {}
  # Tree of dependent attributes: for each attribute, the list of attributes to be invalidated if it changes.
  # Hash<Symbol, Array<Symbol> >
  @dependent_attributes = {}
  # Files information, per file name (entries are created on first access and can be incomplete):
  # * *exist* (Boolean): Does the file exist?
  # * one key per attribute already fetched through a plugin (size, crc...)
  @files = Hash.new { |h, k| h[k] = {} }
  # Directories information, per directory name (entries are created on first access and can be incomplete):
  # * *files* (Hash<String,nil>): Set of files (base names)
  # * *dirs* (Hash<String,nil>): Set of directories (base names)
  # * *recursive_dirs* (Hash<String,nil>): Set of recursive sub-directories (full paths)
  # * *recursive_files* (Hash<String,nil>): Set of recursive files (full paths)
  @dirs = Hash.new { |h, k| h[k] = {} }

  # Automatically register attributes from the built-in plugins dir, then from the user-provided ones
  (["#{__dir__}/fs_cache/attributes"] + attribute_plugins_dirs).each do |attribute_plugins_dir|
    # Sort the plugin files so that the registration order does not depend on the file system
    Dir.glob("#{attribute_plugins_dir}/*.rb").sort.each do |attribute_plugin_file|
      # The attribute name is the file name without its extension (size.rb => :size)
      attribute = File.basename(attribute_plugin_file, '.*').to_sym
      require attribute_plugin_file
      # The plugin class name is the camel-cased attribute name (my_attr => MyAttr)
      class_name = attribute.to_s.split('_').map(&:capitalize).join.to_sym
      # Look only inside the plugins module itself (inherit = false): an inherited lookup would
      # resolve a plugin named string.rb to the top-level ::String instead of failing.
      unless ATTRIBUTE_PLUGINS_MODULE.const_defined?(class_name, false)
        raise "Attributes plugin #{attribute_plugin_file} does not define the class #{class_name} inside #{ATTRIBUTE_PLUGINS_MODULE}"
      end

      register_attribute_plugin(attribute, ATTRIBUTE_PLUGINS_MODULE.const_get(class_name, false).new)
    end
  end
end
51
+
52
+ # Register a new attributes' plugin.
53
+ # The constructor already registers all plugins found in the plugins directories.
54
+ # This method exists in order to register plugins that could be dynamically instantiated.
55
+ #
56
+ # Parameters::
57
+ # * *attribute* (Symbol): The attribute
58
+ # * *plugin* (Attribute): The attribute plugin
59
def register_attribute_plugin(attribute, plugin)
  puts "Registering attribute plugin #{attribute}..."
  if @attribute_plugins.key?(attribute)
    raise "Attributes plugin #{attribute} is already defined (by class #{@attribute_plugins[attribute].class.name})."
  end

  @attribute_plugins[attribute] = plugin

  # Define the cached getter <attribute>_for(file) directly on this instance.
  # It returns the attribute value, or nil if the file does not exist.
  define_singleton_method(:"#{attribute}_for") do |file|
    file_info = @files[file]
    unless file_info.key?(attribute)
      # Only ask the plugin when the file is there; otherwise nothing gets cached
      file_info[attribute] = plugin.attribute_for(file) if exist?(file)
    end
    file_info[attribute]
  end

  # Mix the plugin's optional Helpers module into the cache class
  plugin_class = plugin.class
  if plugin_class.const_defined?(:Helpers)
    helpers = plugin_class.const_get(:Helpers)
    self.class.include(helpers) unless helpers.nil?
  end

  # Remember which parent attributes invalidate this one when they change
  plugin.invalidated_on_change_of.each do |parent_attribute|
    (@dependent_attributes[parent_attribute] ||= []) << attribute
  end
end
88
+
89
+ # Is a file existing?
90
+ #
91
+ # Parameters::
92
+ # * *file* (String): File name
93
+ # Result::
94
+ # * Boolean: Does the file exist?
95
def exist?(file)
  file_info = @files[file]
  unless file_info.key?(:exist)
    file_info[:exist] =
      if !file_info.empty?
        # At least 1 attribute is already cached for this file: it means it exists
        true
      else
        dir = File.dirname(file)
        # Only trust the directory cache when the directory has really been listed:
        # an entry without :files carries no information about its content.
        listed_files = @dirs.key?(dir) ? @dirs[dir][:files] : nil
        if listed_files
          listed_files.key?(File.basename(file))
        else
          File.exist?(file)
        end
      end
  end
  file_info[:exist]
end
114
+
115
+ # Get list of files from a directory (base names)
116
+ #
117
+ # Parameters::
118
+ # * *dir* (String): The directory to get files from
119
+ # Result::
120
+ # * Array<String>: List of file base names
121
def files_in(dir)
  # Make sure the directory has been listed before reading its cached content
  ensure_dir_data(dir)
  dir_info = @dirs[dir]
  dir_info[:files].keys
end
125
+
126
+ # Get recursive list of directories from a directory
127
+ #
128
+ # Parameters::
129
+ # * *dir* (String): The directory to get other directories from
130
+ # Result::
131
+ # * Array<String>: List of directories
132
def dirs_from(dir)
  # Serve the memoized answer when this directory has already been walked
  return @dirs[dir][:recursive_dirs].keys if @dirs[dir].key?(:recursive_dirs)

  ensure_dir_data(dir)
  collected = {}
  @dirs[dir][:dirs].each_key do |subdir|
    child = "#{dir}/#{subdir}"
    collected[child] = nil
    # Depth-first: the descendants of a sub-directory come right after it
    dirs_from(child).each { |descendant| collected[descendant] = nil }
  end
  @dirs[dir][:recursive_dirs] = collected
  collected.keys
end
145
+
146
+ # Get recursive list of files from a directory
147
+ #
148
+ # Parameters::
149
+ # * *dir* (String): The directory to get files from
150
+ # Result::
151
+ # * Array<String>: List of files
152
def files_from(dir)
  # Serve the memoized answer when this directory has already been walked
  return @dirs[dir][:recursive_files].keys if @dirs[dir].key?(:recursive_files)

  ensure_dir_data(dir)
  collected = {}
  # Direct files first (as full paths), then the files of each sub-directory
  @dirs[dir][:files].each_key { |file| collected["#{dir}/#{file}"] = nil }
  @dirs[dir][:dirs].each_key do |subdir|
    files_from("#{dir}/#{subdir}").each { |file| collected[file] = nil }
  end
  @dirs[dir][:recursive_files] = collected
  collected.keys
end
163
+
164
+ # Scan files and directories from a list of directories.
165
+ # Use a progress bar.
166
+ #
167
+ # Parameters::
168
+ # * *dirs* (Array<String>): List of directories to preload
169
+ # * *include_attributes* (Array<Symbol> or nil): List of attributes to scan, or nil for all [default = nil]
170
+ # * *exclude_attributes* (Array<Symbol>): List of attributes to ignore while scanning [default = []]
171
def scan(dirs, include_attributes: nil, exclude_attributes: [])
  progressbar = ProgressBar.create(title: 'Indexing files')
  candidate_attributes = include_attributes.nil? ? @attribute_plugins.keys : include_attributes
  attributes_to_scan = candidate_attributes - exclude_attributes
  # Walk every directory tree: dirs_from is called for its caching side effect,
  # files_from provides the files to index.
  files = dirs.flat_map do |dir|
    dirs_from(dir)
    files_from(dir)
  end
  progressbar.total = files.size
  files.each do |file|
    exist?(file)
    # Calling each attribute getter is what populates the cache
    attributes_to_scan.each { |attribute| send("#{attribute}_for", file) }
    progressbar.increment
  end
end
189
+
190
+ # Serialize into JSON.
191
+ #
192
+ # Result::
193
+ # * Object: JSON object
194
def to_json
  # Note: this exposes the live internal hashes, not copies
  { files: @files, dirs: @dirs }
end
200
+
201
+ # Get data from JSON.
202
+ #
203
+ # Parameters::
204
+ # * *json* (Object): JSON object
205
def from_json(json)
  sections = json.transform_keys(&:to_sym)
  # Rebuild one of the 2 databases: symbolize the info keys of each entry and
  # restore the auto-creating default. A missing section is treated as empty.
  load_section = lambda do |entries|
    loaded = Hash.new { |h, k| h[k] = {} }
    (entries || {}).each { |path, info| loaded[path] = info.transform_keys(&:to_sym) }
    loaded
  end
  @files = load_section.call(sections[:files])
  @dirs = load_section.call(sections[:dirs])
end
212
+
213
+ # Notify the file system that a given file has been deleted
214
+ #
215
+ # Parameters::
216
+ # * *file* (String): File being deleted
217
def notify_file_rm(file)
  # Forget every cached attribute: all we know now is that the file is gone
  @files.store(file, { exist: false })
  unregister_file_from_dirs(file)
end
221
+
222
+ # Notify the file system of a file copy
223
+ #
224
+ # Parameters::
225
+ # * *src* (String): Origin file
226
+ # * *dst* (String): Destination file
227
def notify_file_cp(src, dst)
  source_known = @files.key?(src)
  # A copy of an unknown file at least tells us that the source exists
  @files[src] = { exist: true } unless source_known
  # The destination inherits a (shallow) copy of what is cached for a known source
  @files[dst] = source_known ? @files[src].clone : { exist: true }
  register_file_in_dirs(dst)
end
236
+
237
+ # Notify the file system of a file move
238
+ #
239
+ # Parameters::
240
+ # * *src* (String): Origin file
241
+ # * *dst* (String): Destination file
242
def notify_file_mv(src, dst)
  # Moving a file onto itself changes nothing. Without this guard the copy+remove
  # sequence below would leave the file cached as deleted.
  return if src == dst

  notify_file_cp(src, dst)
  notify_file_rm(src)
end
246
+
247
+ # Check our info against file system changes.
248
+ # This detects
249
+ # * files that have been deleted,
250
+ # * any change in the directories structure,
251
+ # * any change in the attributes that are already part of the cache and that are not ignored explicitly.
252
+ #
253
+ # Parameters::
254
+ # * *include_attributes* (Array<Symbol> or nil): List of attributes to scan, or nil for all [default = nil]
255
+ # * *exclude_attributes* (Array<Symbol>): List of attributes to ignore while scanning [default = []]
256
def check(include_attributes: nil, exclude_attributes: [])
  progressbar = ProgressBar.create(title: 'Refreshing files info')
  attributes_to_scan = (include_attributes.nil? ? @attribute_plugins.keys : include_attributes) - exclude_attributes
  progressbar.total = @files.size
  @files.each do |file, file_info|
    if File.exist?(file)
      if file_info.key?(:exist) && !file_info[:exist]
        # This file has been added when we thought it was missing
        file_info.replace(exist: true)
      else
        # Check attributes that are already present
        (file_info.keys & attributes_to_scan).each do |attribute|
          # An attribute invalidated earlier in this pass must stay invalidated:
          # recomputing it here would depend on the attributes order. It will be
          # fetched again lazily by its getter.
          next unless file_info.key?(attribute)

          new_attribute = @attribute_plugins[attribute].attribute_for(file)
          next if file_info[attribute] == new_attribute

          # Attribute has changed
          file_info[attribute] = new_attribute
          # If some other attributes were depending on this one, invalidate them
          @dependent_attributes.fetch(attribute, []).each do |dependent_attribute|
            file_info.delete(dependent_attribute)
          end
        end
      end
    elsif !file_info.key?(:exist) || file_info[:exist]
      # This file has been removed when we thought it was there
      file_info.replace(exist: false)
    end
    progressbar.increment
  end
  # Checking @dirs would need the same Dir.glob calls as rebuilding it, so just drop it: it gets rebuilt on demand.
  @dirs.clear
end
291
+
292
+ # Remove attributes for a list of files
293
+ #
294
+ # Parameters::
295
+ # * *files* (Array<String>): The list of files to invalidate attributes for
296
+ # * *include_attributes* (Array<Symbol> or nil): List of attributes to scan, or nil for all [default = nil]
297
+ # * *exclude_attributes* (Array<Symbol>): List of attributes to ignore while scanning [default = []]
298
def invalidate(files, include_attributes: nil, exclude_attributes: [])
  candidate_attributes = include_attributes.nil? ? @attribute_plugins.keys : include_attributes
  attributes_to_invalidate = candidate_attributes - exclude_attributes
  files.each do |file|
    # Files we know nothing about are left alone (and no entry is created for them)
    next unless @files.key?(file)

    file_info = @files[file]
    attributes_to_invalidate.each { |attribute| file_info.delete(attribute) }
  end
end
308
+
309
+ private
310
+
311
+ # Register a file in the @dirs structure
312
+ #
313
+ # Parameters::
314
+ # * *file* (String): File to register in @dirs
315
def register_file_in_dirs(file)
  file_dir = File.dirname(file)
  segments = file_dir.split('/')
  # Visit every ancestor directory of the file (a, a/b, a/b/c...) and add the
  # file to the recursive listings that have already been computed.
  segments.each_index do |last|
    ancestor = segments.first(last + 1).join('/')
    next unless @dirs.key?(ancestor) && @dirs[ancestor].key?(:recursive_files)

    recursive_files = @dirs[ancestor][:recursive_files]
    recursive_files[file] = nil unless recursive_files.key?(file)
  end
  # Then add it to the direct listing of its own directory, if it is known
  base_name = File.basename(file)
  if @dirs.key?(file_dir) && @dirs[file_dir].key?(:files)
    direct_files = @dirs[file_dir][:files]
    direct_files[base_name] = nil unless direct_files.key?(base_name)
  end
end
325
+
326
+ # Unregister a file in the @dirs structure
327
+ #
328
+ # Parameters::
329
+ # * *file* (String): File to unregister from @dirs
330
def unregister_file_from_dirs(file)
  file_dir = File.dirname(file)
  segments = file_dir.split('/')
  # Visit every ancestor directory of the file (a, a/b, a/b/c...) and drop the
  # file from the recursive listings that have already been computed.
  segments.each_index do |last|
    ancestor = segments.first(last + 1).join('/')
    next unless @dirs.key?(ancestor) && @dirs[ancestor].key?(:recursive_files)

    @dirs[ancestor][:recursive_files].delete(file)
  end
  # Then drop it from the direct listing of its own directory, if it is known
  return unless @dirs.key?(file_dir) && @dirs[file_dir].key?(:files)

  @dirs[file_dir][:files].delete(File.basename(file))
end
340
+
341
+ # Populate a given directory data (files and dirs)
342
+ #
343
+ # Parameters::
344
+ # * *dir* (String): Directory to get data from
345
def ensure_dir_data(dir)
  # Nothing to do when this directory has already been listed
  return if @dirs[dir].key?(:files)

  files = {}
  dirs = {}
  # List the entries relatively to dir (base:) instead of interpolating dir in the pattern:
  # a directory name containing glob characters ([, ], {, }, *, ?) would otherwise be
  # interpreted as a pattern and give a wrong listing. FNM_DOTMATCH includes hidden entries.
  Dir.glob('*', File::FNM_DOTMATCH, base: dir).each do |base_name|
    # Depending on the Ruby version, dot matching can also return the . and .. entries
    next if base_name == '.' || base_name == '..'

    if File.directory?(File.join(dir, base_name))
      dirs[base_name] = nil
    else
      files[base_name] = nil
    end
  end
  @dirs[dir] = {
    files: files,
    dirs: dirs
  }
end
363
+
364
+ end
@@ -0,0 +1,20 @@
1
class FsCache

  # Namespace of the attribute plugin classes (one class per attribute)
  module Attributes; end

  # Base class of every attribute plugin
  class Attribute

    # Give the attributes whose change invalidates this one.
    # If any of those attributes is changing on a file, then our attribute is reset for the file.
    # By default an attribute depends on no other attribute.
    #
    # Result::
    # * Array<Symbol>: List of attributes this one depends on
    def invalidated_on_change_of
      Array.new
    end

  end

end
@@ -0,0 +1,105 @@
1
+ require 'zlib'
2
+
3
+ class FsCache
4
+
5
+ module Attributes
6
+
7
+ # CRC attribute. Can be:
8
+ # * String: Internal CRC computed from files by blocks
9
+ class Crc < Attribute
10
+
11
+ # Size of blocks to compute CRCs in bytes. Changing this value will invalidate previously computed CRCs.
12
+ CRC_BLOCK_SIZE = 32 * 1024 * 1024 # 32 MB
13
+
14
+ # Get the attribute for a given file
15
+ #
16
+ # Parameters::
17
+ # * *file* (String): File to get the attribute for
18
+ # Result::
19
+ # * Object: Corresponding attribute value
20
def attribute_for(file)
  # Upper-case hexadecimal CRC32 of each block, in file order
  block_digests = []
  File.open(file, 'rb') do |io|
    loop do
      chunk = io.read(CRC_BLOCK_SIZE)
      # read returns nil once the end of the file is reached
      break if chunk.nil?

      block_digests << Zlib.crc32(chunk, 0).to_s(16).upcase
    end
  end
  # The file CRC is the CRC32 of the concatenated block CRCs
  Zlib.crc32(block_digests.join, 0).to_s(16).upcase
end
30
+
31
+ # Get the list of other attributes that invalidate this one.
32
+ # If any of those attributes is changing on a file, then reset our attribute for the file.
33
+ #
34
+ # Result::
35
+ # * Array<Symbol>: List of dependent attributes
36
def invalidated_on_change_of
  # A file whose size changed needs its CRC reset
  %i[size]
end
39
+
40
+ # Add helpers to the cache
41
+ module Helpers
42
+
43
+ # Provide info on the differences between 2 directories.
44
+ #
45
+ # Parameters::
46
+ # * *dir1* (String): First directory
47
+ # * *dir2* (String): Second directory
48
+ # Result::
49
+ # * Hash<Symbol,Object>: Difference between the 2 directories (dir2 - dir1):
50
+ # * *same* (Array<String>): Same files
51
+ # * *renamed* (Array<[String,String]>): Renamed files (from dir1 to dir2: [file_base1, file_base2])
52
+ # * *added* (Array<String>): Added files
53
+ # * *deleted* (Array<String>): Deleted files
54
+ # * *different* (Array<String>): Different files
55
def diff_dirs(dir1, dir2)
  # Full path of each direct file, per base name
  paths1 = files_in(dir1).map { |name| [name, "#{dir1}/#{name}"] }.to_h
  paths2 = files_in(dir2).map { |name| [name, "#{dir2}/#{name}"] }.to_h
  same = []
  different = []
  renamed = []
  # First pass: names present on both sides are either same or different, depending on their CRC
  (paths1.keys & paths2.keys).each do |name|
    bucket = crc_for(paths2[name]) == crc_for(paths1[name]) ? same : different
    bucket << name
    paths1.delete(name)
    paths2.delete(name)
  end
  # Second pass: a leftover file of dir1 whose CRC matches a leftover file of dir2 has been renamed
  paths1.keys.each do |name1|
    crc1 = crc_for(paths1[name1])
    name2, = paths2.find { |_name, path2| crc_for(path2) == crc1 }
    next if name2.nil?

    renamed << [name1, name2]
    paths1.delete(name1)
    paths2.delete(name2)
  end
  # Whatever remains exists on one side only
  leftover1 = paths1.keys
  leftover2 = paths2.keys
  {
    same: same,
    renamed: renamed,
    added: leftover2 - leftover1,
    deleted: leftover1 - leftover2,
    different: different
  }
end
98
+
99
+ end
100
+
101
+ end
102
+
103
+ end
104
+
105
+ end
@@ -0,0 +1,38 @@
1
+ class FsCache
2
+
3
+ module Attributes
4
+
5
+ # Size attribute. Can be:
6
+ # * Integer: File size
7
+ class Size < Attribute
8
+
9
+ # Get the attribute for a given file
10
+ #
11
+ # Parameters::
12
+ # * *file* (String): File to get the attribute for
13
+ # Result::
14
+ # * Object: Corresponding attribute value
15
def attribute_for(file)
  # Size in bytes, as reported by the file system
  File.size(file)
end
18
+
19
+ # Add helpers to the cache
20
+ module Helpers
21
+
22
+ # Is a file empty?
23
+ #
24
+ # Parameters::
25
+ # * *file* (String): File name
26
+ # Result::
27
+ # * Boolean: Is the file empty?
28
def empty?(file)
  byte_count = size_for(file)
  # Compare with == on purpose: a nil size (unknown file) must answer false, not raise
  byte_count == 0
end
31
+
32
+ end
33
+
34
+ end
35
+
36
+ end
37
+
38
+ end
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fs_cache
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Muriel Salvan
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-07-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: progressbar
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
27
+ description:
28
+ email:
29
+ - muriel@x-aeon.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - lib/fs_cache.rb
35
+ - lib/fs_cache/attribute.rb
36
+ - lib/fs_cache/attributes/crc.rb
37
+ - lib/fs_cache/attributes/size.rb
38
+ homepage: http://x-aeon.com
39
+ licenses:
40
+ - BSD-3-Clause
41
+ metadata:
42
+ homepage_uri: http://x-aeon.com
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 2.7.6
60
+ signing_key:
61
+ specification_version: 4
62
+ summary: Simple file system caching to perform huge and repetitive accesses to files,
63
+ directories and various files' content analysis
64
+ test_files: []