file_pipeline 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+
3
module FilePipeline
  module FileOperations
    # Normalizes the loosely structured value returned by a file operation
    # into a pair of log entries and data.
    #
    # The object passed to ::new may be +nil+, a hash, any other single object
    # (usually a string or an error), or an array containing any combination
    # of these, including nested arrays.
    #
    # The resulting instance behaves like an array (any message it does not
    # understand itself is forwarded to the parsed array) and always has
    # exactly two elements:
    # * index 0: +nil+, or an array with all arguments that are not a hash
    #   (+nil+ entries are removed)
    # * index 1: +nil+, or a hash
    #
    # Arrays passed to ::new are never modified, so frozen arrays are safe.
    # If more than one hash is passed, the last one is used.
    #
    # ===== Examples
    #
    # When passed +nil+:
    #
    #   LogDataParser.new(nil).to_a
    #   # => [nil, nil]
    #
    # When initialized with an individual string or error, it will be wrapped
    # in an array:
    #
    #   LogDataParser.new(StandardError.new).to_a
    #   # => [[#<StandardError: StandardError>], nil]
    #
    #   LogDataParser.new('a warning').to_a
    #   # => [['a warning'], nil]
    #
    # This is also true when initialized with individual messages or errors
    # along with data:
    #
    #   LogDataParser.new(['a warning', { a_key: 'some value' }]).to_a
    #   # => [['a warning'], { a_key: 'some value' }]
    #
    #   LogDataParser.new(['a warning', { a_key: 'some value' }, 'error']).to_a
    #   # => [['a warning', 'error'], { a_key: 'some value' }]
    #
    # When initialized with a hash, the first element will be +nil+ and the
    # hash will be the second element:
    #
    #   LogDataParser.new({ a_key: 'some value' }).to_a
    #   # => [nil, { a_key: 'some value' }]
    #
    # When initialized with an array that contains neither arrays nor hashes,
    # its entries will become the first element of the result, with the second
    # being +nil+:
    #
    #   LogDataParser.new(['a warning', StandardError.new]).to_a
    #   # => [['a warning', #<StandardError: StandardError>], nil]
    #
    # When initialized with an array containing an array and a hash, the
    # entries of the inner array will be the first element, the hash the
    # second, regardless of their order:
    #
    #   log = ['a warning', 'another warning']
    #   data = { a_key: 'some value' }
    #
    #   LogDataParser.new([log, data]).to_a
    #   # => [['a warning', 'another warning'], { a_key: 'some value' }]
    #
    #   LogDataParser.new([data, log]).to_a
    #   # => [['a warning', 'another warning'], { a_key: 'some value' }]
    #
    # Individual entries and nested arrays are combined in order:
    #
    #   LogDataParser.new(['first', log]).to_a
    #   # => [['first', 'a warning', 'another warning'], nil]
    #
    # When initialized with an array containing a hash and +nil+:
    #
    #   LogDataParser.new([nil, data]).to_a
    #   # => [nil, { a_key: 'some value' }]
    #
    class LogDataParser
      # :args: object
      #
      # Returns a new instance for +object+, which may be +nil+, a hash, another
      # object, or an array, that may itself contain a hash, an array, or other
      # objects.
      def initialize(obj)
        @log_data = normalize(parse(obj))
      end

      # Returns a two element array with an empty array and an empty hash.
      def self.template
        [[], {}]
      end

      private

      # Forwards every message the parsed two element array responds to; this
      # includes +to_a+ and +to_ary+, so instances can be destructured with
      # multiple assignment (<tt>log, data = LogDataParser.new(obj)</tt>).
      def method_missing(method_name, *args, &block)
        return super unless respond_to_missing? method_name

        @log_data.public_send method_name, *args, &block
      end

      # Takes a <tt>[log, data]</tt> pair and returns a new pair in which
      # +nil+ entries have been removed from the log; a log without any
      # entries becomes +nil+. Works on a copy, so arrays handed in by the
      # caller are left untouched.
      def normalize(log_data)
        log, data = log_data
        entries = Array(log).compact
        [entries.empty? ? nil : entries, data]
      end

      # Returns an unnormalized <tt>[log, data]</tt> pair for +obj+.
      def parse(obj)
        case obj
        when Array then parse_array obj
        when Hash then [nil, obj]
        when nil then [nil, nil]
        else [[obj], nil]
        end
      end

      # Sorts the elements of the array +obj+ into a <tt>[log, data]</tt>
      # pair. Starting from <tt>[nil, nil]</tt> guarantees two elements even
      # if +obj+ contains no hash. Entries are collected in a fresh array:
      # nested arrays are appended entry by entry, anything else that is not
      # a hash is appended as is.
      def parse_array(obj)
        obj.each_with_object([nil, nil]) do |element, log_data|
          case element
          when Hash then log_data[1] = element
          when Array then (log_data[0] ||= []).concat element
          else (log_data[0] ||= []) << element
          end
        end
      end

      # Instances respond to everything the parsed array responds to.
      def respond_to_missing?(method_name, include_private = false)
        @log_data.respond_to?(method_name.to_sym) || super
      end
    end
  end
end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
module FilePipeline
  module FileOperations
    # This class contains the results from a FileOperation being run on a file.
    # Instances will be returned by the FileOperation#run method.
    #
    # Instances contain the file operation object that has produced +self+,
    # a flag for success, and any logs and data the operation may return.
    class Results
      # The object (usually an instance of a subclass of FileOperation) that
      # created +self+
      attr_reader :operation

      # +true+ if the operation has finished and produced a version file,
      # or +false+ if it encountered an error that caused it to terminate.
      attr_reader :success

      # Array with log messages from operations, or +nil+ if there are none.
      attr_reader :log

      # Hash with any data returned from an operation, or +nil+ if there is
      # none.
      attr_reader :data

      # Returns a new instance.
      #
      # ===== Arguments
      #
      # * +operation+ - Must respond to +:name+ and +:options+.
      # * +success+ - +true+ or +false+.
      # * +log_data+ - A string, error, array, hash, or +nil+ (parsed by
      #   LogDataParser).
      #
      # ===== Examples
      #
      #   error = StandardError.new
      #   warning = 'a warning occurred'
      #   log = [error, warning]
      #   data = { mime_type: 'image/jpeg' }
      #
      #   my_op = MyOperation.new
      #
      #   Results.new(my_op, false, error)
      #   # => <Results @data=nil, @log=[error], ..., @success=false>
      #
      #   Results.new(my_op, true, warning)
      #   # => <Results @data=nil, @log=[warning], ..., @success=true>
      #
      #   Results.new(my_op, true, data)
      #   # => <Results @data=data, @log=nil, ..., @success=true>
      #
      #   Results.new(my_op, true, [warning, data])
      #   # => <Results @data=data, @log=[warning], ..., @success=true>
      #
      #   Results.new(my_op, false, log)
      #   # => <Results @data=nil, @log=[error, warning], ..., @success=false>
      #
      #   Results.new(my_op, false, [log, data])
      #   # => <Results @data=data, @log=[error, warning], ..., @success=false>
      #
      #   Results.new(my_op, false, nil)
      #   # => <Results @data=nil, @log=nil, ..., @success=false>
      #
      def initialize(operation, success, log_data)
        @operation = operation
        @success = success
        # The parser behaves like a two element array: [log, data].
        @log, @data = LogDataParser.new(log_data).to_a
      end

      # Returns <tt>[nil, obj]</tt> if +obj+ is a hash, +nil+ otherwise.
      def self.return_data(obj) # :nodoc:
        [nil, obj] if obj.is_a? Hash
      end

      # Returns <tt>[obj]</tt> if +obj+ is an array that contains neither
      # arrays nor hashes, +nil+ otherwise.
      def self.return_log(obj) # :nodoc:
        return unless obj.is_a? Array
        return if obj.any? { |i| i.is_a?(Array) || i.is_a?(Hash) }

        [obj]
      end

      # Splits the array +obj+ into <tt>[log, data]</tt>: +data+ is the first
      # hash in +obj+, +log+ collects every other entry (nested arrays are
      # unpacked one level, +nil+ entries are dropped) and is +nil+ when no
      # entry remains.
      def self.return_log_and_data(obj) # :nodoc:
        hashes, entries = obj.partition { |i| i.is_a? Hash }
        log = entries.flatten(1).compact
        [log.empty? ? nil : log, hashes.first]
      end

      # Returns <tt>[[obj]]</tt> if +obj+ is neither an array nor a hash,
      # +nil+ otherwise.
      def self.return_log_message(obj) # :nodoc:
        return if obj.is_a?(Array) || obj.is_a?(Hash)

        [[obj]]
      end

      # Returns an array with the log as first and (if present) the data as
      # second element for +obj+, or +nil+ if +obj+ is +nil+ or +false+.
      #
      # Tries the more specific helpers first; the first one that applies
      # determines the result.
      def self.normalize_log_data(obj)
        return unless obj

        Results.return_data(obj) ||
          Results.return_log_message(obj) ||
          Results.return_log(obj) ||
          Results.return_log_and_data(obj)
      end

      # Returns +true+ if the operation was not successful, +false+ otherwise.
      def failure
        !success
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ruby-vips'
4
+
5
+ require_relative 'file_operations/captured_data_tags'
6
+ require_relative 'file_operations/exif_manipulable'
7
+ require_relative 'file_operations/file_operation'
8
+ require_relative 'file_operations/log_data_parser'
9
+ require_relative 'file_operations/results'
10
+
11
+ module FilePipeline
12
+   # Namespace for FileOperation and its subclasses, which contain the logic to
13
+   # perform file modifications, and for the associated classes (LogDataParser,
14
+   # Results) that pass on information produced during a file operation.
15
+   module FileOperations
16
+   end
17
+ end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
module FilePipeline
  # Instances of Pipeline hold a defined set of operations that perform
  # modifications of files.
  #
  # The operations are applied to a VersionedFile in the order they are added
  # to the instance. To implement custom operations, it is easiest to write a
  # subclass of FileOperations::FileOperation.
  #
  # The class can be initialized with an optional block to add file
  # operations:
  #
  #   Pipeline.new do |pipeline|
  #     pipeline.define_operation('scale',
  #                               :width => 1280, :height => 1024)
  #     pipeline.define_operation('ptiff_conversion',
  #                               :tile_width => 64, :tile_height => 64)
  #   end
  #
  class Pipeline
    # An array of file operations that will be applied to files in the order
    # they have been added.
    attr_reader :file_operations

    # Returns a new instance. Yields the new instance if a block is given.
    #
    # If <tt>src_directories</tt> are provided, they will be added to
    # FilePipeline.source_directories.
    #
    # ===== Arguments
    #
    # * <tt>src_directories</tt> - one or more paths to directories where
    #   classes for file operations are defined.
    def initialize(*src_directories)
      src_directories.each { |dir| FilePipeline << dir }
      @file_operations = []
      yield(self) if block_given?
    end

    # Adds a file operation object to #file_operations and returns +self+,
    # so that calls can be chained and every chained object is validated:
    #
    #   pipeline << scale_operation << conversion_operation
    #
    # The object must implement a _run_ method (see
    # FileOperations::FileOperation#run for details); otherwise a TypeError is
    # raised and the object is not added.
    def <<(file_operation_instance)
      unless file_operation_instance.respond_to? :run
        raise TypeError, 'File operations must implement a #run method'
      end

      @file_operations << file_operation_instance
      # Return self rather than the internal array: chaining on the array
      # would append objects without checking that they implement #run.
      self
    end

    # Applies all #file_operations to a <tt>versioned_file</tt> and returns it.
    def apply_to(versioned_file)
      file_operations.each { |job| run job, versioned_file }
      versioned_file
    end

    # Applies all #file_operations to <tt>versioned_files</tt> (an array) and
    # returns an array with the processed files in the same order.
    #
    # Every file is processed in a thread of its own. An error raised while
    # processing a file will be raised again when the result of its thread is
    # collected.
    def batch_apply(versioned_files)
      versioned_files.map { |file| Thread.new(file) { |job| apply_to(job) } }
                     .map(&:value)
    end

    # Initializes the class for <tt>file_operation</tt> (a string in
    # underscore notation) with +options+, adds it to #file_operations, and
    # returns +self+.
    #
    # If the source file containing the file operation's class definition is not
    # loaded, this method will try to locate it in the
    # FilePipeline.source_directories and require it.
    #
    # ===== Examples
    #
    # Define single operation:
    #
    #   pipeline.define_operation('ptiff_conversion', :tile => false)
    #
    # Chaining:
    #
    #   pipeline.define_operation('scale', width: 1280, height: 1024)
    #           .define_operation('ptiff_conversion')
    #
    def define_operation(file_operation, options = {})
      operation = FilePipeline.load file_operation
      self << operation.new(options)
    end

    # Returns +true+ if no #file_operations are defined.
    def empty?
      file_operations.empty?
    end

    # Applies +operation+ to <tt>versioned_file</tt>.
    #
    # +operation+ must be an object implementing a _run_ method that takes three
    # arguments (see FileOperations::FileOperation#run ).
    def run(operation, versioned_file)
      versioned_file.modify do |version, directory, original|
        operation.run version, directory, original
      end
    end
  end
end
@@ -0,0 +1,284 @@
1
+ # frozen_string_literal: true
2
+
3
module FilePipeline
  # VersionedFile creates a directory where it stores any versions of _file_.
  class VersionedFile
    include FileOperations::ExifManipulable

    # The basename of the versioned file.
    attr_reader :basename

    # A hash with file paths as keys, information on the modifications applied
    # to create the version as values (instances of FileOperations::Results).
    attr_reader :history

    # The path to the original file of _self_.
    attr_reader :original

    # A String that is appended to the file basename when the file written
    # by #finalize is not replacing the original.
    attr_reader :target_suffix

    # Returns a new instance with +file+ as the #original.
    #
    # Raises Errors::MissingVersionFileError if +file+ does not exist.
    #
    # ===== Arguments
    #
    # * +file+ - Path to the file the instance will be based on. That file
    #   should not be touched unless #finalize is called with the +:overwrite+
    #   option set to +true+.
    #
    # *Caveat* it can not be ruled out that buggy or malignant file operations
    # modify the original.
    #
    #--
    # FIXME: protect the original
    #++
    #
    # ===== Options
    #
    # <tt>target_suffix</tt> is a string to be appended to the file that
    # will be written by #finalize (the last version) if #finalize is to
    # preserve the original. It is recommended to use a UUID (_default_) to
    # avoid clashes with other files in the directory.
    def initialize(file, target_suffix: SecureRandom.uuid)
      raise Errors::MissingVersionFileError, file: file unless File.exist? file

      @original = file
      @basename = File.basename(file, '.*')
      @history = {}
      @directory = nil
      @target_suffix = target_suffix
    end

    # Copies the file with path _src_ to <em>/dir/filename</em> and returns
    # the path of the copy.
    def self.copy(src, dir, filename)
      dest = FilePipeline.path(dir, filename)
      FileUtils.cp src, dest
      dest
    end

    # Moves the file with path _src_ to <em>/dir/filename</em> and returns
    # the new path.
    def self.move(src, dir, filename)
      dest = FilePipeline.path(dir, filename)
      FileUtils.mv src, dest
      dest
    end

    # Adds a new version to #history and returns _self_.
    #
    # <tt>version_info</tt> must be a path to an existing file or an array with
    # the path and optionally a FileOperations::Results instance:
    # <tt>['path/to/file', results_object]</tt>.
    # Will move the file to #directory if it is in another directory.
    #
    # Raises Errors::FailedModificationError if the results object reports a
    # failure. On any error, all versions are discarded before the error is
    # raised again.
    def <<(version_info)
      file, info = version_info
      raise Errors::FailedModificationError, info: info if info&.failure

      version = validate(file)
      @history[version] = info
      self
    rescue StandardError
      reset
      raise
    end

    # Returns a two-dimensional array, where each nested array has two items;
    # the file operation object and data captured by the operation (if any).
    #
    # <tt>[[description_object, data_or_nil], ...]</tt>
    def captured_data
      filter_history :data
    end

    # Returns any data captured by <tt>operation_name</tt>.
    #
    # If multiple instances of one operation class have modified the file,
    # pass any +options+ the specific instance of the operation was initialized
    # with as the optional second argument.
    def captured_data_for(operation_name, **options)
      matching = captured_data.filter do |operation, _|
        operation.name == operation_name &&
          options.all? { |k, v| operation.options[k] == v }
      end
      matching.map(&:last)
    end

    # Returns an array with all data captured by operations that have +tag+,
    # or +nil+ if the file has not been changed.
    #
    # Data from operations with other tags is left out entirely (the array
    # contains no +nil+ placeholders for them).
    #
    # Tags are defined in FileOperations::CapturedDataTags
    def captured_data_with(tag)
      return unless changed?

      captured_data
        .select { |operation, _| operation.captured_data_tag == tag }
        .map(&:last)
    end

    # Returns +true+ if there are #versions (file has been modified).
    #
    # *Warning:* It will also return +true+ if the file has been cloned.
    def changed?
      current != original
    end

    # Creates a new identical version of #current. Will only add the path of
    # the file to history, but no FileOperations::Results.
    def clone
      filename = FilePipeline.new_basename + current_extension
      clone_file = VersionedFile.copy(current, directory, filename)
      self << clone_file
    end

    # Returns the path to the current file or the #original if no versions
    # have been created.
    def current
      versions.last || original
    end

    # Returns the file extension for the #current file.
    def current_extension
      File.extname current
    end

    # Returns the path to the directory where the versions of +self+ are
    # stored. Creates the directory if it does not exist.
    def directory
      @directory ||= workdir
    end

    # Writes the #current version to #basename, optionally the #target_suffix,
    # and the #current_extension in #original_dir. Deletes all versions and
    # resets the #history to an empty Hash. Returns the path to the written
    # file, which becomes the new #original.
    #
    # ===== Options
    #
    # * +overwrite+ - +true+ or +false+
    #   * +false+ (_default_) - The #target_suffix will be appended to the
    #     #basename and the #original will be preserved.
    #   * +true+ - The finalized version will replace the #original. The
    #     #original is only removed after the finalized version has been
    #     written. If there are no versions, the #original is left as it is.
    def finalize(overwrite: false)
      filename = overwrite ? replacing_target : preserving_target
      source = current
      # Without versions there is nothing that could replace the original.
      return original if overwrite && source == original

      replaced = original
      written = VersionedFile.copy(source, original_dir, filename)
      # Only remove the old original once the new file exists, and only if it
      # has not just been overwritten in place by the copy.
      FileUtils.rm replaced if overwrite && !File.identical?(replaced, written)
      @original = written
    ensure
      reset
    end

    # Returns an array of triplets (arrays with three items each): the name of
    # the file operation class (a string), options (a hash), and the actual log
    # (an array).
    def log
      filter_history(:log)
        .map { |operation, info| [operation.name, operation.options, info] }
    end

    # Returns the Exif metadata
    #
    # ===== Options
    #
    # * <tt>:for_version</tt> - +current+ or +original+
    #   * +current+ (_default_) - Metadata for the #current file will be
    #     returned.
    #   * +original+ - Metadata for the #original file will be returned.
    #
    #--
    # TODO: when file is not an image file, this should return other metadata
    # than exif.
    # TODO: implement the option to return metadata for a specific version
    # index
    #++
    def metadata(for_version: :current)
      file = public_send for_version
      read_exif(file).first
    end

    # Creates a new version.
    # Requires a block that must return a path to an existing file or an array
    # with the path and optionally a FileOperations::Results instance:
    # <tt>['path/to/file', results_object]</tt>.
    #
    # The actual file modification logic will be in the block.
    #
    # The block must take three arguments: for the #current file (from which the
    # modified version will be created), the work #directory (to where the
    # modified file will be written), and the #original file (which will only
    # be used in modifications that need the original file for reference, such
    # as modifications that restore file metadata that was lost in other
    # modifications).
    def modify
      self << yield(current, directory, original)
    end

    # Returns the directory where #original is stored.
    def original_dir
      File.dirname original
    end

    # Returns a hash into which all captured data from file operations with the
    # FileOperations::CapturedDataTags::DROPPED_EXIF_DATA has been merged, or
    # +nil+ if the file has not been changed.
    def recovered_metadata
      captured_data_with(FileOperations::CapturedDataTags::DROPPED_EXIF_DATA)
        &.reduce({}) { |recovered, data| recovered.merge data }
    end

    # Returns an array with paths to the version files of +self+ (excluding
    # #original).
    def versions
      history.keys
    end

    alias touch clone

    private

    # Returns an array of pairs with the operation and the value of +item+
    # (<tt>:data</tt> or <tt>:log</tt>) for every entry in #history that has a
    # value for +item+.
    def filter_history(item)
      history.values.each_with_object([]) do |info, results|
        value = info.public_send(item) if info.respond_to?(item)
        results << [info.operation, value] if value
      end
    end

    # Returns the filename for a target file that will not overwrite the
    # original.
    def preserving_target
      basename + '_' + target_suffix + current_extension
    end

    # Returns the filename for a target file that will overwrite the
    # original.
    def replacing_target
      basename + current_extension
    end

    # Deletes the work directory (if one has been created) and resets
    # #versions.
    #
    # The memoized path is cleared as well, so that #directory creates a fresh
    # work directory when the instance is used again.
    def reset
      FileUtils.rm_r @directory, force: true if @directory
      @directory = nil
      @history = {}
    end

    # Validates that +file+ exists and returns its path in #directory; the file
    # is moved there if it is stored in another directory.
    #
    # Raises Errors::MissingVersionFileError if +file+ does not exist.
    def validate(file)
      raise Errors::MissingVersionFileError, file: file unless File.exist? file

      return file if File.dirname(file) == directory

      VersionedFile.move file, directory, File.basename(file)
    end

    # Creates the directory containing all version files. Directory name is
    # composed of the basename plus '_versions'.
    #
    # Raises SystemCallError if the directory already exists.
    def workdir
      subdir = basename + '_versions'
      filedir = File.dirname(original)
      dirname = File.join filedir, subdir
      FileUtils.mkdir(dirname)
      File.path dirname
    end
  end
end