file_pipeline 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FilePipeline
4
+ module FileOperations
5
+ # This class parses an object that may be a hash, an array, another object, or
6
+ # +nil+.
7
+ #
8
+ # If it is initialized with an array, that array may contain another array,
9
+ # a hash, any objects, or +nil+
10
+ #
11
+ # The resulting instance will behave like an array and always have two
12
+ # elements:
13
+ # * +nil+ or an array containing all arguments that are not a hash at index
14
+ # 0
15
+ # * +nil+ or a hash at index 1.
16
+ #
17
+ # ===== Examples
18
+ #
19
+ # When passed +nil+:
20
+ #
21
+ # LogDataParser.new(nil).to_a
22
+ # # => [nil, nil]
23
+ #
24
+ # When initialized with individual strings or errors, those will be wrapped
25
+ # in an array:
26
+ #
27
+ # LogDataParser.new(StandardError.new).to_a
28
+ # # => [[#<StandardError: StandardError>], nil]
29
+ #
30
+ # LogDataParser.new('a warning').to_a
31
+ # # => [['a warning'], nil]
32
+ #
33
+ # This is also true when initialized with individual messages or errors
34
+ # along with data:
35
+ #
36
+ # LogDataParser.new(['a warning', { a_key: 'some value' }]).to_a
37
+ # # => [['a warning'], { a_key: 'some value' }]
38
+ #
39
+ # LogDataParser.new(['a warning', { a_key: 'some value' }, 'error']).to_a
40
+ # # => [['a warning', 'error'], { a_key: 'some value' }]
41
+ #
42
+ # When initialized with a hash, the first element will be +nil+, the second the hash:
43
+ #
44
+ # LogDataParser.new({ a_key: 'some value' }).to_a
45
+ # # => [nil, { a_key: 'some value' }]
46
+ #
47
+ # When initialized with an array that contains neither arrays nor hashes,
48
+ # it will become the first element of the resulting array, with second being
49
+ # +nil+.
50
+ #
51
+ # LogDataParser.new(['a warning', StandardError.new]).to_a
52
+ # # => [['a warning', #<StandardError: StandardError>], nil]
53
+ #
54
+ # When initialized with an array containing an array and a hash, the inner
55
+ # array will be the first element, the hash the second:
56
+ #
57
+ # log = ['a warning', 'another warning']
58
+ # data = { a_key: 'some value' }
59
+ #
60
+ # LogDataParser.new([log, data]).to_a
61
+ # # => [['a warning', 'another warning'], { a_key: 'some value' }]
62
+ #
63
+ # LogDataParser.new([data, log]).to_a
64
+ # # => [['a warning', 'another warning'], { a_key: 'some value' }]
65
+ #
66
+ # When initialized with an array containing a hash and +nil+:
67
+ #
68
+ # LogDataParser.new([nil, data]).to_a
69
+ # # => [nil, { a_key: 'some value' }]
70
+ #
71
+ class LogDataParser
72
+ # :args: object
73
+ #
74
+ # Returns a new instance for +object+, which may be +nil+, a hash, another
75
+ # object, or an array, that may itself contain a hash, an array, or other
76
+ # objects.
77
+ def initialize(obj)
78
+ @log_data = nil
79
+ parse obj
80
+ normalize
81
+ end
82
+
83
+ # Returns a two-element array containing an empty array and an empty hash.
84
+ def self.template
85
+ [[], {}]
86
+ end
87
+
88
+ private
89
+
90
+ def method_missing(method_name, *args, &block)
91
+ super unless respond_to_missing? method_name.to_sym
92
+
93
+ @log_data.public_send method_name, *args, &block
94
+ end
95
+
96
+ def normalize
97
+ return unless @log_data[0].is_a? Array
98
+
99
+ @log_data[0].compact!
100
+ @log_data[0] = nil if @log_data[0].empty?
101
+ end
102
+
103
+ def parse(obj)
104
+ @log_data = case obj
105
+ when Array
106
+ parse_array obj
107
+ when Hash
108
+ [nil, obj]
109
+ when nil
110
+ [nil, nil]
111
+ else
112
+ [[obj], nil]
113
+ end
114
+ end
115
+
116
+ def parse_array(obj)
117
+ return [obj, nil] if obj.none? { |e| e.respond_to? :each }
118
+
119
+ parse_nested obj
120
+ end
121
+
122
+ def parse_nested(obj)
123
+ obj.each_with_object([]) do |element, ld|
124
+ case element
125
+ when Array
126
+ ld[0] = element
127
+ when Hash
128
+ ld[1] = element
129
+ else
130
+ (ld[0] ||= []) << element
131
+ end
132
+ end
133
+ end
134
+
135
+ def respond_to_missing?(method_name, include_private = false)
136
+ @log_data.respond_to?(method_name.to_sym) || super
137
+ end
138
+ end
139
+ end
140
+ end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FilePipeline
4
+ module FileOperations
5
+ # This class contains the results from a FileOperation being run on a file.
6
+ # Instances will be returned by the FileOperation#run method.
7
+ #
8
+ # Instances contain the file operation object that has produced +self+,
9
+ # a flag for success, and any logs and data the operation may return.
10
+ class Results
11
+ # The object (usually an instance of a subclass of FileOperation) that
12
+ # created +self+
13
+ attr_reader :operation
14
+
15
+ # +true+ if the operation has finished and produced a version file,
16
+ # or +false+ if it encountered an error that caused it to terminate.
17
+ attr_reader :success
18
+
19
+ # Array with log messages from operations.
20
+ attr_reader :log
21
+
22
+ # Hash with any data returned from an operation.
23
+ attr_reader :data
24
+
25
+ # Returns a new instance.
26
+ #
27
+ # ===== Arguments
28
+ #
29
+ # * +operation+ - Must respond to +:name+ and +:options+.
30
+ # * +success+ - +true+ or +false+.
31
+ # * +log_data+ - A string, error, array, hash, or +nil+.
32
+ #
33
+ # ===== Examples
34
+ #
35
+ # error = StandardError.new
36
+ # warning = 'a warning occurred'
37
+ # log = [error, warning]
38
+ # data = { mime_type: 'image/jpeg' }
39
+ #
40
+ # my_op = MyOperation.new
41
+ #
42
+ # Results.new(my_op, false, error)
43
+ # # => <Results @data=nil, @log=[error], ..., @success=false>
44
+ #
45
+ # Results.new(my_op, true, warning)
46
+ # # => <Results @data=nil, @log=[warning], ..., @success=true>
47
+ #
48
+ # Results.new(my_op, true, data)
49
+ # # => <Results @data=data, @log=nil, ..., @success=true>
50
+ #
51
+ # Results.new(my_op, true, [warning, data])
52
+ # # => <Results @data=data, @log=[warning], ..., @success=true>
53
+ #
54
+ # Results.new(my_op, false, log)
55
+ # # => <Results @data=nil, @log=[error, warning], ..., @success=false>
56
+ #
57
+ # Results.new(my_op, false, [log, data])
58
+ # # => <Results @data=data, @log=[error, warning], ..., @success=false>
59
+ #
60
+ # Results.new(my_op, false, nil)
61
+ # # => <Results @data=nil, @log=nil, ..., @success=false>
62
+ #
63
+ def initialize(operation, success, log_data)
64
+ @operation = operation
65
+ @success = success
66
+ @log, @data = LogDataParser.new log_data
67
+ end
68
+
69
+ def self.return_data(obj) # :nodoc:
70
+ return [nil, obj] if obj.is_a? Hash
71
+ end
72
+
73
+ def self.return_log(obj) # :nodoc:
74
+ flat_array = obj.is_a?(Array) &&
75
+ obj.none? { |i| i.is_a?(Array) || i.is_a?(Hash) }
76
+ return unless flat_array
77
+
78
+ [obj]
79
+ end
80
+
81
+ def self.return_log_and_data(obj) # :nodoc:
82
+ log = obj.find { |i| !i.is_a? Hash }
83
+ log = [log] unless log.is_a? Array
84
+ data = obj.find { |i| i.is_a? Hash }
85
+ [log, data]
86
+ end
87
+
88
+ def self.return_log_message(obj) # :nodoc:
89
+ return if obj.is_a?(Array) || obj.is_a?(Hash)
90
+
91
+ [[obj]]
92
+ end
93
+
94
+ def self.normalize_log_data(obj)
95
+ return unless obj
96
+
97
+ Results.return_data(obj) ||
98
+ Results.return_log_message(obj) ||
99
+ Results.return_log(obj) ||
100
+ Results.return_log_and_data(obj)
101
+ end
102
+
103
+ # Returns +true+ if the operation was not successful, +false+ otherwise.
104
+ def failure
105
+ !success
106
+ end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ruby-vips'
4
+
5
+ require_relative 'file_operations/captured_data_tags'
6
+ require_relative 'file_operations/exif_manipulable'
7
+ require_relative 'file_operations/file_operation'
8
+ require_relative 'file_operations/log_data_parser'
9
+ require_relative 'file_operations/results'
10
+
11
+ module FilePipeline
12
+ # Module that contains FileOperation and subclasses thereof that contain the
13
+ # logic to perform file modifications, as well as associated classes, for
14
+ # passing on information that was produced during a file operation.
15
+ module FileOperations
16
+ end
17
+ end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FilePipeline
4
+ # Instances of Pipeline hold a defined set of operations that perform
5
+ # modifications of files.
6
+ #
7
+ # The operations are applied to a VersionedFile in the order they are added
8
+ # to the instance. To implement custom operations, it is easiest to write a
9
+ # subclass of FileOperations::FileOperation.
10
+ #
11
+ # The class can be initialized with an optional block to add file
12
+ # operations:
13
+ #
14
+ # Pipeline.new do |pipeline|
15
+ # pipeline.define_operation('scale',
16
+ # :width => 1280, :height => 1024)
17
+ # pipeline.define_operation('ptiff_conversion',
18
+ # :tile_width => 64, :tile_height => 64)
19
+ # end
20
+ #
21
+ class Pipeline
22
+ # An array of file operations that will be applied to files in the order
23
+ # they have been added.
24
+ attr_reader :file_operations
25
+
26
+ # Returns a new instance.
27
+ #
28
+ # If <tt>src_directories</tt> are provided, they will be added to
29
+ # FilePipeline.source_directories.
30
+ #
31
+ # ===== Arguments
32
+ #
33
+ # * <tt>src_directories</tt> - one or more paths to directories where
34
+ # classes for file operations are defined.
35
+ def initialize(*src_directories)
36
+ src_directories.each { |dir| FilePipeline << dir }
37
+ @file_operations = []
38
+ yield(self) if block_given?
39
+ end
40
+
41
+ # Adds a file operation object to #file_operations. The object must implement
42
+ # a _run_ method (see FileOperations::FileOperation#run for details).
43
+ def <<(file_operation_instance)
44
+ unless file_operation_instance.respond_to? :run
45
+ raise TypeError, 'File operations must implement a #run method'
46
+ end
47
+
48
+ @file_operations << file_operation_instance
49
+ end
50
+
51
+ # Applies all #file_operations to a <tt>versioned_file</tt> and returns it.
52
+ def apply_to(versioned_file)
53
+ file_operations.each { |job| run job, versioned_file }
54
+ versioned_file
55
+ end
56
+
57
+ # Applies all #file_operations to <tt>versioned_files</tt> (an array) and
58
+ # returns it.
59
+ def batch_apply(versioned_files)
60
+ versioned_files.map { |file| Thread.new(file) { apply_to(file) } }
61
+ .map(&:value)
62
+ end
63
+
64
+ # Initializes the class for <tt>file_operation</tt> (a string in
65
+ # underscore notation) with +options+, adds it to #file_operations, and
66
+ # returns +self+.
67
+ #
68
+ # If the source file containing the file operation's class definition is not
69
+ # loaded, this method will try to locate it in the
70
+ # FilePipeline.source_directories and require it.
71
+ #
72
+ # ===== Examples
73
+ #
74
+ # Define single operation:
75
+ #
76
+ # pipeline.define_operation('ptiff_conversion', :tile => false)
77
+ #
78
+ # Chaining:
79
+ #
80
+ # pipeline.define_operation('scale', width: 1280, height: 1024)
81
+ # .define_operation('ptiff_conversion')
82
+ #
83
+ def define_operation(file_operation, options = {})
84
+ operation = FilePipeline.load file_operation
85
+ self << operation.new(options)
86
+ self
87
+ end
88
+
89
+ # Returns +true+ if no #file_operations are defined.
90
+ def empty?
91
+ file_operations.empty?
92
+ end
93
+
94
+ # Applies +operation+ to <tt>versioned_file</tt>.
95
+ #
96
+ # +operation+ must be an object implementing a _run_ method that takes three
97
+ # arguments (see FileOperations::FileOperation#run ).
98
+ def run(operation, versioned_file)
99
+ versioned_file.modify do |version, directory, original|
100
+ operation.run version, directory, original
101
+ end
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,284 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FilePipeline
4
+ # VersionedFile creates a directory where it stores any versions of _file_.
5
+ class VersionedFile
6
+ include FileOperations::ExifManipulable
7
+
8
+ # The basename of the versioned file.
9
+ attr_reader :basename
10
+
11
+ # A hash with file paths as keys, information on the modifications applied
12
+ # to create the version as values (instances of FileOperations::Results).
13
+ attr_reader :history
14
+
15
+ # The path to the original file of _self_.
16
+ attr_reader :original
17
+
18
+ # A String that is appended to the file basename when the file written
19
+ # by #finalize is not replacing the original.
20
+ attr_reader :target_suffix
21
+
22
+ # Returns a new instance with +file+ as the #original.
23
+ #
24
+ # ===== Arguments
25
+ #
26
+ # * +file+ - Path to the file the instance will be based on. That file
27
+ # should not be touched unless #finalize is called with the +:overwrite+
28
+ # option set to +true+.
29
+ #
30
+ # *Caveat:* it cannot be ruled out that buggy or malignant file operations
31
+ # modify the original.
32
+ #
33
+ #--
34
+ # FIXME: protect the original
35
+ #++
36
+ #
37
+ # ===== Options
38
+ #
39
+ # <tt>target_suffix</tt> is a string to be appended to the file that
40
+ # will be written by #finalize (the last version) if #finalize is to
41
+ # preserve the original. It is recommended to use a UUID (_default_) to
42
+ # avoid clashes with other files in the directory.
43
+ def initialize(file, target_suffix: SecureRandom.uuid)
44
+ raise Errors::MissingVersionFileError, file: file unless File.exist? file
45
+
46
+ @original = file
47
+ @basename = File.basename(file, '.*')
48
+ @history = {}
49
+ @directory = nil
50
+ @target_suffix = target_suffix
51
+ end
52
+
53
+ # Copies the file with path _src_ to <em>/dir/filename</em>.
54
+ def self.copy(src, dir, filename)
55
+ dest = FilePipeline.path(dir, filename)
56
+ FileUtils.cp src, dest
57
+ dest
58
+ end
59
+
60
+ # Moves the file with path _src_ to <em>/dir/filename</em>.
61
+ def self.move(src, dir, filename)
62
+ dest = FilePipeline.path(dir, filename)
63
+ FileUtils.mv src, dest
64
+ dest
65
+ end
66
+
67
+ # Adds a new version to #history and returns _self_.
68
+ #
69
+ # <tt>version_info</tt> must be a path to an existing file or an array with
70
+ # the path and optionally a FileOperations::Results instance:
71
+ # <tt>['path/to/file', results_object]</tt>.
72
+ # Will move the file to #directory if it is in another directory.
73
+ def <<(version_info)
74
+ file, info = version_info
75
+ raise Errors::FailedModificationError, info: info if info&.failure
76
+
77
+ version = validate(file)
78
+ @history[version] = info
79
+ self
80
+ rescue StandardError => e
81
+ reset
82
+ raise e
83
+ end
84
+
85
+ # Returns a two-dimensional array, where each nested array has two items:
86
+ # the file operation object and data captured by the operation (if any).
87
+ #
88
+ # <tt>[[description_object, data_or_nil], ...]</tt>
89
+ def captured_data
90
+ filter_history :data
91
+ end
92
+
93
+ # Returns any data captured by <tt>operation_name</tt>.
94
+ #
95
+ # If multiple instances of one operation class have modified the file,
96
+ # pass any +options+ the specific instance of the operation was initialized
97
+ # with as the optional second argument.
98
+ def captured_data_for(operation_name, **options)
99
+ raw_data = captured_data.filter do |operation, _|
100
+ operation.name == operation_name &&
101
+ options.all? { |k, v| operation.options[k] == v }
102
+ end
103
+ raw_data.map(&:last)
104
+ end
105
+
106
+ # Returns an array with all data captured by operations that have +tag+.
107
+ #
108
+ # Tags are defined in FileOperations::CapturedDataTags
109
+ def captured_data_with(tag)
110
+ return unless changed?
111
+
112
+ captured_data.map do |operation, results|
113
+ next unless operation.captured_data_tag == tag
114
+
115
+ results
116
+ end
117
+ end
118
+
119
+ # Returns +true+ if there are #versions (file has been modified).
120
+ #
121
+ # *Warning:* It will also return +true+ if the file has been cloned.
122
+ def changed?
123
+ current != original
124
+ end
125
+
126
+ # Creates a new identical version of #current. Will only add the path of
127
+ # the file to history, but no FileOperations::Results.
128
+ def clone
129
+ filename = FilePipeline.new_basename + current_extension
130
+ clone_file = VersionedFile.copy(current, directory, filename)
131
+ self << clone_file
132
+ end
133
+
134
+ # Returns the path to the current file or the #original if no versions
135
+ # have been created.
136
+ def current
137
+ versions.last || original
138
+ end
139
+
140
+ # Returns the file extension for the #current file.
141
+ def current_extension
142
+ File.extname current
143
+ end
144
+
145
+ # Returns the path to the directory where the versions of +self+ are
146
+ # stored. Creates the directory if it does not exist.
147
+ def directory
148
+ @directory ||= workdir
149
+ end
150
+
151
+ # Writes the #current version to #basename, optionally the #target_suffix,
152
+ # and the #current_extension in #original_dir. Deletes all versions and
153
+ # resets the #history to an empty Hash. Returns the path to the written
154
+ # file.
155
+ #
156
+ # ===== Options
157
+ #
158
+ # * +overwrite+ - +true+ or +false+
159
+ # * +false+ (_default_) - The #target_suffix will be appended to the
160
+ # #basename and the #original will be preserved.
161
+ # * +true+ - The finalized version will replace the #original.
162
+ def finalize(overwrite: false)
163
+ filename = overwrite ? replacing_trarget : preserving_taget
164
+ FileUtils.rm original if overwrite
165
+ @original = VersionedFile.copy(current, original_dir, filename)
166
+ ensure
167
+ reset
168
+ end
169
+
170
+ # Returns an array of triplets (arrays with three items each): the name of
171
+ # the file operation class (a string), options (a hash), and the actual log
172
+ # (an array).
173
+ def log
174
+ filter_history(:log)
175
+ .map { |operation, info| [operation.name, operation.options, info] }
176
+ end
177
+
178
+ # Returns the Exif metadata
179
+ #
180
+ # ===== Options
181
+ #
182
+ # * <tt>:for_version</tt> - +current+ or +original+
183
+ # * +current+ (_default_) - Metadata for the #current file will be
184
+ # returned.
185
+ # * +original+ - Metadata for the #original file will be returned.
186
+ #
187
+ #--
188
+ # TODO: when file is not an image file, this should return other metadata
189
+ # than exif.
190
+ # TODO: implement the option to return metadata for a specific version index
191
+ #++
192
+ def metadata(for_version: :current)
193
+ file = public_send for_version
194
+ read_exif(file).first
195
+ end
196
+
197
+ # Creates a new version.
198
+ # Requires a block that must return a path to an existing file or an array
199
+ # with the path and optionally a FileOperations::Results instance:
200
+ # <tt>['path/to/file', results_object]</tt>.
201
+ #
202
+ # The actual file modification logic will be in the block.
203
+ #
204
+ # The block must take three arguments: for the #current file (from which the
205
+ # modified version will be created), the work #directory (to where the
206
+ # modified file will be written), and the #original file (which will only
207
+ # be used in modifications that need the original file for reference, such
208
+ # as modifications that restore file metadata that was lost in other
209
+ # modifications).
210
+ def modify
211
+ self << yield(current, directory, original)
212
+ end
213
+
214
+ # Returns the directory where #original is stored.
215
+ def original_dir
216
+ File.dirname original
217
+ end
218
+
219
+ # Returns a hash into which all captured data from file operations with the
220
+ # FileOperations::CapturedDataTags::DROPPED_EXIF_DATA has been merged.
221
+ def recovered_metadata
222
+ captured_data_with(FileOperations::CapturedDataTags::DROPPED_EXIF_DATA)
223
+ &.reduce({}) { |recovered, data| recovered.merge data }
224
+ end
225
+
226
+ # Returns an array with paths to the version files of +self+ (excluding
227
+ # #original).
228
+ def versions
229
+ history.keys
230
+ end
231
+
232
+ alias touch clone
233
+
234
+ private
235
+
236
+ # item = :data or :log
237
+ def filter_history(item)
238
+ history.inject([]) do |results, (_, info)|
239
+ next results unless info.respond_to?(item) && info.public_send(item)
240
+
241
+ results << [info.operation, info.public_send(item)]
242
+ end
243
+ end
244
+
245
+ # Returns the filename for a target file that will not overwrite the
246
+ # original.
247
+ def preserving_taget
248
+ basename + '_' + target_suffix + current_extension
249
+ end
250
+
251
+ # Returns the filename for a target file that will overwrite the
252
+ # original.
253
+ def replacing_trarget
254
+ basename + current_extension
255
+ end
256
+
257
+ # Deletes the work directory and resets #versions
258
+ def reset
259
+ FileUtils.rm_r directory, force: true
260
+ @history = {}
261
+ end
262
+
263
+ # Validates if file exists and has been stored in #directory.
264
+ def validate(file)
265
+ raise Errors::MissingVersionFileError, file: file unless File.exist? file
266
+
267
+ return file if File.dirname(file) == directory
268
+
269
+ VersionedFile.move file, directory, File.basename(file)
270
+ end
271
+
272
+ # Creates the directory containing all version files. Directory name is
273
+ # composed of the basename plus '_versions'.
274
+ #
275
+ # Raises SystemCallError if the directory already exists.
276
+ def workdir
277
+ subdir = basename + '_versions'
278
+ filedir = File.dirname(original)
279
+ dirname = File.join filedir, subdir
280
+ FileUtils.mkdir(dirname)
281
+ File.path dirname
282
+ end
283
+ end
284
+ end