assembly-objectfile 1.12.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +14 -0
  3. data/.github/pull_request_template.md +1 -1
  4. data/.gitignore +0 -1
  5. data/.rubocop.yml +117 -12
  6. data/.rubocop_todo.yml +3 -109
  7. data/Gemfile.lock +97 -0
  8. data/README.md +7 -7
  9. data/assembly-objectfile.gemspec +8 -11
  10. data/config/boot.rb +0 -1
  11. data/lib/{assembly-objectfile → assembly/object_file}/version.rb +2 -2
  12. data/lib/{assembly-objectfile/object_fileable.rb → assembly/object_file.rb} +109 -104
  13. data/lib/assembly-objectfile.rb +12 -15
  14. data/spec/assembly/object_file_spec.rb +451 -0
  15. data/spec/spec_helper.rb +2 -31
  16. data/spec/test_data/empty.txt +0 -0
  17. metadata +23 -156
  18. data/.travis.yml +0 -20
  19. data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
  20. data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
  21. data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
  22. data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
  23. data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
  24. data/lib/assembly-objectfile/content_metadata.rb +0 -117
  25. data/lib/assembly-objectfile/object_file.rb +0 -29
  26. data/profiles/AdobeRGB1998.icc +0 -0
  27. data/profiles/DotGain20.icc +0 -0
  28. data/profiles/sRGBIEC6196621.icc +0 -0
  29. data/spec/content_metadata_spec.rb +0 -809
  30. data/spec/object_file_spec.rb +0 -217
  31. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
  32. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
  33. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
  34. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
  35. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
  36. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
  37. data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
  38. data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
  39. data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
  40. data/spec/test_data/input/res1_image1.jp2 +0 -0
  41. data/spec/test_data/input/res1_image2.jp2 +0 -0
  42. data/spec/test_data/input/res1_image2.tif +0 -0
  43. data/spec/test_data/input/res1_teifile.txt +0 -1
  44. data/spec/test_data/input/res2_image1.jp2 +0 -0
  45. data/spec/test_data/input/res2_image1.tif +0 -0
  46. data/spec/test_data/input/res2_image2.jp2 +0 -0
  47. data/spec/test_data/input/res2_image2.tif +0 -0
  48. data/spec/test_data/input/res2_teifile.txt +0 -1
  49. data/spec/test_data/input/res2_textfile.txt +0 -1
  50. data/spec/test_data/input/res3_image1.jp2 +0 -0
  51. data/spec/test_data/input/res3_image1.tif +0 -0
  52. data/spec/test_data/input/res3_teifile.txt +0 -1
  53. data/spec/test_data/input/test.pdf +0 -1
  54. data/spec/test_data/input/test.svg +0 -2
  55. data/spec/test_data/input/test2.jp2 +0 -0
  56. data/spec/test_data/input/test2.tif +0 -0
@@ -2,25 +2,56 @@
2
2
 
3
3
  require 'mini_exiftool'
4
4
  require 'mime/types'
5
+ require 'active_support/core_ext/object/blank'
5
6
 
6
7
  module Assembly
7
- # Common behaviors we need for other classes in the gem
8
- module ObjectFileable
8
+ # This class contains generic methods to operate on any file.
9
+ class ObjectFile
10
+ # Class level method that given an array of strings, return the longest common initial path.
11
+ # Useful for removing a common path from a set of filenames when producing content metadata
12
+ #
13
+ # @param [Array] strings Array of filenames with paths to operate on
14
+ # @return [String] longest common initial part of path of filenames passed in
15
+ #
16
+ # Example:
17
+ # puts Assembly::ObjectFile.common_path(['/Users/peter/00/test.tif','/Users/peter/05/test.jp2'])
18
+ # # => '/Users/peter/'
19
+ def self.common_path(strings)
20
+ return nil if strings.empty?
21
+
22
+ n = 0
23
+ x = strings.last
24
+ n += 1 while strings.all? { |s| s[n] && (s[n] == x[n]) }
25
+ common_prefix = x[0...n]
26
+ if common_prefix[-1, 1] == '/' # check if last element of the common string is the end of a directory
27
+ common_prefix # if it is, return the common prefix directly
28
+ else
29
+ "#{common_prefix.split('/')[0..-2].join('/')}/" # if not, split string along directories, and reject last one
30
+ end
31
+ end
32
+
9
33
  attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
10
34
 
11
35
  VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
12
36
 
13
37
  # @param [String] path full path to the file to be worked with
14
38
  # @param [Hash<Symbol => Object>] params options used during content metadata generation
15
- # @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g. {:preserve=>'yes',:shelve=>'no',:publish=>'no'}, defaults pulled from mimetype
16
- # @option params [String] :label a resource label (files bundlded together will just get the first file's label attribute if set)
39
+ # @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g.:
40
+ # {:preserve=>'yes',:shelve=>'no',:publish=>'no'},
41
+ # defaults pulled from mimetype
42
+ # @option params [String] :label a resource label (files bundled together will just get the first
43
+ # file's label attribute if set)
17
44
  # @option params [String] :provider_md5 pre-computed MD5 checksum
18
45
  # @option params [String] :provider_sha1 pre-computed SHA1 checksum
19
- # @option params [String] :relative_path if you want the file ids in the content metadata it can be set, otherwise content metadata will get the full path
46
+ # @option params [String] :relative_path if you want the file ids in the content metadata it can be set,
47
+ # otherwise content metadata will get the full path
20
48
  # @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
21
- # options are :override (from manual overide mapping if exists), :exif (from exif if exists),
22
- # :extension (from file extension), and :file (from unix file system command)
23
- # the default is defined in the private `default_mime_type_order` method but you can override to set your own order
49
+ # options are :override (from manual override mapping if exists),
50
+ # :exif (from exif if exists)
51
+ # :extension (from file extension)
52
+ # :file (from unix file system command)
53
+ # the default is defined in the private `default_mime_type_order` method
54
+ # but you can override to set your own order
24
55
  # @example
25
56
  # Assembly::ObjectFile.new('/input/path_to_file.tif')
26
57
  def initialize(path, params = {})
@@ -33,24 +64,6 @@ module Assembly
33
64
  @mime_type_order = params[:mime_type_order] || default_mime_type_order
34
65
  end
35
66
 
36
- # @return [String] DPG base filename, removing the extension and the '00','05', etc. placeholders
37
- # @example
38
- # source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
39
- # puts source_file.dpg_basename # "cy565rm7188_001"
40
- def dpg_basename
41
- file_parts = File.basename(path, ext).split('_')
42
- file_parts.size == 3 ? "#{file_parts[0]}_#{file_parts[2]}" : filename_without_ext
43
- end
44
-
45
- # @return [String] DPG subfolder for the given filename, i.e. '00','05', etc.
46
- # @example
47
- # source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
48
- # puts source_file.dpg_folder # "00"
49
- def dpg_folder
50
- file_parts = File.basename(path, ext).split('_')
51
- file_parts.size == 3 ? file_parts[1] : ''
52
- end
53
-
54
67
  # @return [String] base filename
55
68
  # @example
56
69
  # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
@@ -91,8 +104,6 @@ module Assembly
91
104
  @exif ||= begin
92
105
  check_for_file
93
106
  MiniExiftool.new(path, replace_invalid_chars: '?')
94
- rescue StandardError
95
- nil
96
107
  end
97
108
  end
98
109
 
@@ -127,73 +138,15 @@ module Assembly
127
138
  check_for_file
128
139
  mimetype = ''
129
140
  mime_type_order.each do |mime_type_method|
130
- mimetype = public_send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
141
+ mimetype = send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
131
142
  break if mimetype.present?
132
143
  end
133
144
  mimetype
134
145
  end
135
146
  end
136
147
 
137
- # Returns mimetype information using the manual override mapping (based on a file extension lookup)
138
- # @return [String] mime type for supplied file if a mapping exists for the file's extension
139
- # @example
140
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
141
- # puts source_file.override_mimetype # 'application/json'
142
- def override_mimetype
143
- @override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
144
- end
145
-
146
- # Returns mimetype information using the mime-types gem (based on a file extension lookup)
147
- # @return [String] mime type for supplied file
148
- # @example
149
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
150
- # puts source_file.extension_mimetype # 'text/plain'
151
- def extension_mimetype
152
- @extension_mimetype ||= begin
153
- mtype = MIME::Types.type_for(path).first
154
- mtype ? mtype.content_type : ''
155
- end
156
- end
157
-
158
- # Returns mimetype information for the current file based on unix file system command.
159
- # @return [String] mime type for supplied file
160
- # @example
161
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
162
- # puts source_file.file_mimetype # 'text/plain'
163
- def file_mimetype
164
- @file_mimetype ||= begin
165
- check_for_file
166
- `file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
167
- end
168
- end
169
-
170
- # Returns mimetype information for the current file based on exif data (if available and not a trusted source that we'd rather get from the file system command)
171
- # @return [String] mime type for supplied file
172
- # @example
173
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
174
- # puts source_file.exif_mimetype # 'text/plain'
175
- def exif_mimetype
176
- @exif_mimetype ||= begin
177
- check_for_file
178
- prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype) # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
179
- exif.mimetype if
180
- exif&.mimetype && prefer_exif
181
- end
182
- end
183
-
184
- # @note Uses shell call to "file", only expected to work on unix based systems
185
- # @return [String] encoding for supplied file
186
- # @example
187
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
188
- # puts source_file.encoding # 'us-ascii'
189
- def encoding
190
- @encoding ||= begin
191
- check_for_file
192
- `file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
193
- end
194
- end
195
-
196
- # @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
148
+ # @return [Symbol] the type of object, could be :application (for PDF or Word, etc),
149
+ # :audio, :image, :message, :model, :multipart, :text or :video
197
150
  # @example
198
151
  # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
199
152
  # puts source_file.object_type # :image
@@ -222,18 +175,10 @@ exif&.mimetype && prefer_exif
222
175
  mimetype == 'image/jp2' || jp2able?
223
176
  end
224
177
 
225
- # @return [Boolean] true if image has a color profile, false if not.
226
- # @example
227
- # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
228
- # puts source_img.has_color_profile? # true
229
- def has_color_profile?
230
- return false unless exif
231
-
232
- exif['profiledescription'] || exif['colorspace'] ? true : false
233
- end
234
-
235
- # Examines the input image for validity to create a jp2. Same as valid_image? but also confirms the existence of a profile description and further restricts mimetypes.
236
- # It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
178
+ # Examines the input image for validity to create a jp2. Same as valid_image? but also confirms
179
+ # the existence of a profile description and further restricts mimetypes.
180
+ # It is used by the assembly robots to decide if a jp2 will be created and is also called before
181
+ # you create a jp2 using assembly-image.
237
182
  # @return [Boolean] true if image should have a jp2 created, false if not.
238
183
  # @example
239
184
  # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
@@ -265,14 +210,74 @@ exif&.mimetype && prefer_exif
265
210
 
266
211
  private
267
212
 
213
+ # private method to check for file existence before operating on it
214
+ def check_for_file
215
+ raise "input file #{path} does not exist or is a directory" unless file_exists?
216
+ end
217
+
268
218
  # private method defining default preferred ordering of how mimetypes are determined
269
219
  def default_mime_type_order
270
220
  %i[override exif file extension]
271
221
  end
272
222
 
273
- # private method to check for file existence before operating on it
274
- def check_for_file
275
- raise "input file #{path} does not exist" unless file_exists?
223
+ # Returns mimetype information using the mime-types gem (based on a file extension lookup)
224
+ # @return [String] mime type for supplied file
225
+ # @example
226
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
227
+ # puts source_file.extension_mimetype # 'text/plain'
228
+ def extension_mimetype
229
+ @extension_mimetype ||= begin
230
+ mtype = MIME::Types.type_for(path).first
231
+ mtype ? mtype.content_type : ''
232
+ end
233
+ end
234
+
235
+ # Returns mimetype information for the current file based on unix file system command.
236
+ # @return [String] mime type for supplied file
237
+ # @example
238
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
239
+ # puts source_file.file_mimetype # 'text/plain'
240
+ def file_mimetype
241
+ @file_mimetype ||= begin
242
+ check_for_file
243
+ `file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # get the mimetype from the unix file command
244
+ end
245
+ end
246
+
247
+ # Returns mimetype information for the current file based on exif data
248
+ # (if available and not a trusted source that we'd rather get from the file system command)
249
+ # @return [String] mime type for supplied file
250
+ # @example
251
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
252
+ # puts source_file.exif_mimetype # 'text/plain'
253
+ def exif_mimetype
254
+ @exif_mimetype ||= begin
255
+ check_for_file
256
+ # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
257
+ prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype)
258
+ exif.mimetype if exif&.mimetype && prefer_exif
259
+ end
260
+ end
261
+
262
+ # Returns mimetype information using the manual override mapping (based on a file extension lookup)
263
+ # @return [String] mime type for supplied file if a mapping exists for the file's extension
264
+ # @example
265
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
266
+ # puts source_file.override_mimetype # 'application/json'
267
+ def override_mimetype
268
+ @override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
269
+ end
270
+
271
+ # @note Uses shell call to "file", only expected to work on unix based systems
272
+ # @return [String] encoding for supplied file
273
+ # @example
274
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
275
+ # puts source_file.encoding # 'us-ascii'
276
+ def encoding
277
+ @encoding ||= begin
278
+ check_for_file
279
+ `file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
280
+ end
276
281
  end
277
282
  end
278
283
  end
@@ -4,27 +4,24 @@ module Assembly
4
4
  # the path to the gem, used to access profiles stored with the gem
5
5
  PATH_TO_GEM = File.expand_path("#{File.dirname(__FILE__)}/..")
6
6
 
7
- # if input image is not one of these mime types, it will not be regarded as a valid image for the purpose of generating a JP2 derivative
7
+ # If input image is not one of these mime types, it will not be regarded as a valid image
8
+ # for the purpose of generating a JP2 derivative
8
9
  VALID_IMAGE_MIMETYPES = ['image/jpeg', 'image/tiff', 'image/tif', 'image/png'].freeze
9
10
 
10
- # if input file has one of these extensions in a 3D object, it will get the 3d resource type
11
- VALID_THREE_DIMENSION_EXTENTIONS = ['.obj'].freeze
12
-
13
- # the list of mimetypes that will be "trusted" by the unix file command; if a mimetype other than one of these is returned
14
- # by the file command, then a check will be made to see if exif data exists...if so, the mimetype returned by the exif data will be used
15
- # if no exif data exists, then the mimetype returned by the unix file command will be used
11
+ # The list of mimetypes that will be "trusted" by the unix file command; if a mimetype other than
12
+ # one of these is returned by the file command, then a check will be made to see if exif data exists...
13
+ # if so, the mimetype returned by the exif data will be used; if no exif data exists, then the
14
+ # mimetype returned by the unix file command will be used
16
15
  TRUSTED_MIMETYPES = ['text/plain', 'plain/text', 'application/pdf', 'text/html', 'application/xml'].freeze
17
16
 
18
- # this is a manual override mapping of file extension to mimetype; if a file with the given extension is found, the mapped
19
- # mimetype will be returned and no further methods will be used - this is used to force a specific mimetype to be returned for
20
- # for a given file extension regardless of what exif or the unix file system command returns
21
- # the mapping format is "extension with period: returned mimetype", e.g. for any .json file, you will always get `application/json`
17
+ # This is a manual override mapping of file extension to mimetype; if a file with the given extension
18
+ # is found, the mapped mimetype will be returned and no further methods will be used - this is used
19
+ # to force a specific mimetype to be returned for a given file extension regardless of what exif or
20
+ # the unix file system command returns. The mapping format is "extension with period: returned mimetype",
21
+ # e.g. for any .json file, you will always get `application/json`
22
22
  OVERRIDE_MIMETYPES = {
23
23
  '.json': 'application/json'
24
24
  }.freeze
25
25
  end
26
26
 
27
- require 'assembly-objectfile/content_metadata'
28
- require 'assembly-objectfile/object_fileable'
29
- require 'assembly-objectfile/object_file'
30
- require 'assembly-objectfile/version'
27
+ require 'assembly/object_file'