assembly-objectfile 1.11.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +14 -0
  3. data/.github/pull_request_template.md +3 -5
  4. data/.gitignore +0 -1
  5. data/.rubocop.yml +87 -15
  6. data/.rubocop_todo.yml +19 -74
  7. data/Gemfile +2 -0
  8. data/Gemfile.lock +106 -0
  9. data/README.md +1 -1
  10. data/assembly-objectfile.gemspec +5 -6
  11. data/lib/assembly-objectfile/object_file.rb +253 -3
  12. data/lib/assembly-objectfile/version.rb +2 -2
  13. data/lib/assembly-objectfile.rb +0 -5
  14. data/spec/object_file_spec.rb +411 -167
  15. data/spec/spec_helper.rb +3 -31
  16. data/spec/test_data/empty.txt +0 -0
  17. metadata +35 -121
  18. data/.travis.yml +0 -20
  19. data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
  20. data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
  21. data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
  22. data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
  23. data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
  24. data/lib/assembly-objectfile/content_metadata.rb +0 -117
  25. data/lib/assembly-objectfile/object_fileable.rb +0 -278
  26. data/spec/content_metadata_spec.rb +0 -791
  27. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
  28. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
  29. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
  30. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
  31. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
  32. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
  33. data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
  34. data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
  35. data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
  36. data/spec/test_data/input/res1_image1.jp2 +0 -0
  37. data/spec/test_data/input/res1_image2.jp2 +0 -0
  38. data/spec/test_data/input/res1_image2.tif +0 -0
  39. data/spec/test_data/input/res1_teifile.txt +0 -1
  40. data/spec/test_data/input/res2_image1.jp2 +0 -0
  41. data/spec/test_data/input/res2_image1.tif +0 -0
  42. data/spec/test_data/input/res2_image2.jp2 +0 -0
  43. data/spec/test_data/input/res2_image2.tif +0 -0
  44. data/spec/test_data/input/res2_teifile.txt +0 -1
  45. data/spec/test_data/input/res2_textfile.txt +0 -1
  46. data/spec/test_data/input/res3_image1.jp2 +0 -0
  47. data/spec/test_data/input/res3_image1.tif +0 -0
  48. data/spec/test_data/input/res3_teifile.txt +0 -1
  49. data/spec/test_data/input/test.pdf +0 -1
  50. data/spec/test_data/input/test2.jp2 +0 -0
  51. data/spec/test_data/input/test2.tif +0 -0
@@ -1,14 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'mini_exiftool'
4
+ require 'mime/types'
5
+ require 'active_support/core_ext/object/blank'
6
+
3
7
  module Assembly
4
8
  # This class contains generic methods to operate on any file.
5
9
  class ObjectFile
6
- include Assembly::ObjectFileable
7
-
8
10
  # Class level method that given an array of strings, return the longest common initial path. Useful for removing a common path from a set of filenames when producing content metadata
9
11
  #
10
12
  # @param [Array] strings Array of filenames with paths to operate on
11
- # @return [String] Common part of path of filenames passed in
13
+ # @return [String] longest common initial part of path of filenames passed in
12
14
  #
13
15
  # Example:
14
16
  # puts Assembly::ObjectFile.common_prefix(['/Users/peter/00/test.tif','/Users/peter/05/test.jp2']) # '/Users/peter/0'
@@ -25,5 +27,253 @@ module Assembly
25
27
  "#{common_prefix.split('/')[0..-2].join('/')}/" # if it was, then return the common prefix directly
26
28
  end
27
29
  end
30
+
31
+ attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
32
+
33
+ VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
34
+
35
+ # @param [String] path full path to the file to be worked with
36
+ # @param [Hash<Symbol => Object>] params options used during content metadata generation
37
+ # @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g. {:preserve=>'yes',:shelve=>'no',:publish=>'no'}, defaults pulled from mimetype
38
+ # @option params [String] :label a resource label (files bundled together will just get the first file's label attribute if set)
39
+ # @option params [String] :provider_md5 pre-computed MD5 checksum
40
+ # @option params [String] :provider_sha1 pre-computed SHA1 checksum
41
+ # @option params [String] :relative_path if you want the file ids in the content metadata it can be set, otherwise content metadata will get the full path
42
+ # @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
43
+ # options are :override (from manual override mapping if exists), :exif (from exif if exists),
44
+ # :extension (from file extension), and :file (from unix file system command)
45
+ # the default is defined in the private `default_mime_type_order` method but you can override to set your own order
46
+ # @example
47
+ # Assembly::ObjectFile.new('/input/path_to_file.tif')
48
+ def initialize(path, params = {})
49
+ @path = path
50
+ @label = params[:label]
51
+ @file_attributes = params[:file_attributes]
52
+ @relative_path = params[:relative_path]
53
+ @provider_md5 = params[:provider_md5]
54
+ @provider_sha1 = params[:provider_sha1]
55
+ @mime_type_order = params[:mime_type_order] || default_mime_type_order
56
+ end
57
+
58
+ # @return [String] base filename
59
+ # @example
60
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
61
+ # puts source_file.filename # "path_to_file.tif"
62
+ def filename
63
+ File.basename(path)
64
+ end
65
+
66
+ # @return [String] base directory
67
+ # @example
68
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
69
+ # puts source_file.dirname # "/input"
70
+ def dirname
71
+ File.dirname(path)
72
+ end
73
+
74
+ # @return [String] filename extension
75
+ # @example
76
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
77
+ # puts source_file.ext # ".tif"
78
+ def ext
79
+ File.extname(path)
80
+ end
81
+
82
+ # @return [String] base filename without extension
83
+ # @example
84
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
85
+ # puts source_file.filename_without_ext # "path_to_file"
86
+ def filename_without_ext
87
+ File.basename(path, ext)
88
+ end
89
+
90
+ # @return [MiniExiftool] exif information stored as a hash and an object
91
+ # @example
92
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
93
+ # puts source_file.exif # hash with exif information
94
+ def exif
95
+ @exif ||= begin
96
+ check_for_file
97
+ MiniExiftool.new(path, replace_invalid_chars: '?')
98
+ end
99
+ end
100
+
101
+ # Computes md5 checksum or returns cached value
102
+ # @return [String] md5 checksum
103
+ # @example
104
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
105
+ # puts source_file.md5 # 'XXX123XXX1243XX1243'
106
+ def md5
107
+ check_for_file unless @md5
108
+ @md5 ||= Digest::MD5.file(path).hexdigest
109
+ end
110
+
111
+ # Computes sha1 checksum or return cached value
112
+ # @return [String] sha1 checksum
113
+ # @example
114
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
115
+ # puts source_file.sha1 # 'XXX123XXX1243XX1243'
116
+ def sha1
117
+ check_for_file unless @sha1
118
+ @sha1 ||= Digest::SHA1.file(path).hexdigest
119
+ end
120
+
121
+ # Returns mimetype information for the current file based on the ordering set in mime_type_order (which defaults to default_mime_type_order)
122
+ # We stop computing mimetypes as soon as we have a method that returns a value
123
+ # @return [String] mime type
124
+ # @example
125
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
126
+ # puts source_file.mimetype # 'text/plain'
127
+ def mimetype
128
+ @mimetype ||= begin
129
+ check_for_file
130
+ mimetype = ''
131
+ mime_type_order.each do |mime_type_method|
132
+ mimetype = send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
133
+ break if mimetype.present?
134
+ end
135
+ mimetype
136
+ end
137
+ end
138
+
139
+ # @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
140
+ # @example
141
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
142
+ # puts source_file.object_type # :image
143
+ def object_type
144
+ lookup = MIME::Types[mimetype][0]
145
+ lookup.nil? ? :other : lookup.media_type.to_sym
146
+ end
147
+
148
+ # @return [Boolean] if object is an image
149
+ # @example
150
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
151
+ # puts source_file.image? # true
152
+ def image?
153
+ object_type == :image
154
+ end
155
+
156
+ # Examines the input image for validity. Used to determine if image is a valid and useful image.
157
+ # If image is not a jp2, also checks if it is jp2able?
158
+ # @return [Boolean] true if image is valid, false if not.
159
+ # @example
160
+ # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
161
+ # puts source_img.valid_image? # true
162
+ def valid_image?
163
+ return false unless image?
164
+
165
+ mimetype == 'image/jp2' || jp2able?
166
+ end
167
+
168
+ # @return [Boolean] true if image has a color profile, false if not.
169
+ # @example
170
+ # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
171
+ # puts source_img.has_color_profile? # true
172
+ def has_color_profile?
173
+ return false unless exif
174
+
175
+ exif['profiledescription'] || exif['colorspace'] ? true : false
176
+ end
177
+
178
+ # Examines the input image for validity to create a jp2: requires that exif data can be read and that the mimetype is one of Assembly::VALID_IMAGE_MIMETYPES (it does not itself check for a profile description; see has_color_profile? for that).
179
+ # It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
180
+ # @return [Boolean] true if image should have a jp2 created, false if not.
181
+ # @example
182
+ # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
183
+ # puts source_img.jp2able? # true
184
+ def jp2able?
185
+ return false unless exif
186
+
187
+ Assembly::VALID_IMAGE_MIMETYPES.include?(mimetype)
188
+ end
189
+
190
+ # Returns file size information for the current file in bytes.
191
+ # @return [Integer] file size in bytes
192
+ # @example
193
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
194
+ # puts source_file.filesize # 1345
195
+ def filesize
196
+ check_for_file
197
+ @filesize ||= File.size(path)
198
+ end
199
+
200
+ # Determines if the file exists (and is not a directory)
201
+ # @return [Boolean] file exists
202
+ # @example
203
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
204
+ # puts source_file.file_exists? # true
205
+ def file_exists?
206
+ @file_exists ||= (File.exist?(path) && !File.directory?(path))
207
+ end
208
+
209
+ private
210
+
211
+ # private method to check for file existence before operating on it
212
+ def check_for_file
213
+ raise "input file #{path} does not exist or is a directory" unless file_exists?
214
+ end
215
+
216
+ # private method defining default preferred ordering of how mimetypes are determined
217
+ def default_mime_type_order
218
+ %i[override exif file extension]
219
+ end
220
+
221
+ # Returns mimetype information using the mime-types gem (based on a file extension lookup)
222
+ # @return [String] mime type for supplied file
223
+ # @example
224
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
225
+ # puts source_file.extension_mimetype # 'text/plain'
226
+ def extension_mimetype
227
+ @extension_mimetype ||= begin
228
+ mtype = MIME::Types.type_for(path).first
229
+ mtype ? mtype.content_type : ''
230
+ end
231
+ end
232
+
233
+ # Returns mimetype information for the current file based on unix file system command.
234
+ # @return [String] mime type for supplied file
235
+ # @example
236
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
237
+ # puts source_file.file_mimetype # 'text/plain'
238
+ def file_mimetype
239
+ @file_mimetype ||= begin
240
+ check_for_file
241
+ `file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
242
+ end
243
+ end
244
+
245
+ # Returns mimetype information for the current file based on exif data (if available and not a trusted source that we'd rather get from the file system command)
246
+ # @return [String] mime type for supplied file
247
+ # @example
248
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
249
+ # puts source_file.exif_mimetype # 'text/plain'
250
+ def exif_mimetype
251
+ @exif_mimetype ||= begin
252
+ check_for_file
253
+ prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype) # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
254
+ exif.mimetype if exif&.mimetype && prefer_exif
255
+ end
256
+ end
257
+
258
+ # Returns mimetype information using the manual override mapping (based on a file extension lookup)
259
+ # @return [String] mime type for supplied file if a mapping exists for the file's extension
260
+ # @example
261
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
262
+ # puts source_file.override_mimetype # 'application/json'
263
+ def override_mimetype
264
+ @override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
265
+ end
266
+
267
+ # @note Uses shell call to "file", only expected to work on unix based systems
268
+ # @return [String] encoding for supplied file
269
+ # @example
270
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
271
+ # puts source_file.encoding # 'us-ascii'
272
+ def encoding
273
+ @encoding ||= begin
274
+ check_for_file
275
+ `file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
276
+ end
277
+ end
28
278
  end
29
279
  end
@@ -3,7 +3,7 @@
3
3
  # Main Assembly namespace
4
4
  module Assembly
5
5
  class ObjectFile
6
- # Project version number
7
- VERSION = '1.11.0'
6
+ # Gem version
7
+ VERSION = '2.0.0'
8
8
  end
9
9
  end
@@ -7,9 +7,6 @@ module Assembly
7
7
  # if input image is not one of these mime types, it will not be regarded as a valid image for the purpose of generating a JP2 derivative
8
8
  VALID_IMAGE_MIMETYPES = ['image/jpeg', 'image/tiff', 'image/tif', 'image/png'].freeze
9
9
 
10
- # if input file has one of these extensions in a 3D object, it will get the 3d resource type
11
- VALID_THREE_DIMENSION_EXTENTIONS = ['.obj'].freeze
12
-
13
10
  # the list of mimetypes that will be "trusted" by the unix file command; if a mimetype other than one of these is returned
14
11
  # by the file command, then a check will be made to see if exif data exists...if so, the mimetype returned by the exif data will be used
15
12
  # if no exif data exists, then the mimetype returned by the unix file command will be used
@@ -24,7 +21,5 @@ module Assembly
24
21
  }.freeze
25
22
  end
26
23
 
27
- require 'assembly-objectfile/content_metadata'
28
- require 'assembly-objectfile/object_fileable'
29
24
  require 'assembly-objectfile/object_file'
30
25
  require 'assembly-objectfile/version'