assembly-objectfile 1.13.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -1
  3. data/.rubocop.yml +1 -1
  4. data/.rubocop_todo.yml +14 -80
  5. data/Gemfile.lock +106 -0
  6. data/assembly-objectfile.gemspec +1 -3
  7. data/lib/assembly-objectfile/object_file.rb +253 -3
  8. data/lib/assembly-objectfile/version.rb +1 -1
  9. data/lib/assembly-objectfile.rb +0 -5
  10. data/spec/object_file_spec.rb +411 -172
  11. data/spec/spec_helper.rb +2 -31
  12. metadata +19 -107
  13. data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
  14. data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
  15. data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
  16. data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
  17. data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
  18. data/lib/assembly-objectfile/content_metadata.rb +0 -117
  19. data/lib/assembly-objectfile/object_fileable.rb +0 -275
  20. data/spec/content_metadata_spec.rb +0 -809
  21. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
  22. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
  23. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
  24. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
  25. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
  26. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
  27. data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
  28. data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
  29. data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
  30. data/spec/test_data/input/res1_image1.jp2 +0 -0
  31. data/spec/test_data/input/res1_image2.jp2 +0 -0
  32. data/spec/test_data/input/res1_image2.tif +0 -0
  33. data/spec/test_data/input/res1_teifile.txt +0 -1
  34. data/spec/test_data/input/res2_image1.jp2 +0 -0
  35. data/spec/test_data/input/res2_image1.tif +0 -0
  36. data/spec/test_data/input/res2_image2.jp2 +0 -0
  37. data/spec/test_data/input/res2_image2.tif +0 -0
  38. data/spec/test_data/input/res2_teifile.txt +0 -1
  39. data/spec/test_data/input/res2_textfile.txt +0 -1
  40. data/spec/test_data/input/res3_image1.jp2 +0 -0
  41. data/spec/test_data/input/res3_image1.tif +0 -0
  42. data/spec/test_data/input/res3_teifile.txt +0 -1
  43. data/spec/test_data/input/test.pdf +0 -1
  44. data/spec/test_data/input/test.svg +0 -2
  45. data/spec/test_data/input/test2.jp2 +0 -0
  46. data/spec/test_data/input/test2.tif +0 -0
@@ -1,275 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'mini_exiftool'
4
- require 'mime/types'
5
-
6
- module Assembly
7
- # Common behaviors we need for other classes in the gem
8
- module ObjectFileable
9
- attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
10
-
11
- VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
12
-
13
- # @param [String] path full path to the file to be worked with
14
- # @param [Hash<Symbol => Object>] params options used during content metadata generation
15
- # @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g. {:preserve=>'yes',:shelve=>'no',:publish=>'no'}, defaults pulled from mimetype
16
- # @option params [String] :label a resource label (files bundled together will just get the first file's label attribute if set)
17
- # @option params [String] :provider_md5 pre-computed MD5 checksum
18
- # @option params [String] :provider_sha1 pre-computed SHA1 checksum
19
- # @option params [String] :relative_path if you want the file ids in the content metadata it can be set, otherwise content metadata will get the full path
20
- # @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
21
- # options are :override (from manual override mapping if exists), :exif (from exif if exists),
22
- # :extension (from file extension), and :file (from unix file system command)
23
- # the default is defined in the private `default_mime_type_order` method but you can override to set your own order
24
- # @example
25
- # Assembly::ObjectFile.new('/input/path_to_file.tif')
26
- def initialize(path, params = {})
27
- @path = path
28
- @label = params[:label]
29
- @file_attributes = params[:file_attributes]
30
- @relative_path = params[:relative_path]
31
- @provider_md5 = params[:provider_md5]
32
- @provider_sha1 = params[:provider_sha1]
33
- @mime_type_order = params[:mime_type_order] || default_mime_type_order
34
- end
35
-
36
- # @return [String] DPG base filename, removing the extension and the '00','05', etc. placeholders
37
- # @example
38
- # source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
39
- # puts source_file.dpg_basename # "cy565rm7188_001"
40
- def dpg_basename
41
- file_parts = File.basename(path, ext).split('_')
42
- file_parts.size == 3 ? "#{file_parts[0]}_#{file_parts[2]}" : filename_without_ext
43
- end
44
-
45
- # @return [String] DPG subfolder for the given filename, i.e. '00','05', etc.
46
- # @example
47
- # source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
48
- # puts source_file.dpg_folder # "00"
49
- def dpg_folder
50
- file_parts = File.basename(path, ext).split('_')
51
- file_parts.size == 3 ? file_parts[1] : ''
52
- end
53
-
54
- # @return [String] base filename
55
- # @example
56
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
57
- # puts source_file.filename # "path_to_file.tif"
58
- def filename
59
- File.basename(path)
60
- end
61
-
62
- # @return [String] base directory
63
- # @example
64
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
65
- # puts source_file.dirname # "/input"
66
- def dirname
67
- File.dirname(path)
68
- end
69
-
70
- # @return [String] filename extension
71
- # @example
72
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
73
- # puts source_file.ext # ".tif"
74
- def ext
75
- File.extname(path)
76
- end
77
-
78
- # @return [String] base filename without extension
79
- # @example
80
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
81
- # puts source_file.filename_without_ext # "path_to_file"
82
- def filename_without_ext
83
- File.basename(path, ext)
84
- end
85
-
86
- # @return [MiniExiftool] exif information stored as a hash and an object
87
- # @example
88
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
89
- # puts source_file.exif # hash with exif information
90
- def exif
91
- @exif ||= begin
92
- check_for_file
93
- MiniExiftool.new(path, replace_invalid_chars: '?')
94
- end
95
- end
96
-
97
- # Computes md5 checksum or returns cached value
98
- # @return [String] md5 checksum
99
- # @example
100
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
101
- # puts source_file.md5 # 'XXX123XXX1243XX1243'
102
- def md5
103
- check_for_file unless @md5
104
- @md5 ||= Digest::MD5.file(path).hexdigest
105
- end
106
-
107
- # Computes sha1 checksum or return cached value
108
- # @return [String] sha1 checksum
109
- # @example
110
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
111
- # puts source_file.sha1 # 'XXX123XXX1243XX1243'
112
- def sha1
113
- check_for_file unless @sha1
114
- @sha1 ||= Digest::SHA1.file(path).hexdigest
115
- end
116
-
117
- # Returns mimetype information for the current file based on the ordering set in default_mime_type_order
118
- # We stop computing mimetypes as soon as we have a method that returns a value
119
- # @return [String] mime type
120
- # @example
121
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
122
- # puts source_file.mimetype # 'text/plain'
123
- def mimetype
124
- @mimetype ||= begin
125
- check_for_file
126
- mimetype = ''
127
- mime_type_order.each do |mime_type_method|
128
- mimetype = public_send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
129
- break if mimetype.present?
130
- end
131
- mimetype
132
- end
133
- end
134
-
135
- # Returns mimetype information using the manual override mapping (based on a file extension lookup)
136
- # @return [String] mime type for supplied file if a mapping exists for the file's extension
137
- # @example
138
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
139
- # puts source_file.override_mimetype # 'application/json'
140
- def override_mimetype
141
- @override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
142
- end
143
-
144
- # Returns mimetype information using the mime-types gem (based on a file extension lookup)
145
- # @return [String] mime type for supplied file
146
- # @example
147
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
148
- # puts source_file.extension_mimetype # 'text/plain'
149
- def extension_mimetype
150
- @extension_mimetype ||= begin
151
- mtype = MIME::Types.type_for(path).first
152
- mtype ? mtype.content_type : ''
153
- end
154
- end
155
-
156
- # Returns mimetype information for the current file based on unix file system command.
157
- # @return [String] mime type for supplied file
158
- # @example
159
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
160
- # puts source_file.file_mimetype # 'text/plain'
161
- def file_mimetype
162
- @file_mimetype ||= begin
163
- check_for_file
164
- `file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
165
- end
166
- end
167
-
168
- # Returns mimetype information for the current file based on exif data (if available and not a trusted source that we'd rather get from the file system command)
169
- # @return [String] mime type for supplied file
170
- # @example
171
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
172
- # puts source_file.exif_mimetype # 'text/plain'
173
- def exif_mimetype
174
- @exif_mimetype ||= begin
175
- check_for_file
176
- prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype) # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
177
- exif.mimetype if exif&.mimetype && prefer_exif
178
- end
179
- end
180
-
181
- # @note Uses shell call to "file", only expected to work on unix based systems
182
- # @return [String] encoding for supplied file
183
- # @example
184
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
185
- # puts source_file.encoding # 'us-ascii'
186
- def encoding
187
- @encoding ||= begin
188
- check_for_file
189
- `file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
190
- end
191
- end
192
-
193
- # @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
194
- # @example
195
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
196
- # puts source_file.object_type # :image
197
- def object_type
198
- lookup = MIME::Types[mimetype][0]
199
- lookup.nil? ? :other : lookup.media_type.to_sym
200
- end
201
-
202
- # @return [Boolean] if object is an image
203
- # @example
204
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
205
- # puts source_file.image? # true
206
- def image?
207
- object_type == :image
208
- end
209
-
210
- # Examines the input image for validity. Used to determine if image is a valid and useful image.
211
- # If image is not a jp2, also checks if it is jp2able?
212
- # @return [Boolean] true if image is valid, false if not.
213
- # @example
214
- # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
215
- # puts source_img.valid_image? # true
216
- def valid_image?
217
- return false unless image?
218
-
219
- mimetype == 'image/jp2' || jp2able?
220
- end
221
-
222
- # @return [Boolean] true if image has a color profile, false if not.
223
- # @example
224
- # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
225
- # puts source_img.has_color_profile? # true
226
- def has_color_profile?
227
- return false unless exif
228
-
229
- exif['profiledescription'] || exif['colorspace'] ? true : false
230
- end
231
-
232
- # Examines the input image for validity to create a jp2. Same as valid_image? but also confirms the existence of a profile description and further restricts mimetypes.
233
- # It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
234
- # @return [Boolean] true if image should have a jp2 created, false if not.
235
- # @example
236
- # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
237
- # puts source_img.jp2able? # true
238
- def jp2able?
239
- return false unless exif
240
-
241
- Assembly::VALID_IMAGE_MIMETYPES.include?(mimetype)
242
- end
243
-
244
- # Returns file size information for the current file in bytes.
245
- # @return [Integer] file size in bytes
246
- # @example
247
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
248
- # puts source_file.filesize # 1345
249
- def filesize
250
- check_for_file
251
- @filesize ||= File.size(path)
252
- end
253
-
254
- # Determines if the file exists (and is not a directory)
255
- # @return [Boolean] file exists
256
- # @example
257
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
258
- # puts source_file.file_exists? # true
259
- def file_exists?
260
- @file_exists ||= (File.exist?(path) && !File.directory?(path))
261
- end
262
-
263
- private
264
-
265
- # private method defining default preferred ordering of how mimetypes are determined
266
- def default_mime_type_order
267
- %i[override exif file extension]
268
- end
269
-
270
- # private method to check for file existence before operating on it
271
- def check_for_file
272
- raise "input file #{path} does not exist or is a directory" unless file_exists?
273
- end
274
- end
275
- end