assembly-objectfile 1.13.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -1
  3. data/.rubocop.yml +1 -1
  4. data/.rubocop_todo.yml +14 -80
  5. data/Gemfile.lock +106 -0
  6. data/assembly-objectfile.gemspec +1 -3
  7. data/lib/assembly-objectfile/object_file.rb +253 -3
  8. data/lib/assembly-objectfile/version.rb +1 -1
  9. data/lib/assembly-objectfile.rb +0 -5
  10. data/spec/object_file_spec.rb +411 -172
  11. data/spec/spec_helper.rb +2 -31
  12. metadata +19 -107
  13. data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
  14. data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
  15. data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
  16. data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
  17. data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
  18. data/lib/assembly-objectfile/content_metadata.rb +0 -117
  19. data/lib/assembly-objectfile/object_fileable.rb +0 -275
  20. data/spec/content_metadata_spec.rb +0 -809
  21. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
  22. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
  23. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
  24. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
  25. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
  26. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
  27. data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
  28. data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
  29. data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
  30. data/spec/test_data/input/res1_image1.jp2 +0 -0
  31. data/spec/test_data/input/res1_image2.jp2 +0 -0
  32. data/spec/test_data/input/res1_image2.tif +0 -0
  33. data/spec/test_data/input/res1_teifile.txt +0 -1
  34. data/spec/test_data/input/res2_image1.jp2 +0 -0
  35. data/spec/test_data/input/res2_image1.tif +0 -0
  36. data/spec/test_data/input/res2_image2.jp2 +0 -0
  37. data/spec/test_data/input/res2_image2.tif +0 -0
  38. data/spec/test_data/input/res2_teifile.txt +0 -1
  39. data/spec/test_data/input/res2_textfile.txt +0 -1
  40. data/spec/test_data/input/res3_image1.jp2 +0 -0
  41. data/spec/test_data/input/res3_image1.tif +0 -0
  42. data/spec/test_data/input/res3_teifile.txt +0 -1
  43. data/spec/test_data/input/test.pdf +0 -1
  44. data/spec/test_data/input/test.svg +0 -2
  45. data/spec/test_data/input/test2.jp2 +0 -0
  46. data/spec/test_data/input/test2.tif +0 -0
@@ -1,275 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'mini_exiftool'
4
- require 'mime/types'
5
-
6
- module Assembly
7
- # Common behaviors we need for other classes in the gem
8
- module ObjectFileable
9
- attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
10
-
11
- VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
12
-
13
- # @param [String] path full path to the file to be worked with
14
- # @param [Hash<Symbol => Object>] params options used during content metadata generation
15
- # @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g. {:preserve=>'yes',:shelve=>'no',:publish=>'no'}, defaults pulled from mimetype
16
- # @option params [String] :label a resource label (files bundled together will just get the first file's label attribute if set)
17
- # @option params [String] :provider_md5 pre-computed MD5 checksum
18
- # @option params [String] :provider_sha1 pre-computed SHA1 checksum
19
- # @option params [String] :relative_path if you want the file ids in the content metadata it can be set, otherwise content metadata will get the full path
20
- # @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
21
- # options are :override (from manual override mapping if exists), :exif (from exif if exists),
22
- # :extension (from file extension), and :file (from unix file system command)
23
- # the default is defined in the private `default_mime_type_order` method but you can override to set your own order
24
- # @example
25
- # Assembly::ObjectFile.new('/input/path_to_file.tif')
26
- def initialize(path, params = {})
27
- @path = path
28
- @label = params[:label]
29
- @file_attributes = params[:file_attributes]
30
- @relative_path = params[:relative_path]
31
- @provider_md5 = params[:provider_md5]
32
- @provider_sha1 = params[:provider_sha1]
33
- @mime_type_order = params[:mime_type_order] || default_mime_type_order
34
- end
35
-
36
- # @return [String] DPG base filename, removing the extension and the '00','05', etc. placeholders
37
- # @example
38
- # source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
39
- # puts source_file.dpg_basename # "cy565rm7188_001"
40
- def dpg_basename
41
- file_parts = File.basename(path, ext).split('_')
42
- file_parts.size == 3 ? "#{file_parts[0]}_#{file_parts[2]}" : filename_without_ext
43
- end
44
-
45
- # @return [String] DPG subfolder for the given filename, i.e. '00','05', etc.
46
- # @example
47
- # source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
48
- # puts source_file.dpg_folder # "00"
49
- def dpg_folder
50
- file_parts = File.basename(path, ext).split('_')
51
- file_parts.size == 3 ? file_parts[1] : ''
52
- end
53
-
54
- # @return [String] base filename
55
- # @example
56
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
57
- # puts source_file.filename # "path_to_file.tif"
58
- def filename
59
- File.basename(path)
60
- end
61
-
62
- # @return [String] base directory
63
- # @example
64
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
65
- # puts source_file.dirname # "/input"
66
- def dirname
67
- File.dirname(path)
68
- end
69
-
70
- # @return [String] filename extension
71
- # @example
72
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
73
- # puts source_file.ext # ".tif"
74
- def ext
75
- File.extname(path)
76
- end
77
-
78
- # @return [String] base filename without extension
79
- # @example
80
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
81
- # puts source_file.filename_without_ext # "path_to_file"
82
- def filename_without_ext
83
- File.basename(path, ext)
84
- end
85
-
86
- # @return [MiniExiftool] exif information stored as a hash and an object
87
- # @example
88
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
89
- # puts source_file.exif # hash with exif information
90
- def exif
91
- @exif ||= begin
92
- check_for_file
93
- MiniExiftool.new(path, replace_invalid_chars: '?')
94
- end
95
- end
96
-
97
- # Computes md5 checksum or returns cached value
98
- # @return [String] md5 checksum
99
- # @example
100
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
101
- # puts source_file.md5 # 'XXX123XXX1243XX1243'
102
- def md5
103
- check_for_file unless @md5
104
- @md5 ||= Digest::MD5.file(path).hexdigest
105
- end
106
-
107
- # Computes sha1 checksum or returns cached value
108
- # @return [String] sha1 checksum
109
- # @example
110
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
111
- # puts source_file.sha1 # 'XXX123XXX1243XX1243'
112
- def sha1
113
- check_for_file unless @sha1
114
- @sha1 ||= Digest::SHA1.file(path).hexdigest
115
- end
116
-
117
- # Returns mimetype information for the current file based on the ordering set in mime_type_order (which defaults to default_mime_type_order)
118
- # We stop computing mimetypes as soon as we have a method that returns a value
119
- # @return [String] mime type
120
- # @example
121
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
122
- # puts source_file.mimetype # 'text/plain'
123
- def mimetype
124
- @mimetype ||= begin
125
- check_for_file
126
- mimetype = ''
127
- mime_type_order.each do |mime_type_method|
128
- mimetype = public_send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
129
- break if mimetype.present?
130
- end
131
- mimetype
132
- end
133
- end
134
-
135
- # Returns mimetype information using the manual override mapping (based on a file extension lookup)
136
- # @return [String] mime type for supplied file if a mapping exists for the file's extension
137
- # @example
138
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
139
- # puts source_file.override_mimetype # 'application/json'
140
- def override_mimetype
141
- @override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
142
- end
143
-
144
- # Returns mimetype information using the mime-types gem (based on a file extension lookup)
145
- # @return [String] mime type for supplied file
146
- # @example
147
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
148
- # puts source_file.extension_mimetype # 'text/plain'
149
- def extension_mimetype
150
- @extension_mimetype ||= begin
151
- mtype = MIME::Types.type_for(path).first
152
- mtype ? mtype.content_type : ''
153
- end
154
- end
155
-
156
- # Returns mimetype information for the current file based on unix file system command.
157
- # @return [String] mime type for supplied file
158
- # @example
159
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
160
- # puts source_file.file_mimetype # 'text/plain'
161
- def file_mimetype
162
- @file_mimetype ||= begin
163
- check_for_file
164
- `file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
165
- end
166
- end
167
-
168
- # Returns mimetype information for the current file based on exif data (only when exif reports a mimetype and the mimetype from the file system command is not a "trusted" one that we'd rather take from that command)
169
- # @return [String] mime type for supplied file
170
- # @example
171
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
172
- # puts source_file.exif_mimetype # 'text/plain'
173
- def exif_mimetype
174
- @exif_mimetype ||= begin
175
- check_for_file
176
- prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype) # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
177
- exif.mimetype if exif&.mimetype && prefer_exif
178
- end
179
- end
180
-
181
- # @note Uses shell call to "file", only expected to work on unix based systems
182
- # @return [String] encoding for supplied file
183
- # @example
184
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
185
- # puts source_file.encoding # 'us-ascii'
186
- def encoding
187
- @encoding ||= begin
188
- check_for_file
189
- `file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
190
- end
191
- end
192
-
193
- # @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
194
- # @example
195
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
196
- # puts source_file.object_type # :image
197
- def object_type
198
- lookup = MIME::Types[mimetype][0]
199
- lookup.nil? ? :other : lookup.media_type.to_sym
200
- end
201
-
202
- # @return [Boolean] if object is an image
203
- # @example
204
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
205
- # puts source_file.image? # true
206
- def image?
207
- object_type == :image
208
- end
209
-
210
- # Examines the input image for validity. Used to determine if image is a valid and useful image.
211
- # If image is not a jp2, also checks if it is jp2able?
212
- # @return [Boolean] true if image is valid, false if not.
213
- # @example
214
- # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
215
- # puts source_img.valid_image? # true
216
- def valid_image?
217
- return false unless image?
218
-
219
- mimetype == 'image/jp2' || jp2able?
220
- end
221
-
222
- # @return [Boolean] true if image has a color profile, false if not.
223
- # @example
224
- # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
225
- # puts source_img.has_color_profile? # true
226
- def has_color_profile?
227
- return false unless exif
228
-
229
- exif['profiledescription'] || exif['colorspace'] ? true : false
230
- end
231
-
232
- # Examines the input image for validity to create a jp2. Same as valid_image? but also confirms the existence of a profile description and further restricts mimetypes.
233
- # It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
234
- # @return [Boolean] true if image should have a jp2 created, false if not.
235
- # @example
236
- # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
237
- # puts source_img.jp2able? # true
238
- def jp2able?
239
- return false unless exif
240
-
241
- Assembly::VALID_IMAGE_MIMETYPES.include?(mimetype)
242
- end
243
-
244
- # Returns file size information for the current file in bytes.
245
- # @return [Integer] file size in bytes
246
- # @example
247
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
248
- # puts source_file.filesize # 1345
249
- def filesize
250
- check_for_file
251
- @filesize ||= File.size(path)
252
- end
253
-
254
- # Determines if the file exists (and is not a directory)
255
- # @return [Boolean] file exists
256
- # @example
257
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
258
- # puts source_file.file_exists? # true
259
- def file_exists?
260
- @file_exists ||= (File.exist?(path) && !File.directory?(path))
261
- end
262
-
263
- private
264
-
265
- # private method defining default preferred ordering of how mimetypes are determined
266
- def default_mime_type_order
267
- %i[override exif file extension]
268
- end
269
-
270
- # private method to check for file existence before operating on it
271
- def check_for_file
272
- raise "input file #{path} does not exist or is a directory" unless file_exists?
273
- end
274
- end
275
- end