assembly-objectfile 1.13.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/.rubocop.yml +1 -1
- data/.rubocop_todo.yml +14 -80
- data/Gemfile.lock +106 -0
- data/assembly-objectfile.gemspec +1 -3
- data/lib/assembly-objectfile/object_file.rb +253 -3
- data/lib/assembly-objectfile/version.rb +1 -1
- data/lib/assembly-objectfile.rb +0 -5
- data/spec/object_file_spec.rb +411 -172
- data/spec/spec_helper.rb +2 -31
- metadata +19 -107
- data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
- data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
- data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
- data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
- data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
- data/lib/assembly-objectfile/content_metadata.rb +0 -117
- data/lib/assembly-objectfile/object_fileable.rb +0 -275
- data/spec/content_metadata_spec.rb +0 -809
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
- data/spec/test_data/input/res1_image1.jp2 +0 -0
- data/spec/test_data/input/res1_image2.jp2 +0 -0
- data/spec/test_data/input/res1_image2.tif +0 -0
- data/spec/test_data/input/res1_teifile.txt +0 -1
- data/spec/test_data/input/res2_image1.jp2 +0 -0
- data/spec/test_data/input/res2_image1.tif +0 -0
- data/spec/test_data/input/res2_image2.jp2 +0 -0
- data/spec/test_data/input/res2_image2.tif +0 -0
- data/spec/test_data/input/res2_teifile.txt +0 -1
- data/spec/test_data/input/res2_textfile.txt +0 -1
- data/spec/test_data/input/res3_image1.jp2 +0 -0
- data/spec/test_data/input/res3_image1.tif +0 -0
- data/spec/test_data/input/res3_teifile.txt +0 -1
- data/spec/test_data/input/test.pdf +0 -1
- data/spec/test_data/input/test.svg +0 -2
- data/spec/test_data/input/test2.jp2 +0 -0
- data/spec/test_data/input/test2.tif +0 -0
|
@@ -1,275 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'mini_exiftool'
|
|
4
|
-
require 'mime/types'
|
|
5
|
-
|
|
6
|
-
module Assembly
|
|
7
|
-
# Common behaviors we need for other classes in the gem
|
|
8
|
-
module ObjectFileable
|
|
9
|
-
attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
|
|
10
|
-
|
|
11
|
-
VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
|
|
12
|
-
|
|
13
|
-
# @param [String] path full path to the file to be worked with
|
|
14
|
-
# @param [Hash<Symbol => Object>] params options used during content metadata generation
|
|
15
|
-
# @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g. {:preserve=>'yes',:shelve=>'no',:publish=>'no'}, defaults pulled from mimetype
|
|
16
|
-
# @option params [String] :label a resource label (files bundled together will just get the first file's label attribute if set)
|
|
17
|
-
# @option params [String] :provider_md5 pre-computed MD5 checksum
|
|
18
|
-
# @option params [String] :provider_sha1 pre-computed SHA1 checksum
|
|
19
|
-
# @option params [String] :relative_path if you want the file ids in the content metadata it can be set, otherwise content metadata will get the full path
|
|
20
|
-
# @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
|
|
21
|
-
# options are :override (from manual override mapping if exists), :exif (from exif if exists),
|
|
22
|
-
# :extension (from file extension), and :file (from unix file system command)
|
|
23
|
-
# the default is defined in the private `default_mime_type_order` method but you can override to set your own order
|
|
24
|
-
# @example
|
|
25
|
-
# Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
26
|
-
def initialize(path, params = {})
|
|
27
|
-
@path = path
|
|
28
|
-
@label = params[:label]
|
|
29
|
-
@file_attributes = params[:file_attributes]
|
|
30
|
-
@relative_path = params[:relative_path]
|
|
31
|
-
@provider_md5 = params[:provider_md5]
|
|
32
|
-
@provider_sha1 = params[:provider_sha1]
|
|
33
|
-
@mime_type_order = params[:mime_type_order] || default_mime_type_order
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# @return [String] DPG base filename, removing the extension and the '00','05', etc. placeholders
|
|
37
|
-
# @example
|
|
38
|
-
# source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
|
39
|
-
# puts source_file.dpg_basename # "cy565rm7188_001"
|
|
40
|
-
def dpg_basename
|
|
41
|
-
file_parts = File.basename(path, ext).split('_')
|
|
42
|
-
file_parts.size == 3 ? "#{file_parts[0]}_#{file_parts[2]}" : filename_without_ext
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
# @return [String] DPG subfolder for the given filename, i.e. '00','05', etc.
|
|
46
|
-
# @example
|
|
47
|
-
# source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
|
48
|
-
# puts source_file.dpg_folder # "00"
|
|
49
|
-
def dpg_folder
|
|
50
|
-
file_parts = File.basename(path, ext).split('_')
|
|
51
|
-
file_parts.size == 3 ? file_parts[1] : ''
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
# @return [String] base filename
|
|
55
|
-
# @example
|
|
56
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
57
|
-
# puts source_file.filename # "path_to_file.tif"
|
|
58
|
-
def filename
|
|
59
|
-
File.basename(path)
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# @return [String] base directory
|
|
63
|
-
# @example
|
|
64
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
65
|
-
# puts source_file.dirname # "/input"
|
|
66
|
-
def dirname
|
|
67
|
-
File.dirname(path)
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
# @return [String] filename extension
|
|
71
|
-
# @example
|
|
72
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
73
|
-
# puts source_file.ext # ".tif"
|
|
74
|
-
def ext
|
|
75
|
-
File.extname(path)
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
# @return [String] base filename without extension
|
|
79
|
-
# @example
|
|
80
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
81
|
-
# puts source_file.filename_without_ext # "path_to_file"
|
|
82
|
-
def filename_without_ext
|
|
83
|
-
File.basename(path, ext)
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
# @return [MiniExiftool] exif information stored as a hash and an object
|
|
87
|
-
# @example
|
|
88
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
89
|
-
# puts source_file.exif # hash with exif information
|
|
90
|
-
def exif
|
|
91
|
-
@exif ||= begin
|
|
92
|
-
check_for_file
|
|
93
|
-
MiniExiftool.new(path, replace_invalid_chars: '?')
|
|
94
|
-
end
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
# Computes md5 checksum or returns cached value
|
|
98
|
-
# @return [String] md5 checksum
|
|
99
|
-
# @example
|
|
100
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
101
|
-
# puts source_file.md5 # 'XXX123XXX1243XX1243'
|
|
102
|
-
def md5
|
|
103
|
-
check_for_file unless @md5
|
|
104
|
-
@md5 ||= Digest::MD5.file(path).hexdigest
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
# Computes sha1 checksum or return cached value
|
|
108
|
-
# @return [String] sha1 checksum
|
|
109
|
-
# @example
|
|
110
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
111
|
-
# puts source_file.sha1 # 'XXX123XXX1243XX1243'
|
|
112
|
-
def sha1
|
|
113
|
-
check_for_file unless @sha1
|
|
114
|
-
@sha1 ||= Digest::SHA1.file(path).hexdigest
|
|
115
|
-
end
|
|
116
|
-
|
|
117
|
-
# Returns mimetype information for the current file based on the ordering set in mime_type_order (which defaults to default_mime_type_order)
|
|
118
|
-
# We stop computing mimetypes as soon as we have a method that returns a value
|
|
119
|
-
# @return [String] mime type
|
|
120
|
-
# @example
|
|
121
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
|
122
|
-
# puts source_file.mimetype # 'text/plain'
|
|
123
|
-
def mimetype
|
|
124
|
-
@mimetype ||= begin
|
|
125
|
-
check_for_file
|
|
126
|
-
mimetype = ''
|
|
127
|
-
mime_type_order.each do |mime_type_method|
|
|
128
|
-
mimetype = public_send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
|
|
129
|
-
break if mimetype.present?
|
|
130
|
-
end
|
|
131
|
-
mimetype
|
|
132
|
-
end
|
|
133
|
-
end
|
|
134
|
-
|
|
135
|
-
# Returns mimetype information using the manual override mapping (based on a file extension lookup)
|
|
136
|
-
# @return [String] mime type for supplied file if a mapping exists for the file's extension
|
|
137
|
-
# @example
|
|
138
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
|
|
139
|
-
# puts source_file.override_mimetype # 'application/json'
|
|
140
|
-
def override_mimetype
|
|
141
|
-
@override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
|
|
142
|
-
end
|
|
143
|
-
|
|
144
|
-
# Returns mimetype information using the mime-types gem (based on a file extension lookup)
|
|
145
|
-
# @return [String] mime type for supplied file
|
|
146
|
-
# @example
|
|
147
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
|
148
|
-
# puts source_file.extension_mimetype # 'text/plain'
|
|
149
|
-
def extension_mimetype
|
|
150
|
-
@extension_mimetype ||= begin
|
|
151
|
-
mtype = MIME::Types.type_for(path).first
|
|
152
|
-
mtype ? mtype.content_type : ''
|
|
153
|
-
end
|
|
154
|
-
end
|
|
155
|
-
|
|
156
|
-
# Returns mimetype information for the current file based on unix file system command.
|
|
157
|
-
# @return [String] mime type for supplied file
|
|
158
|
-
# @example
|
|
159
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
|
160
|
-
# puts source_file.file_mimetype # 'text/plain'
|
|
161
|
-
def file_mimetype
|
|
162
|
-
@file_mimetype ||= begin
|
|
163
|
-
check_for_file
|
|
164
|
-
`file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
|
|
165
|
-
end
|
|
166
|
-
end
|
|
167
|
-
|
|
168
|
-
# Returns mimetype information for the current file based on exif data (if available and not a trusted source that we'd rather get from the file system command)
|
|
169
|
-
# @return [String] mime type for supplied file
|
|
170
|
-
# @example
|
|
171
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
|
172
|
-
# puts source_file.exif_mimetype # 'text/plain'
|
|
173
|
-
def exif_mimetype
|
|
174
|
-
@exif_mimetype ||= begin
|
|
175
|
-
check_for_file
|
|
176
|
-
prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype) # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
|
|
177
|
-
exif.mimetype if exif&.mimetype && prefer_exif
|
|
178
|
-
end
|
|
179
|
-
end
|
|
180
|
-
|
|
181
|
-
# @note Uses shell call to "file", only expected to work on unix based systems
|
|
182
|
-
# @return [String] encoding for supplied file
|
|
183
|
-
# @example
|
|
184
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
|
185
|
-
# puts source_file.encoding # 'us-ascii'
|
|
186
|
-
def encoding
|
|
187
|
-
@encoding ||= begin
|
|
188
|
-
check_for_file
|
|
189
|
-
`file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
|
|
190
|
-
end
|
|
191
|
-
end
|
|
192
|
-
|
|
193
|
-
# @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
|
|
194
|
-
# @example
|
|
195
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
196
|
-
# puts source_file.object_type # :image
|
|
197
|
-
def object_type
|
|
198
|
-
lookup = MIME::Types[mimetype][0]
|
|
199
|
-
lookup.nil? ? :other : lookup.media_type.to_sym
|
|
200
|
-
end
|
|
201
|
-
|
|
202
|
-
# @return [Boolean] if object is an image
|
|
203
|
-
# @example
|
|
204
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
205
|
-
# puts source_file.image? # true
|
|
206
|
-
def image?
|
|
207
|
-
object_type == :image
|
|
208
|
-
end
|
|
209
|
-
|
|
210
|
-
# Examines the input image for validity. Used to determine if image is a valid and useful image.
|
|
211
|
-
# If image is not a jp2, also checks if it is jp2able?
|
|
212
|
-
# @return [Boolean] true if image is valid, false if not.
|
|
213
|
-
# @example
|
|
214
|
-
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
215
|
-
# puts source_img.valid_image? # true
|
|
216
|
-
def valid_image?
|
|
217
|
-
return false unless image?
|
|
218
|
-
|
|
219
|
-
mimetype == 'image/jp2' || jp2able?
|
|
220
|
-
end
|
|
221
|
-
|
|
222
|
-
# @return [Boolean] true if image has a color profile, false if not.
|
|
223
|
-
# @example
|
|
224
|
-
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
225
|
-
# puts source_img.has_color_profile? # true
|
|
226
|
-
def has_color_profile?
|
|
227
|
-
return false unless exif
|
|
228
|
-
|
|
229
|
-
exif['profiledescription'] || exif['colorspace'] ? true : false
|
|
230
|
-
end
|
|
231
|
-
|
|
232
|
-
# Examines the input image for validity to create a jp2. Same as valid_image? but also confirms the existence of a profile description and further restricts mimetypes.
|
|
233
|
-
# It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
|
|
234
|
-
# @return [Boolean] true if image should have a jp2 created, false if not.
|
|
235
|
-
# @example
|
|
236
|
-
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
237
|
-
# puts source_img.jp2able? # true
|
|
238
|
-
def jp2able?
|
|
239
|
-
return false unless exif
|
|
240
|
-
|
|
241
|
-
Assembly::VALID_IMAGE_MIMETYPES.include?(mimetype)
|
|
242
|
-
end
|
|
243
|
-
|
|
244
|
-
# Returns file size information for the current file in bytes.
|
|
245
|
-
# @return [Integer] file size in bytes
|
|
246
|
-
# @example
|
|
247
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
248
|
-
# puts source_file.filesize # 1345
|
|
249
|
-
def filesize
|
|
250
|
-
check_for_file
|
|
251
|
-
@filesize ||= File.size(path)
|
|
252
|
-
end
|
|
253
|
-
|
|
254
|
-
# Determines if the file exists (and is not a directory)
|
|
255
|
-
# @return [Boolean] file exists
|
|
256
|
-
# @example
|
|
257
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
|
258
|
-
# puts source_file.file_exists? # true
|
|
259
|
-
def file_exists?
|
|
260
|
-
@file_exists ||= (File.exist?(path) && !File.directory?(path))
|
|
261
|
-
end
|
|
262
|
-
|
|
263
|
-
private
|
|
264
|
-
|
|
265
|
-
# private method defining default preferred ordering of how mimetypes are determined
|
|
266
|
-
def default_mime_type_order
|
|
267
|
-
%i[override exif file extension]
|
|
268
|
-
end
|
|
269
|
-
|
|
270
|
-
# private method to check for file existence before operating on it
|
|
271
|
-
def check_for_file
|
|
272
|
-
raise "input file #{path} does not exist or is a directory" unless file_exists?
|
|
273
|
-
end
|
|
274
|
-
end
|
|
275
|
-
end
|