assembly-objectfile 1.13.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/.rubocop.yml +1 -1
- data/.rubocop_todo.yml +14 -80
- data/Gemfile.lock +106 -0
- data/assembly-objectfile.gemspec +1 -3
- data/lib/assembly-objectfile/object_file.rb +253 -3
- data/lib/assembly-objectfile/version.rb +1 -1
- data/lib/assembly-objectfile.rb +0 -5
- data/spec/object_file_spec.rb +411 -172
- data/spec/spec_helper.rb +2 -31
- metadata +19 -107
- data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
- data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
- data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
- data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
- data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
- data/lib/assembly-objectfile/content_metadata.rb +0 -117
- data/lib/assembly-objectfile/object_fileable.rb +0 -275
- data/spec/content_metadata_spec.rb +0 -809
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
- data/spec/test_data/input/res1_image1.jp2 +0 -0
- data/spec/test_data/input/res1_image2.jp2 +0 -0
- data/spec/test_data/input/res1_image2.tif +0 -0
- data/spec/test_data/input/res1_teifile.txt +0 -1
- data/spec/test_data/input/res2_image1.jp2 +0 -0
- data/spec/test_data/input/res2_image1.tif +0 -0
- data/spec/test_data/input/res2_image2.jp2 +0 -0
- data/spec/test_data/input/res2_image2.tif +0 -0
- data/spec/test_data/input/res2_teifile.txt +0 -1
- data/spec/test_data/input/res2_textfile.txt +0 -1
- data/spec/test_data/input/res3_image1.jp2 +0 -0
- data/spec/test_data/input/res3_image1.tif +0 -0
- data/spec/test_data/input/res3_teifile.txt +0 -1
- data/spec/test_data/input/test.pdf +0 -1
- data/spec/test_data/input/test.svg +0 -2
- data/spec/test_data/input/test2.jp2 +0 -0
- data/spec/test_data/input/test2.tif +0 -0
@@ -1,275 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'mini_exiftool'
|
4
|
-
require 'mime/types'
|
5
|
-
|
6
|
-
module Assembly
  # Common behaviors we need for other classes in the gem.
  #
  # Including classes get path helpers, checksum and size accessors, and
  # mimetype / encoding detection for the single file located at +path+.
  module ObjectFileable
    attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order

    # Names accepted in +mime_type_order+; each maps to a public `<name>_mimetype` method.
    VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze

    # @param [String] path full path to the file to be worked with
    # @param [Hash<Symbol => Object>] params options used during content metadata generation
    # @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g. {:preserve=>'yes',:shelve=>'no',:publish=>'no'}, defaults pulled from mimetype
    # @option params [String] :label a resource label (files bundled together will just get the first file's label attribute if set)
    # @option params [String] :provider_md5 pre-computed MD5 checksum
    # @option params [String] :provider_sha1 pre-computed SHA1 checksum
    # @option params [String] :relative_path if you want the file ids in the content metadata it can be set, otherwise content metadata will get the full path
    # @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
    #   options are :override (from manual override mapping if exists), :exif (from exif if exists),
    #   :extension (from file extension), and :file (from unix file system command)
    #   the default is defined in the private `default_mime_type_order` method but you can override to set your own order
    # @example
    #   Assembly::ObjectFile.new('/input/path_to_file.tif')
    def initialize(path, params = {})
      @path = path
      @label = params[:label]
      @file_attributes = params[:file_attributes]
      @relative_path = params[:relative_path]
      @provider_md5 = params[:provider_md5]
      @provider_sha1 = params[:provider_sha1]
      @mime_type_order = params[:mime_type_order] || default_mime_type_order
    end

    # @return [String] DPG base filename, removing the extension and the '00','05', etc. placeholders;
    #   falls back to the filename without extension when the name does not have exactly three '_' separated parts
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
    #   puts source_file.dpg_basename # "cy565rm7188_001"
    def dpg_basename
      parts = dpg_filename_parts
      parts.size == 3 ? "#{parts[0]}_#{parts[2]}" : filename_without_ext
    end

    # @return [String] DPG subfolder for the given filename, i.e. '00','05', etc.;
    #   an empty string when the name does not have exactly three '_' separated parts
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
    #   puts source_file.dpg_folder # "00"
    def dpg_folder
      parts = dpg_filename_parts
      parts.size == 3 ? parts[1] : ''
    end

    # @return [String] base filename
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_file.filename # "path_to_file.tif"
    def filename
      File.basename(path)
    end

    # @return [String] base directory
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_file.dirname # "/input"
    def dirname
      File.dirname(path)
    end

    # @return [String] filename extension
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_file.ext # ".tif"
    def ext
      File.extname(path)
    end

    # @return [String] base filename without extension
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_file.filename_without_ext # "path_to_file"
    def filename_without_ext
      File.basename(path, ext)
    end

    # @return [MiniExiftool] exif information stored as a hash and an object
    # @raise [RuntimeError] if the file does not exist or is a directory
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_file.exif # hash with exif information
    def exif
      @exif ||= begin
        check_for_file
        MiniExiftool.new(path, replace_invalid_chars: '?')
      end
    end

    # Computes md5 checksum or returns cached value
    # @return [String] md5 checksum
    # @raise [RuntimeError] if the file does not exist or is a directory
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_file.md5 # 'XXX123XXX1243XX1243'
    def md5
      # Only touch the filesystem when nothing has been cached yet.
      check_for_file unless @md5
      @md5 ||= Digest::MD5.file(path).hexdigest
    end

    # Computes sha1 checksum or returns cached value
    # @return [String] sha1 checksum
    # @raise [RuntimeError] if the file does not exist or is a directory
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_file.sha1 # 'XXX123XXX1243XX1243'
    def sha1
      check_for_file unless @sha1
      @sha1 ||= Digest::SHA1.file(path).hexdigest
    end

    # Returns mimetype information for the current file based on the ordering in +mime_type_order+.
    # We stop computing mimetypes as soon as we have a method that returns a non-blank value;
    # entries not listed in VALID_MIMETYPE_METHODS are skipped.
    # @return [String] mime type, or an empty string when no method produced one
    # @raise [RuntimeError] if the file does not exist or is a directory
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
    #   puts source_file.mimetype # 'text/plain'
    def mimetype
      @mimetype ||= begin
        check_for_file
        detected = ''
        mime_type_order.each do |mime_type_method|
          next unless VALID_MIMETYPE_METHODS.include?(mime_type_method)

          candidate = public_send("#{mime_type_method}_mimetype")
          # Plain-Ruby blank check (nil, empty or whitespace-only) so this module
          # does not depend on ActiveSupport's `present?` being loaded.
          next if candidate.to_s.strip.empty?

          detected = candidate
          break
        end
        detected
      end
    end

    # Returns mimetype information using the manual override mapping (based on a file extension lookup)
    # @return [String] mime type for supplied file if a mapping exists for the file's extension, otherwise ''
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
    #   puts source_file.override_mimetype # 'application/json'
    def override_mimetype
      # The lookup key is the extension including its leading dot, as a symbol.
      @override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
    end

    # Returns mimetype information using the mime-types gem (based on a file extension lookup)
    # @return [String] mime type for supplied file, or '' when the lookup finds nothing
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
    #   puts source_file.extension_mimetype # 'text/plain'
    def extension_mimetype
      @extension_mimetype ||= begin
        mtype = MIME::Types.type_for(path).first
        mtype ? mtype.content_type : ''
      end
    end

    # Returns mimetype information for the current file based on unix file system command.
    # @note Uses the external "file" command, only expected to work on unix based systems
    # @return [String] mime type for supplied file
    # @raise [RuntimeError] if the file does not exist or is a directory
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
    #   puts source_file.file_mimetype # 'text/plain'
    def file_mimetype
      @file_mimetype ||= begin
        check_for_file
        file_command_output('--mime-type')
      end
    end

    # Returns mimetype information for the current file based on exif data (if available and not a trusted source that we'd rather get from the file system command)
    # @return [String, nil] mime type from exif, or nil when the file command's type is trusted or exif has none
    # @raise [RuntimeError] if the file does not exist or is a directory
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
    #   puts source_file.exif_mimetype # 'text/plain'
    def exif_mimetype
      @exif_mimetype ||= begin
        check_for_file
        # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
        prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype)
        exif.mimetype if exif&.mimetype && prefer_exif
      end
    end

    # @note Uses the external "file" command, only expected to work on unix based systems
    # @return [String] encoding for supplied file
    # @raise [RuntimeError] if the file does not exist or is a directory
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
    #   puts source_file.encoding # 'us-ascii'
    def encoding
      @encoding ||= begin
        check_for_file
        file_command_output('--mime-encoding')
      end
    end

    # @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video;
    #   :other when the mimetype is not known to MIME::Types
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_file.object_type # :image
    def object_type
      lookup = MIME::Types[mimetype].first
      lookup.nil? ? :other : lookup.media_type.to_sym
    end

    # @return [Boolean] if object is an image
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_file.image? # true
    def image?
      object_type == :image
    end

    # Examines the input image for validity. Used to determine if image is a valid and useful image.
    # If image is not a jp2, also checks if it is jp2able?
    # @return [Boolean] true if image is valid, false if not.
    # @example
    #   source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_img.valid_image? # true
    def valid_image?
      return false unless image?

      mimetype == 'image/jp2' || jp2able?
    end

    # @return [Boolean] true if image has a color profile, false if not.
    # @example
    #   source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_img.has_color_profile? # true
    def has_color_profile?
      return false unless exif

      # Either exif tag being set counts as having a profile.
      (exif['profiledescription'] || exif['colorspace']) ? true : false
    end

    # Examines the input image for validity to create a jp2: exif data must be readable and the
    # mimetype must be listed in Assembly::VALID_IMAGE_MIMETYPES.
    # It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
    # @return [Boolean] true if image should have a jp2 created, false if not.
    # @example
    #   source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_img.jp2able? # true
    def jp2able?
      return false unless exif

      Assembly::VALID_IMAGE_MIMETYPES.include?(mimetype)
    end

    # Returns file size information for the current file in bytes.
    # @return [Integer] file size in bytes
    # @raise [RuntimeError] if the file does not exist or is a directory
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_file.filesize # 1345
    def filesize
      check_for_file
      @filesize ||= File.size(path)
    end

    # Determines if the file exists (and is not a directory)
    # @return [Boolean] file exists
    # @example
    #   source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
    #   puts source_file.file_exists? # true
    def file_exists?
      # `||=` only caches a true result; a false result is re-checked on every call.
      @file_exists ||= (File.exist?(path) && !File.directory?(path))
    end

    private

    # private method defining default preferred ordering of how mimetypes are determined
    def default_mime_type_order
      %i[override exif file extension]
    end

    # private method to check for file existence before operating on it
    def check_for_file
      raise "input file #{path} does not exist or is a directory" unless file_exists?
    end

    # private method splitting the extension-less base filename on '_' for the DPG helpers
    def dpg_filename_parts
      File.basename(path, ext).split('_')
    end

    # private method running the unix `file` command with the given flag against the path.
    # An argv array is used so no shell ever interprets the path, and `--brief` keeps the
    # filename out of the output so colons in the path cannot corrupt the result.
    def file_command_output(flag)
      IO.popen(['file', '--brief', flag, '--', path.to_s], &:read).strip
    end
  end
end
|