assembly-objectfile 1.11.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +14 -0
- data/.github/pull_request_template.md +3 -5
- data/.gitignore +0 -1
- data/.rubocop.yml +87 -15
- data/.rubocop_todo.yml +19 -74
- data/Gemfile +2 -0
- data/Gemfile.lock +106 -0
- data/README.md +1 -1
- data/assembly-objectfile.gemspec +5 -6
- data/lib/assembly-objectfile/object_file.rb +253 -3
- data/lib/assembly-objectfile/version.rb +2 -2
- data/lib/assembly-objectfile.rb +0 -5
- data/spec/object_file_spec.rb +411 -167
- data/spec/spec_helper.rb +3 -31
- data/spec/test_data/empty.txt +0 -0
- metadata +35 -121
- data/.travis.yml +0 -20
- data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
- data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
- data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
- data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
- data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
- data/lib/assembly-objectfile/content_metadata.rb +0 -117
- data/lib/assembly-objectfile/object_fileable.rb +0 -278
- data/spec/content_metadata_spec.rb +0 -791
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
- data/spec/test_data/input/res1_image1.jp2 +0 -0
- data/spec/test_data/input/res1_image2.jp2 +0 -0
- data/spec/test_data/input/res1_image2.tif +0 -0
- data/spec/test_data/input/res1_teifile.txt +0 -1
- data/spec/test_data/input/res2_image1.jp2 +0 -0
- data/spec/test_data/input/res2_image1.tif +0 -0
- data/spec/test_data/input/res2_image2.jp2 +0 -0
- data/spec/test_data/input/res2_image2.tif +0 -0
- data/spec/test_data/input/res2_teifile.txt +0 -1
- data/spec/test_data/input/res2_textfile.txt +0 -1
- data/spec/test_data/input/res3_image1.jp2 +0 -0
- data/spec/test_data/input/res3_image1.tif +0 -0
- data/spec/test_data/input/res3_teifile.txt +0 -1
- data/spec/test_data/input/test.pdf +0 -1
- data/spec/test_data/input/test2.jp2 +0 -0
- data/spec/test_data/input/test2.tif +0 -0
@@ -1,14 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'mini_exiftool'
|
4
|
+
require 'mime/types'
|
5
|
+
require 'active_support/core_ext/object/blank'
|
6
|
+
|
3
7
|
module Assembly
|
4
8
|
# This class contains generic methods to operate on any file.
|
5
9
|
class ObjectFile
|
6
|
-
include Assembly::ObjectFileable
|
7
|
-
|
8
10
|
# Class level method that, given an array of strings, returns the longest common initial path. Useful for removing a common path from a set of filenames when producing content metadata
|
9
11
|
#
|
10
12
|
# @param [Array] strings Array of filenames with paths to operate on
|
11
|
-
# @return [String]
|
13
|
+
# @return [String] longest common initial part of path of filenames passed in
|
12
14
|
#
|
13
15
|
# Example:
|
14
16
|
# puts Assembly::ObjectFile.common_prefix(['/Users/peter/00/test.tif','/Users/peter/05/test.jp2']) # '/Users/peter/0'
|
@@ -25,5 +27,253 @@ module Assembly
|
|
25
27
|
"#{common_prefix.split('/')[0..-2].join('/')}/" # if it was, then return the common prefix directly
|
26
28
|
end
|
27
29
|
end
|
30
|
+
|
31
|
+
attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
|
32
|
+
|
33
|
+
VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
|
34
|
+
|
35
|
+
# @param [String] path full path to the file to be worked with
|
36
|
+
# @param [Hash<Symbol => Object>] params options used during content metadata generation
|
37
|
+
# @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g. {:preserve=>'yes',:shelve=>'no',:publish=>'no'}, defaults pulled from mimetype
|
38
|
+
# @option params [String] :label a resource label (files bundled together will just get the first file's label attribute if set)
|
39
|
+
# @option params [String] :provider_md5 pre-computed MD5 checksum
|
40
|
+
# @option params [String] :provider_sha1 pre-computed SHA1 checksum
|
41
|
+
# @option params [String] :relative_path if you want the file ids in the content metadata it can be set, otherwise content metadata will get the full path
|
42
|
+
# @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
|
43
|
+
# options are :override (from manual override mapping if exists), :exif (from exif if exists),
|
44
|
+
# :extension (from file extension), and :file (from unix file system command)
|
45
|
+
# the default is defined in the private `default_mime_type_order` method but you can override to set your own order
|
46
|
+
# @example
|
47
|
+
# Assembly::ObjectFile.new('/input/path_to_file.tif')
|
48
|
+
def initialize(path, params = {})
|
49
|
+
@path = path
|
50
|
+
@label = params[:label]
|
51
|
+
@file_attributes = params[:file_attributes]
|
52
|
+
@relative_path = params[:relative_path]
|
53
|
+
@provider_md5 = params[:provider_md5]
|
54
|
+
@provider_sha1 = params[:provider_sha1]
|
55
|
+
@mime_type_order = params[:mime_type_order] || default_mime_type_order
|
56
|
+
end
|
57
|
+
|
58
|
+
# @return [String] base filename
|
59
|
+
# @example
|
60
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
61
|
+
# puts source_file.filename # "path_to_file.tif"
|
62
|
+
def filename
|
63
|
+
File.basename(path)
|
64
|
+
end
|
65
|
+
|
66
|
+
# @return [String] base directory
|
67
|
+
# @example
|
68
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
69
|
+
# puts source_file.dirname # "/input"
|
70
|
+
def dirname
|
71
|
+
File.dirname(path)
|
72
|
+
end
|
73
|
+
|
74
|
+
# @return [String] filename extension
|
75
|
+
# @example
|
76
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
77
|
+
# puts source_file.ext # ".tif"
|
78
|
+
def ext
|
79
|
+
File.extname(path)
|
80
|
+
end
|
81
|
+
|
82
|
+
# @return [String] base filename without extension
|
83
|
+
# @example
|
84
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
85
|
+
# puts source_file.filename_without_ext # "path_to_file"
|
86
|
+
def filename_without_ext
|
87
|
+
File.basename(path, ext)
|
88
|
+
end
|
89
|
+
|
90
|
+
# @return [MiniExiftool] exif information stored as a hash and an object
|
91
|
+
# @example
|
92
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
93
|
+
# puts source_file.exif # hash with exif information
|
94
|
+
def exif
|
95
|
+
@exif ||= begin
|
96
|
+
check_for_file
|
97
|
+
MiniExiftool.new(path, replace_invalid_chars: '?')
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
# Computes md5 checksum or returns cached value
|
102
|
+
# @return [String] md5 checksum
|
103
|
+
# @example
|
104
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
105
|
+
# puts source_file.md5 # 'XXX123XXX1243XX1243'
|
106
|
+
def md5
|
107
|
+
check_for_file unless @md5
|
108
|
+
@md5 ||= Digest::MD5.file(path).hexdigest
|
109
|
+
end
|
110
|
+
|
111
|
+
# Computes sha1 checksum or returns cached value
|
112
|
+
# @return [String] sha1 checksum
|
113
|
+
# @example
|
114
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
115
|
+
# puts source_file.sha1 # 'XXX123XXX1243XX1243'
|
116
|
+
def sha1
|
117
|
+
check_for_file unless @sha1
|
118
|
+
@sha1 ||= Digest::SHA1.file(path).hexdigest
|
119
|
+
end
|
120
|
+
|
121
|
+
# Returns mimetype information for the current file based on the ordering set in mime_type_order (which defaults to default_mime_type_order)
|
122
|
+
# We stop computing mimetypes as soon as we have a method that returns a value
|
123
|
+
# @return [String] mime type
|
124
|
+
# @example
|
125
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
126
|
+
# puts source_file.mimetype # 'text/plain'
|
127
|
+
def mimetype
|
128
|
+
@mimetype ||= begin
|
129
|
+
check_for_file
|
130
|
+
mimetype = ''
|
131
|
+
mime_type_order.each do |mime_type_method|
|
132
|
+
mimetype = send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
|
133
|
+
break if mimetype.present?
|
134
|
+
end
|
135
|
+
mimetype
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
# @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video; :other is returned if no MIME type entry is found for the mimetype
|
140
|
+
# @example
|
141
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
142
|
+
# puts source_file.object_type # :image
|
143
|
+
def object_type
|
144
|
+
lookup = MIME::Types[mimetype][0]
|
145
|
+
lookup.nil? ? :other : lookup.media_type.to_sym
|
146
|
+
end
|
147
|
+
|
148
|
+
# @return [Boolean] if object is an image
|
149
|
+
# @example
|
150
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
151
|
+
# puts source_file.image? # true
|
152
|
+
def image?
|
153
|
+
object_type == :image
|
154
|
+
end
|
155
|
+
|
156
|
+
# Examines the input image for validity. Used to determine if image is a valid and useful image.
|
157
|
+
# If image is not a jp2, also checks if it is jp2able?
|
158
|
+
# @return [Boolean] true if image is valid, false if not.
|
159
|
+
# @example
|
160
|
+
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
161
|
+
# puts source_img.valid_image? # true
|
162
|
+
def valid_image?
|
163
|
+
return false unless image?
|
164
|
+
|
165
|
+
mimetype == 'image/jp2' || jp2able?
|
166
|
+
end
|
167
|
+
|
168
|
+
# @return [Boolean] true if image has a color profile, false if not.
|
169
|
+
# @example
|
170
|
+
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
171
|
+
# puts source_img.has_color_profile? # true
|
172
|
+
def has_color_profile?
|
173
|
+
return false unless exif
|
174
|
+
|
175
|
+
exif['profiledescription'] || exif['colorspace'] ? true : false
|
176
|
+
end
|
177
|
+
|
178
|
+
# Examines the input image for validity to create a jp2: exif data must be readable and the mimetype must be one of Assembly::VALID_IMAGE_MIMETYPES (the existence of a profile description is not checked here; see has_color_profile?).
|
179
|
+
# It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
|
180
|
+
# @return [Boolean] true if image should have a jp2 created, false if not.
|
181
|
+
# @example
|
182
|
+
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
183
|
+
# puts source_img.jp2able? # true
|
184
|
+
def jp2able?
|
185
|
+
return false unless exif
|
186
|
+
|
187
|
+
Assembly::VALID_IMAGE_MIMETYPES.include?(mimetype)
|
188
|
+
end
|
189
|
+
|
190
|
+
# Returns file size information for the current file in bytes.
|
191
|
+
# @return [Integer] file size in bytes
|
192
|
+
# @example
|
193
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
194
|
+
# puts source_file.filesize # 1345
|
195
|
+
def filesize
|
196
|
+
check_for_file
|
197
|
+
@filesize ||= File.size(path)
|
198
|
+
end
|
199
|
+
|
200
|
+
# Determines if the file exists (and is not a directory)
|
201
|
+
# @return [Boolean] file exists
|
202
|
+
# @example
|
203
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
204
|
+
# puts source_file.file_exists? # true
|
205
|
+
def file_exists?
|
206
|
+
@file_exists ||= (File.exist?(path) && !File.directory?(path))
|
207
|
+
end
|
208
|
+
|
209
|
+
private
|
210
|
+
|
211
|
+
# private method to check for file existence before operating on it
|
212
|
+
def check_for_file
|
213
|
+
raise "input file #{path} does not exist or is a directory" unless file_exists?
|
214
|
+
end
|
215
|
+
|
216
|
+
# private method defining default preferred ordering of how mimetypes are determined
|
217
|
+
def default_mime_type_order
|
218
|
+
%i[override exif file extension]
|
219
|
+
end
|
220
|
+
|
221
|
+
# Returns mimetype information using the mime-types gem (based on a file extension lookup)
|
222
|
+
# @return [String] mime type for supplied file
|
223
|
+
# @example
|
224
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
225
|
+
# puts source_file.extension_mimetype # 'text/plain'
|
226
|
+
def extension_mimetype
|
227
|
+
@extension_mimetype ||= begin
|
228
|
+
mtype = MIME::Types.type_for(path).first
|
229
|
+
mtype ? mtype.content_type : ''
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
# Returns mimetype information for the current file based on unix file system command.
|
234
|
+
# @return [String] mime type for supplied file
|
235
|
+
# @example
|
236
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
237
|
+
# puts source_file.file_mimetype # 'text/plain'
|
238
|
+
def file_mimetype
|
239
|
+
@file_mimetype ||= begin
|
240
|
+
check_for_file
|
241
|
+
`file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
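# Returns mimetype information for the current file based on exif data, but only when exif data provides a mimetype and the mimetype reported by the unix file command is not one of the "trusted" mimetypes (for trusted mimetypes we would rather use the file command's answer)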
|
246
|
+
# @return [String, nil] mime type for supplied file, or nil when the exif mimetype is unavailable or not preferred
|
247
|
+
# @example
|
248
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
249
|
+
# puts source_file.exif_mimetype # 'text/plain'
|
250
|
+
def exif_mimetype
|
251
|
+
@exif_mimetype ||= begin
|
252
|
+
check_for_file
|
253
|
+
prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype) # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
|
254
|
+
exif.mimetype if exif&.mimetype && prefer_exif
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
# Returns mimetype information using the manual override mapping (based on a file extension lookup)
|
259
|
+
# @return [String] mime type for supplied file if a mapping exists for the file's extension
|
260
|
+
# @example
|
261
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
|
262
|
+
# puts source_file.override_mimetype # 'application/json'
|
263
|
+
def override_mimetype
|
264
|
+
@override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
|
265
|
+
end
|
266
|
+
|
267
|
+
# @note Uses shell call to "file", only expected to work on unix based systems
|
268
|
+
# @return [String] encoding for supplied file
|
269
|
+
# @example
|
270
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
271
|
+
# puts source_file.encoding # 'us-ascii'
|
272
|
+
def encoding
|
273
|
+
@encoding ||= begin
|
274
|
+
check_for_file
|
275
|
+
`file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
|
276
|
+
end
|
277
|
+
end
|
28
278
|
end
|
29
279
|
end
|
data/lib/assembly-objectfile.rb
CHANGED
@@ -7,9 +7,6 @@ module Assembly
|
|
7
7
|
# if input image is not one of these mime types, it will not be regarded as a valid image for the purpose of generating a JP2 derivative
|
8
8
|
VALID_IMAGE_MIMETYPES = ['image/jpeg', 'image/tiff', 'image/tif', 'image/png'].freeze
|
9
9
|
|
10
|
-
# if input file has one of these extensions in a 3D object, it will get the 3d resource type
|
11
|
-
VALID_THREE_DIMENSION_EXTENTIONS = ['.obj'].freeze
|
12
|
-
|
13
10
|
# the list of mimetypes that will be "trusted" by the unix file command; if a mimetype other than one of these is returned
|
14
11
|
# by the file command, then a check will be made to see if exif data exists...if so, the mimetype returned by the exif data will be used
|
15
12
|
# if no exif data exists, then the mimetype returned by the unix file command will be used
|
@@ -24,7 +21,5 @@ module Assembly
|
|
24
21
|
}.freeze
|
25
22
|
end
|
26
23
|
|
27
|
-
require 'assembly-objectfile/content_metadata'
|
28
|
-
require 'assembly-objectfile/object_fileable'
|
29
24
|
require 'assembly-objectfile/object_file'
|
30
25
|
require 'assembly-objectfile/version'
|