assembly-objectfile 1.12.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +14 -0
- data/.github/pull_request_template.md +1 -1
- data/.gitignore +0 -1
- data/.rubocop.yml +117 -12
- data/.rubocop_todo.yml +3 -109
- data/Gemfile.lock +97 -0
- data/README.md +7 -7
- data/assembly-objectfile.gemspec +8 -11
- data/config/boot.rb +0 -1
- data/lib/{assembly-objectfile → assembly/object_file}/version.rb +2 -2
- data/lib/{assembly-objectfile/object_fileable.rb → assembly/object_file.rb} +109 -104
- data/lib/assembly-objectfile.rb +12 -15
- data/spec/assembly/object_file_spec.rb +451 -0
- data/spec/spec_helper.rb +2 -31
- data/spec/test_data/empty.txt +0 -0
- metadata +23 -156
- data/.travis.yml +0 -20
- data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
- data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
- data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
- data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
- data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
- data/lib/assembly-objectfile/content_metadata.rb +0 -117
- data/lib/assembly-objectfile/object_file.rb +0 -29
- data/profiles/AdobeRGB1998.icc +0 -0
- data/profiles/DotGain20.icc +0 -0
- data/profiles/sRGBIEC6196621.icc +0 -0
- data/spec/content_metadata_spec.rb +0 -809
- data/spec/object_file_spec.rb +0 -217
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
- data/spec/test_data/input/res1_image1.jp2 +0 -0
- data/spec/test_data/input/res1_image2.jp2 +0 -0
- data/spec/test_data/input/res1_image2.tif +0 -0
- data/spec/test_data/input/res1_teifile.txt +0 -1
- data/spec/test_data/input/res2_image1.jp2 +0 -0
- data/spec/test_data/input/res2_image1.tif +0 -0
- data/spec/test_data/input/res2_image2.jp2 +0 -0
- data/spec/test_data/input/res2_image2.tif +0 -0
- data/spec/test_data/input/res2_teifile.txt +0 -1
- data/spec/test_data/input/res2_textfile.txt +0 -1
- data/spec/test_data/input/res3_image1.jp2 +0 -0
- data/spec/test_data/input/res3_image1.tif +0 -0
- data/spec/test_data/input/res3_teifile.txt +0 -1
- data/spec/test_data/input/test.pdf +0 -1
- data/spec/test_data/input/test.svg +0 -2
- data/spec/test_data/input/test2.jp2 +0 -0
- data/spec/test_data/input/test2.tif +0 -0
@@ -2,25 +2,56 @@
|
|
2
2
|
|
3
3
|
require 'mini_exiftool'
|
4
4
|
require 'mime/types'
|
5
|
+
require 'active_support/core_ext/object/blank'
|
5
6
|
|
6
7
|
module Assembly
|
7
|
-
#
|
8
|
-
|
8
|
+
# This class contains generic methods to operate on any file.
|
9
|
+
class ObjectFile
|
10
|
+
# Class-level method that, given an array of strings, returns the longest common initial path.
|
11
|
+
# Useful for removing a common path from a set of filenames when producing content metadata
|
12
|
+
#
|
13
|
+
# @param [Array] strings Array of filenames with paths to operate on
|
14
|
+
# @return [String] longest common initial part of path of filenames passed in
|
15
|
+
#
|
16
|
+
# Example:
|
17
|
+
# puts Assembly::ObjectFile.common_path(['/Users/peter/00/test.tif','/Users/peter/05/test.jp2'])
|
18
|
+
# # => '/Users/peter/'
|
19
|
+
def self.common_path(strings)
|
20
|
+
return nil if strings.empty?
|
21
|
+
|
22
|
+
n = 0
|
23
|
+
x = strings.last
|
24
|
+
n += 1 while strings.all? { |s| s[n] && (s[n] == x[n]) }
|
25
|
+
common_prefix = x[0...n]
|
26
|
+
if common_prefix[-1, 1] == '/' # check if last element of the common string is the end of a directory
|
27
|
+
common_prefix # if so, return the common prefix directly
|
28
|
+
else
|
29
|
+
"#{common_prefix.split('/')[0..-2].join('/')}/" # if not, split the string along directories and drop the last (partial) segment
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
9
33
|
attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
|
10
34
|
|
11
35
|
VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
|
12
36
|
|
13
37
|
# @param [String] path full path to the file to be worked with
|
14
38
|
# @param [Hash<Symbol => Object>] params options used during content metadata generation
|
15
|
-
# @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g
|
16
|
-
#
|
39
|
+
# @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g.:
|
40
|
+
# {:preserve=>'yes',:shelve=>'no',:publish=>'no'},
|
41
|
+
# defaults pulled from mimetype
|
42
|
+
# @option params [String] :label a resource label (files bundled together will just get the first
|
43
|
+
# file's label attribute if set)
|
17
44
|
# @option params [String] :provider_md5 pre-computed MD5 checksum
|
18
45
|
# @option params [String] :provider_sha1 pre-computed SHA1 checksum
|
19
|
-
# @option params [String] :relative_path if you want the file ids in the content metadata it can be set,
|
46
|
+
# @option params [String] :relative_path if you want the file ids in the content metadata it can be set,
|
47
|
+
# otherwise content metadata will get the full path
|
20
48
|
# @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
|
21
|
-
# options are :override (from manual overide mapping if exists),
|
22
|
-
# :
|
23
|
-
#
|
49
|
+
# options are :override (from manual override mapping if exists),
|
50
|
+
# :exif (from exif if exists)
|
51
|
+
# :extension (from file extension)
|
52
|
+
# :file (from unix file system command)
|
53
|
+
# the default is defined in the private `default_mime_type_order` method
|
54
|
+
# but you can override to set your own order
|
24
55
|
# @example
|
25
56
|
# Assembly::ObjectFile.new('/input/path_to_file.tif')
|
26
57
|
def initialize(path, params = {})
|
@@ -33,24 +64,6 @@ module Assembly
|
|
33
64
|
@mime_type_order = params[:mime_type_order] || default_mime_type_order
|
34
65
|
end
|
35
66
|
|
36
|
-
# @return [String] DPG base filename, removing the extension and the '00','05', etc. placeholders
|
37
|
-
# @example
|
38
|
-
# source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
39
|
-
# puts source_file.dpg_basename # "cy565rm7188_001"
|
40
|
-
def dpg_basename
|
41
|
-
file_parts = File.basename(path, ext).split('_')
|
42
|
-
file_parts.size == 3 ? "#{file_parts[0]}_#{file_parts[2]}" : filename_without_ext
|
43
|
-
end
|
44
|
-
|
45
|
-
# @return [String] DPG subfolder for the given filename, i.e. '00','05', etc.
|
46
|
-
# @example
|
47
|
-
# source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
48
|
-
# puts source_file.dpg_folder # "00"
|
49
|
-
def dpg_folder
|
50
|
-
file_parts = File.basename(path, ext).split('_')
|
51
|
-
file_parts.size == 3 ? file_parts[1] : ''
|
52
|
-
end
|
53
|
-
|
54
67
|
# @return [String] base filename
|
55
68
|
# @example
|
56
69
|
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
@@ -91,8 +104,6 @@ module Assembly
|
|
91
104
|
@exif ||= begin
|
92
105
|
check_for_file
|
93
106
|
MiniExiftool.new(path, replace_invalid_chars: '?')
|
94
|
-
rescue StandardError
|
95
|
-
nil
|
96
107
|
end
|
97
108
|
end
|
98
109
|
|
@@ -127,73 +138,15 @@ module Assembly
|
|
127
138
|
check_for_file
|
128
139
|
mimetype = ''
|
129
140
|
mime_type_order.each do |mime_type_method|
|
130
|
-
mimetype =
|
141
|
+
mimetype = send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
|
131
142
|
break if mimetype.present?
|
132
143
|
end
|
133
144
|
mimetype
|
134
145
|
end
|
135
146
|
end
|
136
147
|
|
137
|
-
#
|
138
|
-
#
|
139
|
-
# @example
|
140
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
|
141
|
-
# puts source_file.override_mimetype # 'application/json'
|
142
|
-
def override_mimetype
|
143
|
-
@override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
|
144
|
-
end
|
145
|
-
|
146
|
-
# Returns mimetype information using the mime-types gem (based on a file extension lookup)
|
147
|
-
# @return [String] mime type for supplied file
|
148
|
-
# @example
|
149
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
150
|
-
# puts source_file.extension_mimetype # 'text/plain'
|
151
|
-
def extension_mimetype
|
152
|
-
@extension_mimetype ||= begin
|
153
|
-
mtype = MIME::Types.type_for(path).first
|
154
|
-
mtype ? mtype.content_type : ''
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
# Returns mimetype information for the current file based on unix file system command.
|
159
|
-
# @return [String] mime type for supplied file
|
160
|
-
# @example
|
161
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
162
|
-
# puts source_file.file_mimetype # 'text/plain'
|
163
|
-
def file_mimetype
|
164
|
-
@file_mimetype ||= begin
|
165
|
-
check_for_file
|
166
|
-
`file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
170
|
-
# Returns mimetype information for the current file based on exif data (if available and not a trusted source that we'd rather get from the file system command)
|
171
|
-
# @return [String] mime type for supplied file
|
172
|
-
# @example
|
173
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
174
|
-
# puts source_file.exif_mimetype # 'text/plain'
|
175
|
-
def exif_mimetype
|
176
|
-
@exif_mimetype ||= begin
|
177
|
-
check_for_file
|
178
|
-
prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype) # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
|
179
|
-
exif.mimetype if
|
180
|
-
exif&.mimetype && prefer_exif
|
181
|
-
end
|
182
|
-
end
|
183
|
-
|
184
|
-
# @note Uses shell call to "file", only expected to work on unix based systems
|
185
|
-
# @return [String] encoding for supplied file
|
186
|
-
# @example
|
187
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
188
|
-
# puts source_file.encoding # 'us-ascii'
|
189
|
-
def encoding
|
190
|
-
@encoding ||= begin
|
191
|
-
check_for_file
|
192
|
-
`file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
|
193
|
-
end
|
194
|
-
end
|
195
|
-
|
196
|
-
# @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
|
148
|
+
# @return [Symbol] the type of object, could be :application (for PDF or Word, etc),
|
149
|
+
# :audio, :image, :message, :model, :multipart, :text or :video
|
197
150
|
# @example
|
198
151
|
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
199
152
|
# puts source_file.object_type # :image
|
@@ -222,18 +175,10 @@ exif&.mimetype && prefer_exif
|
|
222
175
|
mimetype == 'image/jp2' || jp2able?
|
223
176
|
end
|
224
177
|
|
225
|
-
#
|
226
|
-
#
|
227
|
-
#
|
228
|
-
#
|
229
|
-
def has_color_profile?
|
230
|
-
return false unless exif
|
231
|
-
|
232
|
-
exif['profiledescription'] || exif['colorspace'] ? true : false
|
233
|
-
end
|
234
|
-
|
235
|
-
# Examines the input image for validity to create a jp2. Same as valid_image? but also confirms the existence of a profile description and further restricts mimetypes.
|
236
|
-
# It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
|
178
|
+
# Examines the input image for validity to create a jp2. Same as valid_image? but also confirms
|
179
|
+
# the existence of a profile description and further restricts mimetypes.
|
180
|
+
# It is used by the assembly robots to decide if a jp2 will be created and is also called before
|
181
|
+
# you create a jp2 using assembly-image.
|
237
182
|
# @return [Boolean] true if image should have a jp2 created, false if not.
|
238
183
|
# @example
|
239
184
|
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
@@ -265,14 +210,74 @@ exif&.mimetype && prefer_exif
|
|
265
210
|
|
266
211
|
private
|
267
212
|
|
213
|
+
# private method to check for file existence before operating on it
|
214
|
+
def check_for_file
|
215
|
+
raise "input file #{path} does not exist or is a directory" unless file_exists?
|
216
|
+
end
|
217
|
+
|
268
218
|
# private method defining default preferred ordering of how mimetypes are determined
|
269
219
|
def default_mime_type_order
|
270
220
|
%i[override exif file extension]
|
271
221
|
end
|
272
222
|
|
273
|
-
#
|
274
|
-
|
275
|
-
|
223
|
+
# Returns mimetype information using the mime-types gem (based on a file extension lookup)
|
224
|
+
# @return [String] mime type for supplied file
|
225
|
+
# @example
|
226
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
227
|
+
# puts source_file.extension_mimetype # 'text/plain'
|
228
|
+
def extension_mimetype
|
229
|
+
@extension_mimetype ||= begin
|
230
|
+
mtype = MIME::Types.type_for(path).first
|
231
|
+
mtype ? mtype.content_type : ''
|
232
|
+
end
|
233
|
+
end
|
234
|
+
|
235
|
+
# Returns mimetype information for the current file based on unix file system command.
|
236
|
+
# @return [String] mime type for supplied file
|
237
|
+
# @example
|
238
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
239
|
+
# puts source_file.file_mimetype # 'text/plain'
|
240
|
+
def file_mimetype
|
241
|
+
@file_mimetype ||= begin
|
242
|
+
check_for_file
|
243
|
+
`file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # get the mimetype from the unix file command
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
# Returns mimetype information for the current file based on exif data
|
248
|
+
# (if available and not a trusted source that we'd rather get from the file system command)
|
249
|
+
# @return [String] mime type for supplied file
|
250
|
+
# @example
|
251
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
252
|
+
# puts source_file.exif_mimetype # 'text/plain'
|
253
|
+
def exif_mimetype
|
254
|
+
@exif_mimetype ||= begin
|
255
|
+
check_for_file
|
256
|
+
# if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
|
257
|
+
prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype)
|
258
|
+
exif.mimetype if exif&.mimetype && prefer_exif
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
# Returns mimetype information using the manual override mapping (based on a file extension lookup)
|
263
|
+
# @return [String] mime type for supplied file if a mapping exists for the file's extension
|
264
|
+
# @example
|
265
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
|
266
|
+
# puts source_file.override_mimetype # 'application/json'
|
267
|
+
def override_mimetype
|
268
|
+
@override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
|
269
|
+
end
|
270
|
+
|
271
|
+
# @note Uses shell call to "file", only expected to work on unix based systems
|
272
|
+
# @return [String] encoding for supplied file
|
273
|
+
# @example
|
274
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
275
|
+
# puts source_file.encoding # 'us-ascii'
|
276
|
+
def encoding
|
277
|
+
@encoding ||= begin
|
278
|
+
check_for_file
|
279
|
+
`file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
|
280
|
+
end
|
276
281
|
end
|
277
282
|
end
|
278
283
|
end
|
data/lib/assembly-objectfile.rb
CHANGED
@@ -4,27 +4,24 @@ module Assembly
|
|
4
4
|
# the path to the gem, used to access profiles stored with the gem
|
5
5
|
PATH_TO_GEM = File.expand_path("#{File.dirname(__FILE__)}/..")
|
6
6
|
|
7
|
-
#
|
7
|
+
# If input image is not one of these mime types, it will not be regarded as a valid image
|
8
|
+
# for the purpose of generating a JP2 derivative
|
8
9
|
VALID_IMAGE_MIMETYPES = ['image/jpeg', 'image/tiff', 'image/tif', 'image/png'].freeze
|
9
10
|
|
10
|
-
#
|
11
|
-
|
12
|
-
|
13
|
-
#
|
14
|
-
# by the file command, then a check will be made to see if exif data exists...if so, the mimetype returned by the exif data will be used
|
15
|
-
# if no exif data exists, then the mimetype returned by the unix file command will be used
|
11
|
+
# The list of mimetypes that will be "trusted" by the unix file command; if a mimetype other than
|
12
|
+
# one of these is returned by the file command, then a check will be made to see if exif data exists...
|
13
|
+
# if so, the mimetype returned by the exif data will be used; if no exif data exists, then the
|
14
|
+
# mimetype returned by the unix file command will be used
|
16
15
|
TRUSTED_MIMETYPES = ['text/plain', 'plain/text', 'application/pdf', 'text/html', 'application/xml'].freeze
|
17
16
|
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
17
|
+
# This is a manual override mapping of file extension to mimetype; if a file with the given extension
|
18
|
+
# is found, the mapped mimetype will be returned and no further methods will be used - this is used
|
19
|
+
# to force a specific mimetype to be returned for a given file extension regardless of what exif or
|
20
|
+
# the unix file system command returns. The mapping format is "extension with period: returned mimetype",
|
21
|
+
# e.g. for any .json file, you will always get `application/json`
|
22
22
|
OVERRIDE_MIMETYPES = {
|
23
23
|
'.json': 'application/json'
|
24
24
|
}.freeze
|
25
25
|
end
|
26
26
|
|
27
|
-
require 'assembly
|
28
|
-
require 'assembly-objectfile/object_fileable'
|
29
|
-
require 'assembly-objectfile/object_file'
|
30
|
-
require 'assembly-objectfile/version'
|
27
|
+
require 'assembly/object_file'
|