assembly-objectfile 1.7.1 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rubocop.yml +22 -0
- data/.rubocop_todo.yml +132 -0
- data/Gemfile +1 -1
- data/Rakefile +9 -9
- data/assembly-objectfile.gemspec +11 -10
- data/lib/assembly-objectfile.rb +24 -22
- data/lib/assembly-objectfile/content_metadata.rb +225 -220
- data/lib/assembly-objectfile/object_file.rb +6 -9
- data/lib/assembly-objectfile/object_fileable.rb +131 -198
- data/lib/assembly-objectfile/version.rb +2 -4
- data/spec/content_metadata_spec.rb +455 -439
- data/spec/object_file_spec.rb +86 -83
- data/spec/spec_helper.rb +48 -45
- data/spec/test_data/input/someobject.obj +1 -0
- data/spec/test_data/input/someobject.ply +1 -0
- metadata +45 -12
@@ -1,8 +1,6 @@
|
|
1
1
|
module Assembly
|
2
|
-
|
3
2
|
# This class contains generic methods to operate on any file.
|
4
3
|
class ObjectFile
|
5
|
-
|
6
4
|
include Assembly::ObjectFileable
|
7
5
|
|
8
6
|
# Class-level method that, given an array of strings, returns the longest common initial path. Useful for removing a common path from a set of filenames when producing content metadata.
|
@@ -13,18 +11,17 @@ module Assembly
|
|
13
11
|
# Example:
|
14
12
|
# puts Assembly::ObjectFile.common_path(['/Users/peter/00/test.tif','/Users/peter/05/test.jp2']) # '/Users/peter/'
|
15
13
|
def self.common_path(strings)
|
16
|
-
return nil if strings.
|
14
|
+
return nil if strings.empty?
|
15
|
+
|
17
16
|
n = 0
|
18
17
|
x = strings.last
|
19
|
-
n += 1 while strings.all? { |s| s[n]
|
20
|
-
common_prefix=x[0...n]
|
21
|
-
if common_prefix[-1,1] != '/' # check if last element of the common string is the end of a directory
|
22
|
-
return common_prefix.split('/')[0..-2].join('/') +
|
18
|
+
n += 1 while strings.all? { |s| s[n] && (s[n] == x[n]) }
|
19
|
+
common_prefix = x[0...n]
|
20
|
+
if common_prefix[-1, 1] != '/' # check if last element of the common string is the end of a directory
|
21
|
+
return common_prefix.split('/')[0..-2].join('/') + '/' # if not, split string along directories, and reject last one
|
23
22
|
else
|
24
23
|
return common_prefix # if it was, then return the common prefix directly
|
25
24
|
end
|
26
25
|
end
|
27
|
-
|
28
26
|
end
|
29
|
-
|
30
27
|
end
|
@@ -1,297 +1,230 @@
|
|
1
1
|
require 'mini_exiftool'
|
2
2
|
require 'mime/types'
|
3
|
-
#require 'checksum-tools'
|
3
|
+
# require 'checksum-tools'
|
4
4
|
|
5
5
|
module Assembly
|
6
|
-
|
7
|
-
# Namespace to include common behaviors we need for other classes in the gem
|
6
|
+
# Common behaviors we need for other classes in the gem
|
8
7
|
module ObjectFileable
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
#
|
14
|
-
|
15
|
-
|
16
|
-
#
|
17
|
-
#
|
18
|
-
|
19
|
-
|
20
|
-
# relative path is useful when generating content metadata, if you want the file ids in the content metadata to be something other than the full path, it can be set
|
21
|
-
# if not, content metadata will get the full path
|
22
|
-
attr_accessor :relative_path
|
23
|
-
|
24
|
-
# provider checksums are optional checksums given by the provider used in content metadata generation
|
25
|
-
attr_accessor :provider_md5, :provider_sha1
|
26
|
-
|
27
|
-
# Initialize file from given path.
|
28
|
-
#
|
29
|
-
# @param [String] path full path to the file to be worked with
|
30
|
-
#
|
31
|
-
# Example:
|
8
|
+
attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path
|
9
|
+
|
10
|
+
# @param [String] path full path to the file to be worked with
|
11
|
+
# @param [Hash<Symbol => Object>] params options used during content metadata generation
|
12
|
+
# @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g. {:preserve=>'yes',:shelve=>'no',:publish=>'no'}, defaults pulled from mimetype
|
13
|
+
# @option params [String] :label a resource label (files bundled together will just get the first file's label attribute if set)
|
14
|
+
# @option params [String] :provider_md5 pre-computed MD5 checksum
|
15
|
+
# @option params [String] :provider_sha1 pre-computed SHA1 checksum
|
16
|
+
# @option params [String] :relative_path can be set if you want the file ids in the content metadata to be something other than the full path; otherwise content metadata will get the full path
|
17
|
+
# @example
|
32
18
|
# Assembly::ObjectFile.new('/input/path_to_file.tif')
|
33
|
-
def initialize(path,params={})
|
19
|
+
def initialize(path, params = {})
|
34
20
|
@path = path
|
35
21
|
@label = params[:label]
|
36
22
|
@file_attributes = params[:file_attributes]
|
37
23
|
@relative_path = params[:relative_path]
|
38
|
-
@provider_md5 = params[:
|
24
|
+
@provider_md5 = params[:provider_md5]
|
39
25
|
@provider_sha1 = params[:provider_sha1]
|
40
26
|
end
|
41
|
-
|
42
|
-
# Returns base DPG name for the current file.
|
43
|
-
#
|
27
|
+
|
44
28
|
# @return [String] DPG base filename, removing the extension and the '00','05', etc. placeholders
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# source_file=Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
29
|
+
# @example
|
30
|
+
# source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
48
31
|
# puts source_file.dpg_basename # "cy565rm7188_001"
|
49
32
|
def dpg_basename
|
50
|
-
file_parts=File.basename(path,ext).split('_')
|
33
|
+
file_parts = File.basename(path, ext).split('_')
|
51
34
|
file_parts.size == 3 ? "#{file_parts[0]}_#{file_parts[2]}" : filename_without_ext
|
52
35
|
end
|
53
36
|
|
54
|
-
#
|
55
|
-
#
|
56
|
-
#
|
57
|
-
#
|
58
|
-
# Example:
|
59
|
-
# source_file=Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
37
|
+
# @return [String] DPG subfolder for the given filename, i.e. '00','05', etc.
|
38
|
+
# @example
|
39
|
+
# source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
60
40
|
# puts source_file.dpg_folder # "00"
|
61
41
|
def dpg_folder
|
62
|
-
file_parts=File.basename(path,ext).split('_')
|
42
|
+
file_parts = File.basename(path, ext).split('_')
|
63
43
|
file_parts.size == 3 ? file_parts[1] : ''
|
64
44
|
end
|
65
45
|
|
66
|
-
# Returns base filename for the current file.
|
67
|
-
#
|
68
46
|
# @return [String] base filename
|
69
|
-
#
|
70
|
-
#
|
71
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
47
|
+
# @example
|
48
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
72
49
|
# puts source_file.filename # "path_to_file.tif"
|
73
50
|
def filename
|
74
51
|
File.basename(path)
|
75
52
|
end
|
76
53
|
|
77
|
-
# Returns base directory path for the current file.
|
78
|
-
#
|
79
54
|
# @return [String] base directory
|
80
|
-
#
|
81
|
-
#
|
82
|
-
# source_file
|
83
|
-
# puts source_file.dirname # "/input"
|
55
|
+
# @example
|
56
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
57
|
+
# puts source_file.dirname # "/input"
|
84
58
|
def dirname
|
85
59
|
File.dirname(path)
|
86
60
|
end
|
87
61
|
|
88
|
-
# Returns filename extension
|
89
|
-
#
|
90
62
|
# @return [String] filename extension
|
91
|
-
#
|
92
|
-
#
|
93
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
63
|
+
# @example
|
64
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
94
65
|
# puts source_file.ext # ".tif"
|
95
66
|
def ext
|
96
67
|
File.extname(path)
|
97
68
|
end
|
98
|
-
|
99
|
-
# Returns base filename without extension for the current file.
|
100
|
-
#
|
69
|
+
|
101
70
|
# @return [String] base filename without extension
|
102
|
-
#
|
103
|
-
#
|
104
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
71
|
+
# @example
|
72
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
105
73
|
# puts source_file.filename_without_ext # "path_to_file"
|
106
74
|
def filename_without_ext
|
107
|
-
File.basename(path,ext)
|
75
|
+
File.basename(path, ext)
|
108
76
|
end
|
109
|
-
|
110
|
-
# Returns exif information for the current file.
|
111
|
-
#
|
77
|
+
|
112
78
|
# @return [MiniExiftool] exif information stored as a hash and an object
|
113
|
-
#
|
114
|
-
#
|
115
|
-
# source_file
|
116
|
-
# puts source_file.exif # gives hash with exif information
|
79
|
+
# @example
|
80
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
81
|
+
# puts source_file.exif # hash with exif information
|
117
82
|
def exif
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
end
|
125
|
-
|
126
|
-
#
|
127
|
-
#
|
128
|
-
# @
|
129
|
-
#
|
130
|
-
#
|
131
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
132
|
-
# puts source_file.md5 # gives XXX123XXX1243XX1243
|
83
|
+
@exif ||= begin
|
84
|
+
check_for_file
|
85
|
+
MiniExiftool.new(path, replace_invalid_chars: '?')
|
86
|
+
rescue StandardError
|
87
|
+
nil
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
# Computes md5 checksum or returns cached value
|
92
|
+
# @return [String] md5 checksum
|
93
|
+
# @example
|
94
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
95
|
+
# puts source_file.md5 # 'XXX123XXX1243XX1243'
|
133
96
|
def md5
|
134
97
|
check_for_file unless @md5
|
135
98
|
@md5 ||= Digest::MD5.file(path).hexdigest
|
136
99
|
end
|
137
100
|
|
138
|
-
#
|
139
|
-
#
|
140
|
-
# @
|
141
|
-
#
|
142
|
-
#
|
143
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
144
|
-
# puts source_file.sha1 # gives XXX123XXX1243XX1243
|
101
|
+
# Computes sha1 checksum or returns cached value
|
102
|
+
# @return [String] sha1 checksum
|
103
|
+
# @example
|
104
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
105
|
+
# puts source_file.sha1 # 'XXX123XXX1243XX1243'
|
145
106
|
def sha1
|
146
107
|
check_for_file unless @sha1
|
147
108
|
@sha1 ||= Digest::SHA1.file(path).hexdigest
|
148
109
|
end
|
149
110
|
|
150
111
|
# Returns mimetype information for the current file based on file extension or exif data (if available)
|
151
|
-
#
|
152
|
-
# @
|
153
|
-
#
|
154
|
-
#
|
155
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.txt')
|
156
|
-
# puts source_file.mimetype # gives 'text/plain'
|
112
|
+
# @return [String] mime type
|
113
|
+
# @example
|
114
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
115
|
+
# puts source_file.mimetype # 'text/plain'
|
157
116
|
def mimetype
|
158
|
-
|
159
|
-
if
|
160
|
-
|
161
|
-
else # otherwise get it from the mime-types gem (using the file extension)
|
162
|
-
|
163
|
-
|
117
|
+
@mimetype ||= begin
|
118
|
+
if exif && exif.mimetype # try exif first
|
119
|
+
exif.mimetype
|
120
|
+
else # otherwise get it from the mime-types gem (using the file extension), else blank
|
121
|
+
mtype = MIME::Types.type_for(path).first
|
122
|
+
mtype ? mtype.content_type : ''
|
164
123
|
end
|
165
124
|
end
|
166
|
-
return @mimetype
|
167
125
|
end
|
168
126
|
|
169
|
-
|
170
127
|
# Returns mimetype information for the current file based on unix file system command or exif data (if available).
|
171
|
-
#
|
172
|
-
# @
|
173
|
-
#
|
174
|
-
#
|
175
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.txt')
|
176
|
-
# puts source_file.file_mimetype # gives 'text/plain'
|
128
|
+
# @return [String] mime type for supplied file
|
129
|
+
# @example
|
130
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
131
|
+
# puts source_file.file_mimetype # 'text/plain'
|
177
132
|
def file_mimetype
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
133
|
+
@file_mimetype ||= begin
|
134
|
+
check_for_file
|
135
|
+
mtype = `file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
|
136
|
+
prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(mtype) && exif && exif.mimetype # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
|
137
|
+
prefer_exif ? exif.mimetype : mtype
|
182
138
|
end
|
183
|
-
return @file_mimetype
|
184
139
|
end
|
185
140
|
|
186
|
-
#
|
187
|
-
#
|
188
|
-
# @
|
189
|
-
#
|
190
|
-
#
|
191
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.txt')
|
192
|
-
# puts source_file.encoding # gives 'us-ascii'
|
141
|
+
# @note Uses shell call to "file", only expected to work on unix based systems
|
142
|
+
# @return [String] encoding for supplied file
|
143
|
+
# @example
|
144
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
145
|
+
# puts source_file.encoding # 'us-ascii'
|
193
146
|
def encoding
|
194
|
-
|
195
|
-
|
147
|
+
@encoding ||= begin
|
148
|
+
check_for_file
|
149
|
+
`file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
|
150
|
+
end
|
196
151
|
end
|
197
152
|
|
198
|
-
#
|
199
|
-
#
|
200
|
-
#
|
201
|
-
#
|
202
|
-
# Example:
|
203
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
204
|
-
# puts source_file.object_type # gives :image
|
153
|
+
# @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
|
154
|
+
# @example
|
155
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
156
|
+
# puts source_file.object_type # :image
|
205
157
|
def object_type
|
206
|
-
lookup=MIME::Types[mimetype][0]
|
207
|
-
|
158
|
+
lookup = MIME::Types[mimetype][0]
|
159
|
+
lookup.nil? ? :other : lookup.media_type.to_sym
|
208
160
|
end
|
209
|
-
|
210
|
-
#
|
211
|
-
#
|
212
|
-
#
|
213
|
-
#
|
214
|
-
# Example:
|
215
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
216
|
-
# puts source_file.image? # gives TRUE
|
161
|
+
|
162
|
+
# @return [Boolean] if object is an image
|
163
|
+
# @example
|
164
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
165
|
+
# puts source_file.image? # true
|
217
166
|
def image?
|
218
|
-
object_type == :image
|
167
|
+
object_type == :image
|
219
168
|
end
|
220
169
|
|
221
|
-
# Examines the input image for validity. Used to determine if image is a valid and useful image.
|
222
|
-
#
|
223
|
-
# @return [
|
224
|
-
#
|
225
|
-
#
|
226
|
-
# source_img
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
result= image? ? true : false
|
231
|
-
result= jp2able? unless mimetype == 'image/jp2' # further checks if we are not already a jp2
|
170
|
+
# Examines the input image for validity. Used to determine if image is a valid and useful image.
|
171
|
+
# If image is not a jp2, also checks if it is jp2able?
|
172
|
+
# @return [Boolean] true if image is valid, false if not.
|
173
|
+
# @example
|
174
|
+
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
175
|
+
# puts source_img.valid_image? # true
|
176
|
+
def valid_image?
|
177
|
+
return false unless image?
|
232
178
|
|
233
|
-
|
234
|
-
|
179
|
+
mimetype == 'image/jp2' || jp2able? ? true : false
|
235
180
|
end
|
236
181
|
|
237
|
-
#
|
238
|
-
#
|
239
|
-
#
|
240
|
-
#
|
241
|
-
# Example:
|
242
|
-
# source_img=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
243
|
-
# puts source_img.has_color_profile? # gives true
|
182
|
+
# @return [Boolean] true if image has a color profile, false if not.
|
183
|
+
# @example
|
184
|
+
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
185
|
+
# puts source_img.has_color_profile? # true
|
244
186
|
def has_color_profile?
|
245
|
-
|
187
|
+
return false unless exif
|
188
|
+
|
189
|
+
exif['profiledescription'] || exif['colorspace'] ? true : false
|
246
190
|
end
|
247
191
|
|
248
192
|
# Examines the input image for validity to create a jp2. Same as valid_image? but also confirms the existence of a profile description and further restricts mimetypes.
|
249
193
|
# It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
|
250
|
-
# @return [
|
251
|
-
#
|
252
|
-
#
|
253
|
-
# source_img
|
254
|
-
# puts source_img.jp2able? # gives true
|
194
|
+
# @return [Boolean] true if image should have a jp2 created, false if not.
|
195
|
+
# @example
|
196
|
+
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
197
|
+
# puts source_img.jp2able? # true
|
255
198
|
def jp2able?
|
256
|
-
|
257
|
-
result=false
|
258
|
-
unless exif.nil?
|
259
|
-
result=(Assembly::VALID_IMAGE_MIMETYPES.include?(mimetype)) # check for allowed image mimetypes that can be converted to jp2
|
260
|
-
end
|
261
|
-
return result
|
199
|
+
return false unless exif
|
262
200
|
|
201
|
+
Assembly::VALID_IMAGE_MIMETYPES.include?(mimetype)
|
263
202
|
end
|
264
|
-
|
203
|
+
|
265
204
|
# Returns file size information for the current file in bytes.
|
266
|
-
#
|
267
|
-
# @
|
268
|
-
#
|
269
|
-
#
|
270
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
271
|
-
# puts source_file.filesize # gives 1345
|
205
|
+
# @return [Integer] file size in bytes
|
206
|
+
# @example
|
207
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
208
|
+
# puts source_file.filesize # 1345
|
272
209
|
def filesize
|
273
210
|
check_for_file
|
274
|
-
@filesize ||= File.size
|
211
|
+
@filesize ||= File.size(path)
|
275
212
|
end
|
276
213
|
|
277
|
-
|
278
214
|
# Determines if the file exists (and is not a directory)
|
279
|
-
#
|
280
|
-
# @
|
281
|
-
#
|
282
|
-
#
|
283
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
284
|
-
# puts source_file.file_exists? # gives true
|
215
|
+
# @return [Boolean] file exists
|
216
|
+
# @example
|
217
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
218
|
+
# puts source_file.file_exists? # true
|
285
219
|
def file_exists?
|
286
|
-
File.
|
220
|
+
File.exist?(path) && !File.directory?(path)
|
287
221
|
end
|
288
|
-
|
222
|
+
|
289
223
|
private
|
224
|
+
|
290
225
|
# private method to check for file existence before operating on it
|
291
226
|
def check_for_file
|
292
227
|
raise "input file #{path} does not exist" unless file_exists?
|
293
228
|
end
|
294
|
-
|
295
229
|
end
|
296
|
-
|
297
|
-
end
|
230
|
+
end
|