assembly-objectfile 1.7.1 → 1.7.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rubocop.yml +22 -0
- data/.rubocop_todo.yml +132 -0
- data/Gemfile +1 -1
- data/Rakefile +9 -9
- data/assembly-objectfile.gemspec +11 -10
- data/lib/assembly-objectfile.rb +24 -22
- data/lib/assembly-objectfile/content_metadata.rb +225 -220
- data/lib/assembly-objectfile/object_file.rb +6 -9
- data/lib/assembly-objectfile/object_fileable.rb +131 -198
- data/lib/assembly-objectfile/version.rb +2 -4
- data/spec/content_metadata_spec.rb +455 -439
- data/spec/object_file_spec.rb +86 -83
- data/spec/spec_helper.rb +48 -45
- data/spec/test_data/input/someobject.obj +1 -0
- data/spec/test_data/input/someobject.ply +1 -0
- metadata +45 -12
@@ -1,8 +1,6 @@
|
|
1
1
|
module Assembly
|
2
|
-
|
3
2
|
# This class contains generic methods to operate on any file.
|
4
3
|
class ObjectFile
|
5
|
-
|
6
4
|
include Assembly::ObjectFileable
|
7
5
|
|
8
6
|
# Class-level method that, given an array of strings, returns the longest common initial path. Useful for removing a common path from a set of filenames when producing content metadata.
|
@@ -13,18 +11,17 @@ module Assembly
|
|
13
11
|
# Example:
|
14
12
|
# puts Assembly::ObjectFile.common_path(['/Users/peter/00/test.tif','/Users/peter/05/test.jp2']) # '/Users/peter/'
|
15
13
|
def self.common_path(strings)
|
16
|
-
return nil if strings.
|
14
|
+
return nil if strings.empty?
|
15
|
+
|
17
16
|
n = 0
|
18
17
|
x = strings.last
|
19
|
-
n += 1 while strings.all? { |s| s[n]
|
20
|
-
common_prefix=x[0...n]
|
21
|
-
if common_prefix[-1,1] != '/' # check if last element of the common string is the end of a directory
|
22
|
-
return common_prefix.split('/')[0..-2].join('/') +
|
18
|
+
n += 1 while strings.all? { |s| s[n] && (s[n] == x[n]) }
|
19
|
+
common_prefix = x[0...n]
|
20
|
+
if common_prefix[-1, 1] != '/' # check if last element of the common string is the end of a directory
|
21
|
+
return common_prefix.split('/')[0..-2].join('/') + '/' # if not, split string along directories, and reject last one
|
23
22
|
else
|
24
23
|
return common_prefix # if it was, then return the common prefix directly
|
25
24
|
end
|
26
25
|
end
|
27
|
-
|
28
26
|
end
|
29
|
-
|
30
27
|
end
|
@@ -1,297 +1,230 @@
|
|
1
1
|
require 'mini_exiftool'
|
2
2
|
require 'mime/types'
|
3
|
-
#require 'checksum-tools'
|
3
|
+
# require 'checksum-tools'
|
4
4
|
|
5
5
|
module Assembly
|
6
|
-
|
7
|
-
# Namespace to include common behaviors we need for other classes in the gem
|
6
|
+
# Common behaviors we need for other classes in the gem
|
8
7
|
module ObjectFileable
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
#
|
14
|
-
|
15
|
-
|
16
|
-
#
|
17
|
-
#
|
18
|
-
|
19
|
-
|
20
|
-
# relative path is useful when generating content metadata, if you want the file ids in the content metadata to be something other than the full path, it can be set
|
21
|
-
# if not, content metadata will get the full path
|
22
|
-
attr_accessor :relative_path
|
23
|
-
|
24
|
-
# provider checksums are optional checksums given by the provider used in content metadata generation
|
25
|
-
attr_accessor :provider_md5, :provider_sha1
|
26
|
-
|
27
|
-
# Initialize file from given path.
|
28
|
-
#
|
29
|
-
# @param [String] path full path to the file to be worked with
|
30
|
-
#
|
31
|
-
# Example:
|
8
|
+
attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path
|
9
|
+
|
10
|
+
# @param [String] path full path to the file to be worked with
|
11
|
+
# @param [Hash<Symbol => Object>] params options used during content metadata generation
|
12
|
+
# @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g. {:preserve=>'yes',:shelve=>'no',:publish=>'no'}, defaults pulled from mimetype
|
13
|
+
# @option params [String] :label a resource label (files bundled together will just get the first file's label attribute if set)
|
14
|
+
# @option params [String] :provider_md5 pre-computed MD5 checksum
|
15
|
+
# @option params [String] :provider_sha1 pre-computed SHA1 checksum
|
16
|
+
# @option params [String] :relative_path set this if you want the file ids in the content metadata to be something other than the full path; otherwise content metadata will get the full path
|
17
|
+
# @example
|
32
18
|
# Assembly::ObjectFile.new('/input/path_to_file.tif')
|
33
|
-
def initialize(path,params={})
|
19
|
+
def initialize(path, params = {})
|
34
20
|
@path = path
|
35
21
|
@label = params[:label]
|
36
22
|
@file_attributes = params[:file_attributes]
|
37
23
|
@relative_path = params[:relative_path]
|
38
|
-
@provider_md5 = params[:
|
24
|
+
@provider_md5 = params[:provider_md5]
|
39
25
|
@provider_sha1 = params[:provider_sha1]
|
40
26
|
end
|
41
|
-
|
42
|
-
# Returns base DPG name for the current file.
|
43
|
-
#
|
27
|
+
|
44
28
|
# @return [String] DPG base filename, removing the extension and the '00','05', etc. placeholders
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# source_file=Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
29
|
+
# @example
|
30
|
+
# source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
48
31
|
# puts source_file.dpg_basename # "cy565rm7188_001"
|
49
32
|
def dpg_basename
|
50
|
-
file_parts=File.basename(path,ext).split('_')
|
33
|
+
file_parts = File.basename(path, ext).split('_')
|
51
34
|
file_parts.size == 3 ? "#{file_parts[0]}_#{file_parts[2]}" : filename_without_ext
|
52
35
|
end
|
53
36
|
|
54
|
-
#
|
55
|
-
#
|
56
|
-
#
|
57
|
-
#
|
58
|
-
# Example:
|
59
|
-
# source_file=Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
37
|
+
# @return [String] DPG subfolder for the given filename, i.e. '00','05', etc.
|
38
|
+
# @example
|
39
|
+
# source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
60
40
|
# puts source_file.dpg_folder # "00"
|
61
41
|
def dpg_folder
|
62
|
-
file_parts=File.basename(path,ext).split('_')
|
42
|
+
file_parts = File.basename(path, ext).split('_')
|
63
43
|
file_parts.size == 3 ? file_parts[1] : ''
|
64
44
|
end
|
65
45
|
|
66
|
-
# Returns base filename for the current file.
|
67
|
-
#
|
68
46
|
# @return [String] base filename
|
69
|
-
#
|
70
|
-
#
|
71
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
47
|
+
# @example
|
48
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
72
49
|
# puts source_file.filename # "path_to_file.tif"
|
73
50
|
def filename
|
74
51
|
File.basename(path)
|
75
52
|
end
|
76
53
|
|
77
|
-
# Returns base directory path for the current file.
|
78
|
-
#
|
79
54
|
# @return [String] base directory
|
80
|
-
#
|
81
|
-
#
|
82
|
-
# source_file
|
83
|
-
# puts source_file.dirname # "/input"
|
55
|
+
# @example
|
56
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
57
|
+
# puts source_file.dirname # "/input"
|
84
58
|
def dirname
|
85
59
|
File.dirname(path)
|
86
60
|
end
|
87
61
|
|
88
|
-
# Returns filename extension
|
89
|
-
#
|
90
62
|
# @return [String] filename extension
|
91
|
-
#
|
92
|
-
#
|
93
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
63
|
+
# @example
|
64
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
94
65
|
# puts source_file.ext # ".tif"
|
95
66
|
def ext
|
96
67
|
File.extname(path)
|
97
68
|
end
|
98
|
-
|
99
|
-
# Returns base filename without extension for the current file.
|
100
|
-
#
|
69
|
+
|
101
70
|
# @return [String] base filename without extension
|
102
|
-
#
|
103
|
-
#
|
104
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
71
|
+
# @example
|
72
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
105
73
|
# puts source_file.filename_without_ext # "path_to_file"
|
106
74
|
def filename_without_ext
|
107
|
-
File.basename(path,ext)
|
75
|
+
File.basename(path, ext)
|
108
76
|
end
|
109
|
-
|
110
|
-
# Returns exif information for the current file.
|
111
|
-
#
|
77
|
+
|
112
78
|
# @return [MiniExiftool] exif information stored as a hash and an object
|
113
|
-
#
|
114
|
-
#
|
115
|
-
# source_file
|
116
|
-
# puts source_file.exif # gives hash with exif information
|
79
|
+
# @example
|
80
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
81
|
+
# puts source_file.exif # hash with exif information
|
117
82
|
def exif
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
end
|
125
|
-
|
126
|
-
#
|
127
|
-
#
|
128
|
-
# @
|
129
|
-
#
|
130
|
-
#
|
131
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
132
|
-
# puts source_file.md5 # gives XXX123XXX1243XX1243
|
83
|
+
@exif ||= begin
|
84
|
+
check_for_file
|
85
|
+
MiniExiftool.new(path, replace_invalid_chars: '?')
|
86
|
+
rescue StandardError
|
87
|
+
nil
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
# Computes md5 checksum or returns cached value
|
92
|
+
# @return [String] md5 checksum
|
93
|
+
# @example
|
94
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
95
|
+
# puts source_file.md5 # 'XXX123XXX1243XX1243'
|
133
96
|
def md5
|
134
97
|
check_for_file unless @md5
|
135
98
|
@md5 ||= Digest::MD5.file(path).hexdigest
|
136
99
|
end
|
137
100
|
|
138
|
-
#
|
139
|
-
#
|
140
|
-
# @
|
141
|
-
#
|
142
|
-
#
|
143
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
144
|
-
# puts source_file.sha1 # gives XXX123XXX1243XX1243
|
101
|
+
# Computes sha1 checksum or returns cached value
|
102
|
+
# @return [String] sha1 checksum
|
103
|
+
# @example
|
104
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
105
|
+
# puts source_file.sha1 # 'XXX123XXX1243XX1243'
|
145
106
|
def sha1
|
146
107
|
check_for_file unless @sha1
|
147
108
|
@sha1 ||= Digest::SHA1.file(path).hexdigest
|
148
109
|
end
|
149
110
|
|
150
111
|
# Returns mimetype information for the current file based on file extension or exif data (if available)
|
151
|
-
#
|
152
|
-
# @
|
153
|
-
#
|
154
|
-
#
|
155
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.txt')
|
156
|
-
# puts source_file.mimetype # gives 'text/plain'
|
112
|
+
# @return [String] mime type
|
113
|
+
# @example
|
114
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
115
|
+
# puts source_file.mimetype # 'text/plain'
|
157
116
|
def mimetype
|
158
|
-
|
159
|
-
if
|
160
|
-
|
161
|
-
else # otherwise get it from the mime-types gem (using the file extension)
|
162
|
-
|
163
|
-
|
117
|
+
@mimetype ||= begin
|
118
|
+
if exif && exif.mimetype # try exif first
|
119
|
+
exif.mimetype
|
120
|
+
else # otherwise get it from the mime-types gem (using the file extension), else blank
|
121
|
+
mtype = MIME::Types.type_for(path).first
|
122
|
+
mtype ? mtype.content_type : ''
|
164
123
|
end
|
165
124
|
end
|
166
|
-
return @mimetype
|
167
125
|
end
|
168
126
|
|
169
|
-
|
170
127
|
# Returns mimetype information for the current file based on unix file system command or exif data (if available).
|
171
|
-
#
|
172
|
-
# @
|
173
|
-
#
|
174
|
-
#
|
175
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.txt')
|
176
|
-
# puts source_file.file_mimetype # gives 'text/plain'
|
128
|
+
# @return [String] mime type for supplied file
|
129
|
+
# @example
|
130
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
131
|
+
# puts source_file.file_mimetype # 'text/plain'
|
177
132
|
def file_mimetype
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
133
|
+
@file_mimetype ||= begin
|
134
|
+
check_for_file
|
135
|
+
mtype = `file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
|
136
|
+
prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(mtype) && exif && exif.mimetype # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
|
137
|
+
prefer_exif ? exif.mimetype : mtype
|
182
138
|
end
|
183
|
-
return @file_mimetype
|
184
139
|
end
|
185
140
|
|
186
|
-
#
|
187
|
-
#
|
188
|
-
# @
|
189
|
-
#
|
190
|
-
#
|
191
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.txt')
|
192
|
-
# puts source_file.encoding # gives 'us-ascii'
|
141
|
+
# @note Uses shell call to "file", only expected to work on unix based systems
|
142
|
+
# @return [String] encoding for supplied file
|
143
|
+
# @example
|
144
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
145
|
+
# puts source_file.encoding # 'us-ascii'
|
193
146
|
def encoding
|
194
|
-
|
195
|
-
|
147
|
+
@encoding ||= begin
|
148
|
+
check_for_file
|
149
|
+
`file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
|
150
|
+
end
|
196
151
|
end
|
197
152
|
|
198
|
-
#
|
199
|
-
#
|
200
|
-
#
|
201
|
-
#
|
202
|
-
# Example:
|
203
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
204
|
-
# puts source_file.object_type # gives :image
|
153
|
+
# @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
|
154
|
+
# @example
|
155
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
156
|
+
# puts source_file.object_type # :image
|
205
157
|
def object_type
|
206
|
-
lookup=MIME::Types[mimetype][0]
|
207
|
-
|
158
|
+
lookup = MIME::Types[mimetype][0]
|
159
|
+
lookup.nil? ? :other : lookup.media_type.to_sym
|
208
160
|
end
|
209
|
-
|
210
|
-
#
|
211
|
-
#
|
212
|
-
#
|
213
|
-
#
|
214
|
-
# Example:
|
215
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
216
|
-
# puts source_file.image? # gives TRUE
|
161
|
+
|
162
|
+
# @return [Boolean] if object is an image
|
163
|
+
# @example
|
164
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
165
|
+
# puts source_file.image? # true
|
217
166
|
def image?
|
218
|
-
object_type == :image
|
167
|
+
object_type == :image
|
219
168
|
end
|
220
169
|
|
221
|
-
# Examines the input image for validity. Used to determine if image is a valid and useful image.
|
222
|
-
#
|
223
|
-
# @return [
|
224
|
-
#
|
225
|
-
#
|
226
|
-
# source_img
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
result= image? ? true : false
|
231
|
-
result= jp2able? unless mimetype == 'image/jp2' # further checks if we are not already a jp2
|
170
|
+
# Examines the input image for validity. Used to determine if image is a valid and useful image.
|
171
|
+
# If image is not a jp2, also checks if it is jp2able?
|
172
|
+
# @return [Boolean] true if image is valid, false if not.
|
173
|
+
# @example
|
174
|
+
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
175
|
+
# puts source_img.valid_image? # true
|
176
|
+
def valid_image?
|
177
|
+
return false unless image?
|
232
178
|
|
233
|
-
|
234
|
-
|
179
|
+
mimetype == 'image/jp2' || jp2able? ? true : false
|
235
180
|
end
|
236
181
|
|
237
|
-
#
|
238
|
-
#
|
239
|
-
#
|
240
|
-
#
|
241
|
-
# Example:
|
242
|
-
# source_img=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
243
|
-
# puts source_img.has_color_profile? # gives true
|
182
|
+
# @return [Boolean] true if image has a color profile, false if not.
|
183
|
+
# @example
|
184
|
+
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
185
|
+
# puts source_img.has_color_profile? # true
|
244
186
|
def has_color_profile?
|
245
|
-
|
187
|
+
return false unless exif
|
188
|
+
|
189
|
+
exif['profiledescription'] || exif['colorspace'] ? true : false
|
246
190
|
end
|
247
191
|
|
248
192
|
# Examines the input image for validity to create a jp2. Same as valid_image? but also confirms the existence of a profile description and further restricts mimetypes.
|
249
193
|
# It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
|
250
|
-
# @return [
|
251
|
-
#
|
252
|
-
#
|
253
|
-
# source_img
|
254
|
-
# puts source_img.jp2able? # gives true
|
194
|
+
# @return [Boolean] true if image should have a jp2 created, false if not.
|
195
|
+
# @example
|
196
|
+
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
197
|
+
# puts source_img.jp2able? # true
|
255
198
|
def jp2able?
|
256
|
-
|
257
|
-
result=false
|
258
|
-
unless exif.nil?
|
259
|
-
result=(Assembly::VALID_IMAGE_MIMETYPES.include?(mimetype)) # check for allowed image mimetypes that can be converted to jp2
|
260
|
-
end
|
261
|
-
return result
|
199
|
+
return false unless exif
|
262
200
|
|
201
|
+
Assembly::VALID_IMAGE_MIMETYPES.include?(mimetype)
|
263
202
|
end
|
264
|
-
|
203
|
+
|
265
204
|
# Returns file size information for the current file in bytes.
|
266
|
-
#
|
267
|
-
# @
|
268
|
-
#
|
269
|
-
#
|
270
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
271
|
-
# puts source_file.filesize # gives 1345
|
205
|
+
# @return [Integer] file size in bytes
|
206
|
+
# @example
|
207
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
208
|
+
# puts source_file.filesize # 1345
|
272
209
|
def filesize
|
273
210
|
check_for_file
|
274
|
-
@filesize ||= File.size
|
211
|
+
@filesize ||= File.size(path)
|
275
212
|
end
|
276
213
|
|
277
|
-
|
278
214
|
# Determines if the file exists (and is not a directory)
|
279
|
-
#
|
280
|
-
# @
|
281
|
-
#
|
282
|
-
#
|
283
|
-
# source_file=Assembly::ObjectFile.new('/input/path_to_file.tif')
|
284
|
-
# puts source_file.file_exists? # gives true
|
215
|
+
# @return [Boolean] file exists
|
216
|
+
# @example
|
217
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
218
|
+
# puts source_file.file_exists? # true
|
285
219
|
def file_exists?
|
286
|
-
File.
|
220
|
+
File.exist?(path) && !File.directory?(path)
|
287
221
|
end
|
288
|
-
|
222
|
+
|
289
223
|
private
|
224
|
+
|
290
225
|
# private method to check for file existence before operating on it
|
291
226
|
def check_for_file
|
292
227
|
raise "input file #{path} does not exist" unless file_exists?
|
293
228
|
end
|
294
|
-
|
295
229
|
end
|
296
|
-
|
297
|
-
end
|
230
|
+
end
|