assembly-objectfile 2.0.0 → 2.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +1 -1
- data/.rubocop.yml +33 -0
- data/.rubocop_todo.yml +1 -52
- data/Gemfile.lock +14 -23
- data/README.md +6 -6
- data/assembly-objectfile.gemspec +3 -5
- data/config/boot.rb +0 -1
- data/lib/{assembly-objectfile → assembly/object_file}/version.rb +1 -1
- data/lib/assembly/object_file.rb +201 -0
- data/lib/assembly-objectfile.rb +14 -14
- data/spec/{object_file_spec.rb → assembly/object_file_spec.rb} +36 -61
- data/spec/spec_helper.rb +2 -7
- metadata +7 -52
- data/lib/assembly-objectfile/object_file.rb +0 -279
- data/profiles/AdobeRGB1998.icc +0 -0
- data/profiles/DotGain20.icc +0 -0
- data/profiles/sRGBIEC6196621.icc +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0931aac97a88ba77ac0cfa8ff018d19fddcfe909c81cd82e9dfc13dca12ed4f4'
|
4
|
+
data.tar.gz: 4eb596c2799586b3298cd1019be16606b4a371a175c4630b1ac826296dacb5bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a252cfc25b6e2a11f50a6eb2976997fdcbf387ba3855fbca03d188a2f7e9572ca1a3ed605da191700551e34a685deb9dec810e63dfbff59bf9bfd09f6a6a9504
|
7
|
+
data.tar.gz: db0fdbf6583455bce118aa14bc29692d90fbb35c1e9746f3748548094ff53e0f33fe79c08cf5baace3869a08e4f9e3080d1906b810ce38d64022efa573e8c500
|
data/.circleci/config.yml
CHANGED
data/.rubocop.yml
CHANGED
@@ -9,11 +9,31 @@ AllCops:
|
|
9
9
|
- '**/*.md'
|
10
10
|
- 'vendor/**/*' # avoid running rubocop on cached bundler
|
11
11
|
|
12
|
+
Layout/LineLength:
|
13
|
+
Max: 120
|
14
|
+
|
15
|
+
Naming/FileName:
|
16
|
+
Exclude:
|
17
|
+
- 'lib/assembly-objectfile.rb'
|
18
|
+
|
19
|
+
Naming/PredicateName:
|
20
|
+
NamePrefix:
|
21
|
+
- 'is_'
|
22
|
+
|
12
23
|
Metrics/BlockLength:
|
13
24
|
Exclude:
|
14
25
|
- 'spec/**/*.rb'
|
15
26
|
- '**/*.gemspec'
|
16
27
|
|
28
|
+
RSpec/ExampleLength:
|
29
|
+
Max: 10
|
30
|
+
|
31
|
+
RSpec/MultipleExpectations:
|
32
|
+
Max: 10
|
33
|
+
|
34
|
+
RSpec/NestedGroups:
|
35
|
+
Max: 4
|
36
|
+
|
17
37
|
Gemspec/DeprecatedAttributeAssignment: # (new in 1.10)
|
18
38
|
Enabled: true
|
19
39
|
|
@@ -207,3 +227,16 @@ RSpec/ChangeByZero: # new in 2.11.0
|
|
207
227
|
Enabled: true
|
208
228
|
RSpec/VerifiedDoubleReference: # new in 2.10.0
|
209
229
|
Enabled: true
|
230
|
+
|
231
|
+
Layout/LineContinuationLeadingSpace: # new in 1.31
|
232
|
+
Enabled: true
|
233
|
+
Layout/LineContinuationSpacing: # new in 1.31
|
234
|
+
Enabled: true
|
235
|
+
Lint/ConstantOverwrittenInRescue: # new in 1.31
|
236
|
+
Enabled: true
|
237
|
+
Lint/NonAtomicFileOperation: # new in 1.31
|
238
|
+
Enabled: true
|
239
|
+
RSpec/Capybara/SpecificMatcher: # new in 2.12
|
240
|
+
Enabled: true
|
241
|
+
RSpec/Rails/HaveHttpStatus: # new in 2.12
|
242
|
+
Enabled: true
|
data/.rubocop_todo.yml
CHANGED
@@ -1,63 +1,12 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2022-07-
|
3
|
+
# on 2022-07-20 17:16:46 UTC using RuboCop version 1.31.2.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
-
# Offense count: 1
|
10
|
-
Lint/UselessAssignment:
|
11
|
-
Exclude:
|
12
|
-
- 'config/boot.rb'
|
13
|
-
|
14
9
|
# Offense count: 1
|
15
10
|
# Configuration parameters: CountComments, CountAsOne.
|
16
11
|
Metrics/ClassLength:
|
17
12
|
Max: 122
|
18
|
-
|
19
|
-
# Offense count: 1
|
20
|
-
# Configuration parameters: ExpectMatchingDefinition, CheckDefinitionPathHierarchy, CheckDefinitionPathHierarchyRoots, Regex, IgnoreExecutableScripts, AllowedAcronyms.
|
21
|
-
# CheckDefinitionPathHierarchyRoots: lib, spec, test, src
|
22
|
-
# AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
|
23
|
-
Naming/FileName:
|
24
|
-
Exclude:
|
25
|
-
- 'lib/assembly-objectfile.rb'
|
26
|
-
|
27
|
-
# Offense count: 1
|
28
|
-
# Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros.
|
29
|
-
# NamePrefix: is_, has_, have_
|
30
|
-
# ForbiddenPrefixes: is_, has_, have_
|
31
|
-
# AllowedMethods: is_a?
|
32
|
-
# MethodDefinitionMacros: define_method, define_singleton_method
|
33
|
-
Naming/PredicateName:
|
34
|
-
Exclude:
|
35
|
-
- 'spec/**/*'
|
36
|
-
- 'lib/assembly-objectfile/object_file.rb'
|
37
|
-
|
38
|
-
# Offense count: 2
|
39
|
-
# Configuration parameters: CountAsOne.
|
40
|
-
RSpec/ExampleLength:
|
41
|
-
Max: 7
|
42
|
-
|
43
|
-
# Offense count: 1
|
44
|
-
# Configuration parameters: Include, CustomTransform, IgnoreMethods, SpecSuffixOnly.
|
45
|
-
# Include: **/*_spec*rb*, **/spec/**/*
|
46
|
-
RSpec/FilePath:
|
47
|
-
Exclude:
|
48
|
-
- 'spec/object_file_spec.rb'
|
49
|
-
|
50
|
-
# Offense count: 8
|
51
|
-
RSpec/MultipleExpectations:
|
52
|
-
Max: 6
|
53
|
-
|
54
|
-
# Offense count: 3
|
55
|
-
RSpec/NestedGroups:
|
56
|
-
Max: 4
|
57
|
-
|
58
|
-
# Offense count: 24
|
59
|
-
# This cop supports safe autocorrection (--autocorrect).
|
60
|
-
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns.
|
61
|
-
# URISchemes: http, https
|
62
|
-
Layout/LineLength:
|
63
|
-
Max: 187
|
data/Gemfile.lock
CHANGED
@@ -1,17 +1,15 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
assembly-objectfile (2.
|
4
|
+
assembly-objectfile (2.1.2)
|
5
5
|
activesupport (>= 5.2.0)
|
6
|
-
deprecation
|
7
6
|
mime-types (> 3)
|
8
7
|
mini_exiftool
|
9
|
-
nokogiri
|
10
8
|
|
11
9
|
GEM
|
12
10
|
remote: http://rubygems.org/
|
13
11
|
specs:
|
14
|
-
activesupport (7.0.3)
|
12
|
+
activesupport (7.0.3.1)
|
15
13
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
16
14
|
i18n (>= 1.6, < 2)
|
17
15
|
minitest (>= 5.1)
|
@@ -20,11 +18,9 @@ GEM
|
|
20
18
|
byebug (11.1.3)
|
21
19
|
coderay (1.1.3)
|
22
20
|
concurrent-ruby (1.1.10)
|
23
|
-
deprecation (1.1.0)
|
24
|
-
activesupport
|
25
21
|
diff-lcs (1.5.0)
|
26
22
|
docile (1.4.0)
|
27
|
-
i18n (1.
|
23
|
+
i18n (1.12.0)
|
28
24
|
concurrent-ruby (~> 1.0)
|
29
25
|
json (2.6.2)
|
30
26
|
method_source (1.0.0)
|
@@ -32,13 +28,9 @@ GEM
|
|
32
28
|
mime-types-data (~> 3.2015)
|
33
29
|
mime-types-data (3.2022.0105)
|
34
30
|
mini_exiftool (2.10.2)
|
35
|
-
|
36
|
-
minitest (5.16.2)
|
37
|
-
nokogiri (1.13.6)
|
38
|
-
mini_portile2 (~> 2.8.0)
|
39
|
-
racc (~> 1.4)
|
31
|
+
minitest (5.16.3)
|
40
32
|
parallel (1.22.1)
|
41
|
-
parser (3.1.2.
|
33
|
+
parser (3.1.2.1)
|
42
34
|
ast (~> 2.4.1)
|
43
35
|
pry (0.13.1)
|
44
36
|
coderay (~> 1.1)
|
@@ -46,7 +38,6 @@ GEM
|
|
46
38
|
pry-byebug (3.9.0)
|
47
39
|
byebug (~> 11.0)
|
48
40
|
pry (~> 0.13.0)
|
49
|
-
racc (1.6.0)
|
50
41
|
rainbow (3.1.1)
|
51
42
|
rake (13.0.6)
|
52
43
|
regexp_parser (2.5.0)
|
@@ -64,19 +55,20 @@ GEM
|
|
64
55
|
diff-lcs (>= 1.2.0, < 2.0)
|
65
56
|
rspec-support (~> 3.11.0)
|
66
57
|
rspec-support (3.11.0)
|
67
|
-
rubocop (1.
|
58
|
+
rubocop (1.35.0)
|
59
|
+
json (~> 2.3)
|
68
60
|
parallel (~> 1.10)
|
69
|
-
parser (>= 3.1.
|
61
|
+
parser (>= 3.1.2.1)
|
70
62
|
rainbow (>= 2.2.2, < 4.0)
|
71
63
|
regexp_parser (>= 1.8, < 3.0)
|
72
64
|
rexml (>= 3.2.5, < 4.0)
|
73
|
-
rubocop-ast (>= 1.
|
65
|
+
rubocop-ast (>= 1.20.1, < 2.0)
|
74
66
|
ruby-progressbar (~> 1.7)
|
75
67
|
unicode-display_width (>= 1.4.0, < 3.0)
|
76
|
-
rubocop-ast (1.
|
68
|
+
rubocop-ast (1.21.0)
|
77
69
|
parser (>= 3.1.1.0)
|
78
|
-
rubocop-rspec (2.
|
79
|
-
rubocop (~> 1.
|
70
|
+
rubocop-rspec (2.12.1)
|
71
|
+
rubocop (~> 1.31)
|
80
72
|
ruby-progressbar (1.11.0)
|
81
73
|
simplecov (0.21.2)
|
82
74
|
docile (~> 1.1)
|
@@ -84,7 +76,7 @@ GEM
|
|
84
76
|
simplecov_json_formatter (~> 0.1)
|
85
77
|
simplecov-html (0.12.3)
|
86
78
|
simplecov_json_formatter (0.1.4)
|
87
|
-
tzinfo (2.0.
|
79
|
+
tzinfo (2.0.5)
|
88
80
|
concurrent-ruby (~> 1.0)
|
89
81
|
unicode-display_width (2.2.0)
|
90
82
|
|
@@ -94,7 +86,6 @@ PLATFORMS
|
|
94
86
|
DEPENDENCIES
|
95
87
|
assembly-objectfile!
|
96
88
|
byebug
|
97
|
-
json
|
98
89
|
pry-byebug
|
99
90
|
rake
|
100
91
|
rspec (~> 3.0)
|
@@ -103,4 +94,4 @@ DEPENDENCIES
|
|
103
94
|
simplecov
|
104
95
|
|
105
96
|
BUNDLED WITH
|
106
|
-
2.3.
|
97
|
+
2.3.17
|
data/README.md
CHANGED
@@ -8,15 +8,15 @@
|
|
8
8
|
## Overview
|
9
9
|
This gem contains classes used by the Stanford University Digital Library to
|
10
10
|
perform file operations necessary for accessioning of content. It is also
|
11
|
-
used by related gems
|
12
|
-
generation).
|
11
|
+
used by related gems that perform content type specific operations (e.g.
|
12
|
+
assembly-image for jp2 generation).
|
13
13
|
|
14
14
|
## Usage
|
15
15
|
|
16
16
|
The gem currently has methods for:
|
17
17
|
* filesize
|
18
|
-
*
|
19
|
-
*
|
18
|
+
* mimetype
|
19
|
+
* exif - consumers use ExifTool to get file information
|
20
20
|
|
21
21
|
## Running tests
|
22
22
|
|
@@ -34,8 +34,8 @@ rake release
|
|
34
34
|
|
35
35
|
1. Exiftool
|
36
36
|
|
37
|
-
RHEL: (RPM to install
|
38
|
-
|
37
|
+
RHEL: (RPM to install coming soon) Download latest version from:
|
38
|
+
https://exiftool.org/
|
39
39
|
|
40
40
|
tar -xf Image-ExifTool-#.##.tar.gz
|
41
41
|
cd Image-ExifTool-#.##
|
data/assembly-objectfile.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
$LOAD_PATH.push File.expand_path('lib', __dir__)
|
4
|
-
require 'assembly
|
4
|
+
require 'assembly/object_file/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = 'assembly-objectfile'
|
@@ -10,7 +10,8 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.authors = ['Peter Mangiafico', 'Renzo Sanchez-Silva', 'Monty Hindman', 'Tony Calavano']
|
11
11
|
s.email = ['pmangiafico@stanford.edu']
|
12
12
|
s.homepage = 'https://github.com/sul-dlss/assembly-objectfile'
|
13
|
-
s.summary = 'Ruby
|
13
|
+
s.summary = 'Ruby implementation of file services needed to prepare objects to be accessioned ' \
|
14
|
+
'into the Stanford Digital Repository'
|
14
15
|
s.description = 'Get exif data, file sizes and more.'
|
15
16
|
s.license = 'ALv2'
|
16
17
|
s.metadata['rubygems_mfa_required'] = 'true'
|
@@ -23,12 +24,9 @@ Gem::Specification.new do |s|
|
|
23
24
|
s.required_ruby_version = '>= 3.0'
|
24
25
|
|
25
26
|
s.add_dependency 'activesupport', '>= 5.2.0'
|
26
|
-
s.add_dependency 'deprecation'
|
27
27
|
s.add_dependency 'mime-types', '> 3'
|
28
28
|
s.add_dependency 'mini_exiftool'
|
29
|
-
s.add_dependency 'nokogiri'
|
30
29
|
|
31
|
-
s.add_development_dependency 'json'
|
32
30
|
s.add_development_dependency 'pry-byebug'
|
33
31
|
s.add_development_dependency 'rake'
|
34
32
|
s.add_development_dependency 'rspec', '~> 3.0'
|
data/config/boot.rb
CHANGED
@@ -0,0 +1,201 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'mini_exiftool'
|
4
|
+
require 'mime/types'
|
5
|
+
require 'active_support/core_ext/object/blank'
|
6
|
+
|
7
|
+
module Assembly
|
8
|
+
# This class contains generic methods to operate on any file.
|
9
|
+
class ObjectFile
|
10
|
+
# @param [Array] strings Array of filenames with paths
|
11
|
+
# @return [String] longest common initial path of filenames passed in
|
12
|
+
#
|
13
|
+
# Example:
|
14
|
+
# puts Assembly::ObjectFile.common_path(['/Users/peter/00/test.tif','/Users/peter/05/test.jp2'])
|
15
|
+
# # => '/Users/peter/'
|
16
|
+
def self.common_path(strings)
|
17
|
+
return nil if strings.empty?
|
18
|
+
|
19
|
+
n = 0
|
20
|
+
x = strings.last
|
21
|
+
n += 1 while strings.all? { |s| s[n] && (s[n] == x[n]) }
|
22
|
+
common_prefix = x[0...n]
|
23
|
+
if common_prefix[-1, 1] == '/' # check if last element of the common string is the end of a directory
|
24
|
+
common_prefix # the common prefix already ends at a directory boundary, so return it as is
|
25
|
+
else
|
26
|
+
"#{common_prefix.split('/')[0..-2].join('/')}/" # otherwise drop the trailing partial segment and keep the directory
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
|
31
|
+
|
32
|
+
VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
|
33
|
+
|
34
|
+
# @param [String] path full path to the file to be worked with
|
35
|
+
# @param [Hash<Symbol => Object>] params options used during content metadata generation
|
36
|
+
# @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g.:
|
37
|
+
# {:preserve=>'yes',:shelve=>'no',:publish=>'no'},
|
38
|
+
# defaults pulled from mimetype
|
39
|
+
# @option params [String] :label a resource label (files bundled together will just get the first
|
40
|
+
# file's label attribute if set)
|
41
|
+
# @option params [String] :provider_md5 pre-computed MD5 checksum
|
42
|
+
# @option params [String] :provider_sha1 pre-computed SHA1 checksum
|
43
|
+
# @option params [String] :relative_path if you want the file ids in the content metadata it can be set,
|
44
|
+
# otherwise content metadata will get the full path
|
45
|
+
# @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
|
46
|
+
# options are :override (from manual override mapping if exists),
|
47
|
+
# :exif (from exif if exists)
|
48
|
+
# :extension (from file extension)
|
49
|
+
# :file (from unix file system command)
|
50
|
+
# the default is defined in the private `default_mime_type_order` method
|
51
|
+
# but you can override to set your own order
|
52
|
+
def initialize(path, params = {})
|
53
|
+
@path = path
|
54
|
+
@label = params[:label]
|
55
|
+
@file_attributes = params[:file_attributes]
|
56
|
+
@relative_path = params[:relative_path]
|
57
|
+
@provider_md5 = params[:provider_md5]
|
58
|
+
@provider_sha1 = params[:provider_sha1]
|
59
|
+
@mime_type_order = params[:mime_type_order] || default_mime_type_order
|
60
|
+
end
|
61
|
+
|
62
|
+
def filename
|
63
|
+
File.basename(path)
|
64
|
+
end
|
65
|
+
|
66
|
+
def dirname
|
67
|
+
File.dirname(path)
|
68
|
+
end
|
69
|
+
|
70
|
+
def ext
|
71
|
+
File.extname(path)
|
72
|
+
end
|
73
|
+
|
74
|
+
def filename_without_ext
|
75
|
+
File.basename(path, ext)
|
76
|
+
end
|
77
|
+
|
78
|
+
# @return [MiniExiftool] exif mini_exiftool gem object wrapper for exiftool
|
79
|
+
def exif
|
80
|
+
@exif ||= begin
|
81
|
+
check_for_file
|
82
|
+
MiniExiftool.new(path, replace_invalid_chars: '?')
|
83
|
+
rescue MiniExiftool::Error
|
84
|
+
# MiniExiftool may raise an error on files it doesn't know how to handle (disk images for example)
|
85
|
+
# but we don't want this to prevent an ObjectFile from being created, so just swallow it.
|
86
|
+
nil
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
# @return [String] computed md5 checksum
|
91
|
+
def md5
|
92
|
+
check_for_file unless @md5
|
93
|
+
@md5 ||= Digest::MD5.file(path).hexdigest
|
94
|
+
end
|
95
|
+
|
96
|
+
# @return [String] computed sha1 checksum
|
97
|
+
def sha1
|
98
|
+
check_for_file unless @sha1
|
99
|
+
@sha1 ||= Digest::SHA1.file(path).hexdigest
|
100
|
+
end
|
101
|
+
|
102
|
+
# Returns mimetype information for the current file based on the ordering set in mime_type_order
|
103
|
+
# We stop computing mimetypes as soon as we have a method that returns a value
|
104
|
+
# @return [String] mimetype of the file
|
105
|
+
def mimetype
|
106
|
+
@mimetype ||= begin
|
107
|
+
check_for_file
|
108
|
+
mimetype = ''
|
109
|
+
mime_type_order.each do |mime_type_method|
|
110
|
+
mimetype = send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
|
111
|
+
break if mimetype.present?
|
112
|
+
end
|
113
|
+
mimetype
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
# @return [Symbol] the type of object, could be :application (for PDF or Word, etc),
|
118
|
+
# :audio, :image, :message, :model, :multipart, :text or :video
|
119
|
+
def object_type
|
120
|
+
lookup = MIME::Types[mimetype][0]
|
121
|
+
lookup.nil? ? :other : lookup.media_type.to_sym
|
122
|
+
end
|
123
|
+
|
124
|
+
# @return [Boolean] true if the mime-types gem recognizes it as an image (from file extension lookup)
|
125
|
+
def image?
|
126
|
+
object_type == :image
|
127
|
+
end
|
128
|
+
|
129
|
+
# @return [Boolean] true if the mime-types gem recognizes it as an image (from file extension lookup)
|
130
|
+
# AND it is a jp2 or jp2able?
|
131
|
+
def valid_image?
|
132
|
+
return false unless image?
|
133
|
+
|
134
|
+
mimetype == 'image/jp2' || jp2able?
|
135
|
+
end
|
136
|
+
|
137
|
+
# @return [Boolean] true if we can create a jp2 from the file
|
138
|
+
def jp2able?
|
139
|
+
return false unless exif
|
140
|
+
|
141
|
+
Assembly::VALID_IMAGE_MIMETYPES.include?(mimetype)
|
142
|
+
end
|
143
|
+
|
144
|
+
# @return [Integer] file size in bytes
|
145
|
+
def filesize
|
146
|
+
check_for_file
|
147
|
+
@filesize ||= File.size(path)
|
148
|
+
end
|
149
|
+
|
150
|
+
# @return [Boolean] file exists and is not a directory
|
151
|
+
def file_exists?
|
152
|
+
@file_exists ||= (File.exist?(path) && !File.directory?(path))
|
153
|
+
end
|
154
|
+
|
155
|
+
private
|
156
|
+
|
157
|
+
# check for file existence before operating on it
|
158
|
+
def check_for_file
|
159
|
+
raise "input file #{path} does not exist or is a directory" unless file_exists?
|
160
|
+
end
|
161
|
+
|
162
|
+
# defines default preferred ordering of how mimetypes are determined
|
163
|
+
def default_mime_type_order
|
164
|
+
%i[override exif file extension]
|
165
|
+
end
|
166
|
+
|
167
|
+
# @return [String] mime type for supplied file using the mime-types gem (based on a file extension lookup)
|
168
|
+
def extension_mimetype
|
169
|
+
@extension_mimetype ||= begin
|
170
|
+
mtype = MIME::Types.type_for(path).first
|
171
|
+
mtype ? mtype.content_type : ''
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
# @return [String] mime type for supplied file based on unix file system command
|
176
|
+
def file_mimetype
|
177
|
+
@file_mimetype ||= begin
|
178
|
+
check_for_file
|
179
|
+
`file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # get the mimetype from the unix file command
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
# @return [String] mimetype information for the current file based on exif data,
|
184
|
+
# unless mimetype is configured as one we'd rather get from the file system command
|
185
|
+
# (e.g. exif struggles or we get better info from file system command)
|
186
|
+
def exif_mimetype
|
187
|
+
@exif_mimetype ||= begin
|
188
|
+
check_for_file
|
189
|
+
# if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
|
190
|
+
prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype)
|
191
|
+
exif.mimetype if prefer_exif && exif&.mimetype
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
# Returns mimetype information using the manual override mapping (based on a file extension lookup)
|
196
|
+
# @return [String] mime type for supplied file if a mapping exists for the file's extension
|
197
|
+
def override_mimetype
|
198
|
+
@override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
|
199
|
+
end
|
200
|
+
end
|
201
|
+
end
|
data/lib/assembly-objectfile.rb
CHANGED
@@ -1,25 +1,25 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Assembly
|
4
|
-
#
|
5
|
-
|
6
|
-
|
7
|
-
# if input image is not one of these mime types, it will not be regarded as a valid image for the purpose of generating a JP2 derivative
|
4
|
+
# If input image is not one of these mime types, it will not be regarded as a valid image
|
5
|
+
# for the purpose of generating a JP2 derivative
|
8
6
|
VALID_IMAGE_MIMETYPES = ['image/jpeg', 'image/tiff', 'image/tif', 'image/png'].freeze
|
9
7
|
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
|
8
|
+
# The list of mimetypes that will be "trusted" by the unix file command; if a mimetype other than
|
9
|
+
# one of these is returned by the file command, then a check will be made to see if exif data exists...
|
10
|
+
# if so, the mimetype returned by the exif data will be used; if no exif data exists, then the
|
11
|
+
# mimetype returned by the unix file command will be used
|
12
|
+
TRUSTED_MIMETYPES = ['text/plain', 'plain/text', 'application/pdf', 'text/html', 'application/xml',
|
13
|
+
'application/octet-stream'].freeze
|
14
14
|
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
15
|
+
# This is a manual override mapping of file extension to mimetype; if a file with the given extension
|
16
|
+
# is found, the mapped mimetype will be returned and no further methods will be used - this is used
|
17
|
+
# to force a specific mimetype to be returned for a given file extension regardless of what exif or
|
18
|
+
# the unix file system command returns; the mapping format is "extension with period: returned mimetype",
|
19
|
+
# e.g. for any .json file, you will always get `application/json`
|
19
20
|
OVERRIDE_MIMETYPES = {
|
20
21
|
'.json': 'application/json'
|
21
22
|
}.freeze
|
22
23
|
end
|
23
24
|
|
24
|
-
require 'assembly
|
25
|
-
require 'assembly-objectfile/version'
|
25
|
+
require 'assembly/object_file'
|
@@ -6,13 +6,15 @@ describe Assembly::ObjectFile do
|
|
6
6
|
describe '.common_path' do
|
7
7
|
context 'when common path is 2 nodes out of 4' do
|
8
8
|
it 'returns the common directory' do
|
9
|
-
expect(described_class.common_path(['/Users/peter/00/test.tif',
|
9
|
+
expect(described_class.common_path(['/Users/peter/00/test.tif',
|
10
|
+
'/Users/peter/05/test.jp2'])).to eq('/Users/peter/')
|
10
11
|
end
|
11
12
|
end
|
12
13
|
|
13
14
|
context 'when common path is 3 nodes out of 4' do
|
14
15
|
it 'returns the common directory' do
|
15
|
-
expect(described_class.common_path(['/Users/peter/00/test.tif',
|
16
|
+
expect(described_class.common_path(['/Users/peter/00/test.tif',
|
17
|
+
'/Users/peter/00/test.jp2'])).to eq('/Users/peter/00/')
|
16
18
|
end
|
17
19
|
end
|
18
20
|
|
@@ -25,8 +27,9 @@ describe Assembly::ObjectFile do
|
|
25
27
|
|
26
28
|
describe '#new' do
|
27
29
|
context 'without params' do
|
30
|
+
let(:object_file) { described_class.new('/some/file.txt') }
|
31
|
+
|
28
32
|
it 'does not set attributes' do
|
29
|
-
object_file = described_class.new('/some/file.txt')
|
30
33
|
expect(object_file.path).to eq('/some/file.txt')
|
31
34
|
expect(object_file.label).to be_nil
|
32
35
|
expect(object_file.file_attributes).to be_nil
|
@@ -37,8 +40,15 @@ describe Assembly::ObjectFile do
|
|
37
40
|
end
|
38
41
|
|
39
42
|
context 'with params' do
|
43
|
+
let(:object_file) do
|
44
|
+
described_class.new('/some/file.txt', label: 'some label',
|
45
|
+
file_attributes: { 'shelve' => 'yes',
|
46
|
+
'publish' => 'yes',
|
47
|
+
'preserve' => 'no' },
|
48
|
+
relative_path: '/tmp')
|
49
|
+
end
|
50
|
+
|
40
51
|
it 'sets attributes to passed params' do
|
41
|
-
object_file = described_class.new('/some/file.txt', label: 'some label', file_attributes: { 'shelve' => 'yes', 'publish' => 'yes', 'preserve' => 'no' }, relative_path: '/tmp')
|
42
52
|
expect(object_file.path).to eq('/some/file.txt')
|
43
53
|
expect(object_file.label).to eq('some label')
|
44
54
|
expect(object_file.file_attributes).to eq('shelve' => 'yes', 'publish' => 'yes', 'preserve' => 'no')
|
@@ -47,9 +57,12 @@ describe Assembly::ObjectFile do
|
|
47
57
|
expect(object_file.relative_path).to eq('/tmp')
|
48
58
|
end
|
49
59
|
|
50
|
-
|
51
|
-
object_file
|
52
|
-
|
60
|
+
context 'with provider_md5' do
|
61
|
+
let(:object_file) { described_class.new('/some/file.txt', provider_md5: 'XYZ') }
|
62
|
+
|
63
|
+
it 'sets provider_md5 to passed param' do
|
64
|
+
expect(object_file.provider_md5).to eq('XYZ')
|
65
|
+
end
|
53
66
|
end
|
54
67
|
end
|
55
68
|
end
|
@@ -100,7 +113,7 @@ describe Assembly::ObjectFile do
|
|
100
113
|
|
101
114
|
context 'with ruby file' do
|
102
115
|
it 'false' do
|
103
|
-
non_image_file = File.join(
|
116
|
+
non_image_file = File.join(PATH_TO_GEM, 'spec/assembly/object_file_spec.rb')
|
104
117
|
object_file = described_class.new(non_image_file)
|
105
118
|
expect(object_file.image?).to be(false)
|
106
119
|
end
|
@@ -108,7 +121,7 @@ describe Assembly::ObjectFile do
|
|
108
121
|
|
109
122
|
context 'with xml' do
|
110
123
|
it 'false' do
|
111
|
-
non_image_file = File.join(
|
124
|
+
non_image_file = File.join(PATH_TO_GEM, 'spec/test_data/input/file_with_no_exif.xml')
|
112
125
|
object_file = described_class.new(non_image_file)
|
113
126
|
expect(object_file.image?).to be(false)
|
114
127
|
end
|
@@ -132,7 +145,7 @@ describe Assembly::ObjectFile do
|
|
132
145
|
|
133
146
|
context 'with ruby file' do
|
134
147
|
it ':text' do
|
135
|
-
non_image_file = File.join(
|
148
|
+
non_image_file = File.join(PATH_TO_GEM, 'spec/assembly/object_file_spec.rb')
|
136
149
|
object_file = described_class.new(non_image_file)
|
137
150
|
expect(object_file.object_type).to eq(:text)
|
138
151
|
end
|
@@ -140,7 +153,7 @@ describe Assembly::ObjectFile do
|
|
140
153
|
|
141
154
|
context 'with xml' do
|
142
155
|
it ':application' do
|
143
|
-
non_image_file = File.join(
|
156
|
+
non_image_file = File.join(PATH_TO_GEM, 'spec/test_data/input/file_with_no_exif.xml')
|
144
157
|
object_file = described_class.new(non_image_file)
|
145
158
|
expect(object_file.object_type).to eq(:application)
|
146
159
|
end
|
@@ -178,7 +191,7 @@ describe Assembly::ObjectFile do
|
|
178
191
|
|
179
192
|
context 'with ruby file' do
|
180
193
|
it 'false' do
|
181
|
-
non_image_file = File.join(
|
194
|
+
non_image_file = File.join(PATH_TO_GEM, 'spec/assembly/object_file_spec.rb')
|
182
195
|
object_file = described_class.new(non_image_file)
|
183
196
|
expect(object_file.valid_image?).to be(false)
|
184
197
|
end
|
@@ -186,7 +199,7 @@ describe Assembly::ObjectFile do
|
|
186
199
|
|
187
200
|
context 'with xml' do
|
188
201
|
it 'false' do
|
189
|
-
non_image_file = File.join(
|
202
|
+
non_image_file = File.join(PATH_TO_GEM, 'spec/test_data/input/file_with_no_exif.xml')
|
190
203
|
object_file = described_class.new(non_image_file)
|
191
204
|
expect(object_file.valid_image?).to be(false)
|
192
205
|
end
|
@@ -320,29 +333,6 @@ describe Assembly::ObjectFile do
|
|
320
333
|
end
|
321
334
|
end
|
322
335
|
|
323
|
-
describe '#has_color_profile?' do
|
324
|
-
context 'with jp2 file' do
|
325
|
-
it 'true' do
|
326
|
-
object_file = described_class.new(TEST_JP2_INPUT_FILE)
|
327
|
-
expect(object_file.has_color_profile?).to be(true)
|
328
|
-
end
|
329
|
-
end
|
330
|
-
|
331
|
-
context 'with tiff file' do
|
332
|
-
it 'true' do
|
333
|
-
object_file = described_class.new(TEST_RES1_TIF1)
|
334
|
-
expect(object_file.has_color_profile?).to be(true)
|
335
|
-
end
|
336
|
-
end
|
337
|
-
|
338
|
-
context 'with tiff no color file' do
|
339
|
-
it 'false' do
|
340
|
-
object_file = described_class.new(TEST_TIFF_NO_COLOR_FILE)
|
341
|
-
expect(object_file.has_color_profile?).to be(false)
|
342
|
-
end
|
343
|
-
end
|
344
|
-
end
|
345
|
-
|
346
336
|
describe '#md5' do
|
347
337
|
it 'computes md5 for an image file' do
|
348
338
|
object_file = described_class.new(TEST_TIF_INPUT_FILE)
|
@@ -369,7 +359,7 @@ describe Assembly::ObjectFile do
|
|
369
359
|
|
370
360
|
describe '#file_exists?' do
|
371
361
|
it 'false when a valid directory is specified instead of a file' do
|
372
|
-
path =
|
362
|
+
path = PATH_TO_GEM
|
373
363
|
object_file = described_class.new(path)
|
374
364
|
expect(File.exist?(path)).to be true
|
375
365
|
expect(File.directory?(path)).to be true
|
@@ -377,7 +367,7 @@ describe Assembly::ObjectFile do
|
|
377
367
|
end
|
378
368
|
|
379
369
|
it 'false when a non-existent file is specified' do
|
380
|
-
path = File.join(
|
370
|
+
path = File.join(PATH_TO_GEM, 'file_not_there.txt')
|
381
371
|
object_file = described_class.new(path)
|
382
372
|
expect(File.exist?(path)).to be false
|
383
373
|
expect(File.directory?(path)).to be false
|
@@ -397,32 +387,17 @@ describe Assembly::ObjectFile do
|
|
397
387
|
end
|
398
388
|
end
|
399
389
|
|
400
|
-
describe '#
|
401
|
-
|
402
|
-
it 'binary' do
|
403
|
-
object_file = described_class.new(TEST_TIF_INPUT_FILE)
|
404
|
-
expect(object_file.send(:encoding)).to eq('binary')
|
405
|
-
end
|
406
|
-
end
|
390
|
+
describe '#exif' do
|
391
|
+
subject(:exif) { object_file.exif }
|
407
392
|
|
408
|
-
|
409
|
-
it 'binary' do
|
410
|
-
object_file = described_class.new(TEST_RES1_TEXT)
|
411
|
-
expect(object_file.send(:encoding)).to eq('us-ascii')
|
412
|
-
end
|
413
|
-
end
|
414
|
-
end
|
393
|
+
let(:object_file) { described_class.new(TEST_TIF_INPUT_FILE) }
|
415
394
|
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
expect(object_file.exif.class).to eq MiniExiftool
|
421
|
-
end
|
395
|
+
it { is_expected.to be_kind_of MiniExiftool }
|
396
|
+
|
397
|
+
context 'when exiftool raises an error initializing the file' do
|
398
|
+
let(:object_file) { described_class.new('spec/test_data/empty.txt') }
|
422
399
|
|
423
|
-
|
424
|
-
object_file = described_class.new('spec/test_data/empty.txt')
|
425
|
-
expect { object_file.exif }.to raise_error(MiniExiftool::Error)
|
400
|
+
it { is_expected.to be_nil }
|
426
401
|
end
|
427
402
|
end
|
428
403
|
|
data/spec/spec_helper.rb
CHANGED
@@ -10,13 +10,10 @@ RSpec.configure do |config|
|
|
10
10
|
config.order = 'random'
|
11
11
|
end
|
12
12
|
|
13
|
-
|
14
|
-
TEST_INPUT_DIR = File.join(
|
15
|
-
TEST_OUTPUT_DIR = File.join(TEST_DATA_DIR, 'output')
|
13
|
+
PATH_TO_GEM = File.expand_path("#{File.dirname(__FILE__)}/..")
|
14
|
+
TEST_INPUT_DIR = File.join(PATH_TO_GEM, 'spec', 'test_data', 'input')
|
16
15
|
TEST_TIF_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.tif')
|
17
|
-
TEST_JPEG_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.jpg')
|
18
16
|
TEST_JP2_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.jp2')
|
19
|
-
TEST_JP2_OUTPUT_FILE = File.join(TEST_OUTPUT_DIR, 'test.jp2')
|
20
17
|
|
21
18
|
TEST_TIFF_NO_COLOR_FILE = File.join(TEST_INPUT_DIR, 'test_no_color_profile.tif')
|
22
19
|
|
@@ -30,5 +27,3 @@ TEST_JSON_FILE = File.join(TEST_INPUT_DIR, 'test.json')
|
|
30
27
|
|
31
28
|
TEST_OBJ_FILE = File.join(TEST_INPUT_DIR, 'someobject.obj')
|
32
29
|
TEST_PLY_FILE = File.join(TEST_INPUT_DIR, 'someobject.ply')
|
33
|
-
|
34
|
-
TEST_DRUID = 'nx288wh8889'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: assembly-objectfile
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Mangiafico
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: exe
|
13
13
|
cert_chain: []
|
14
|
-
date: 2022-
|
14
|
+
date: 2022-08-18 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: activesupport
|
@@ -27,20 +27,6 @@ dependencies:
|
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: 5.2.0
|
30
|
-
- !ruby/object:Gem::Dependency
|
31
|
-
name: deprecation
|
32
|
-
requirement: !ruby/object:Gem::Requirement
|
33
|
-
requirements:
|
34
|
-
- - ">="
|
35
|
-
- !ruby/object:Gem::Version
|
36
|
-
version: '0'
|
37
|
-
type: :runtime
|
38
|
-
prerelease: false
|
39
|
-
version_requirements: !ruby/object:Gem::Requirement
|
40
|
-
requirements:
|
41
|
-
- - ">="
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
version: '0'
|
44
30
|
- !ruby/object:Gem::Dependency
|
45
31
|
name: mime-types
|
46
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -69,34 +55,6 @@ dependencies:
|
|
69
55
|
- - ">="
|
70
56
|
- !ruby/object:Gem::Version
|
71
57
|
version: '0'
|
72
|
-
- !ruby/object:Gem::Dependency
|
73
|
-
name: nokogiri
|
74
|
-
requirement: !ruby/object:Gem::Requirement
|
75
|
-
requirements:
|
76
|
-
- - ">="
|
77
|
-
- !ruby/object:Gem::Version
|
78
|
-
version: '0'
|
79
|
-
type: :runtime
|
80
|
-
prerelease: false
|
81
|
-
version_requirements: !ruby/object:Gem::Requirement
|
82
|
-
requirements:
|
83
|
-
- - ">="
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
version: '0'
|
86
|
-
- !ruby/object:Gem::Dependency
|
87
|
-
name: json
|
88
|
-
requirement: !ruby/object:Gem::Requirement
|
89
|
-
requirements:
|
90
|
-
- - ">="
|
91
|
-
- !ruby/object:Gem::Version
|
92
|
-
version: '0'
|
93
|
-
type: :development
|
94
|
-
prerelease: false
|
95
|
-
version_requirements: !ruby/object:Gem::Requirement
|
96
|
-
requirements:
|
97
|
-
- - ">="
|
98
|
-
- !ruby/object:Gem::Version
|
99
|
-
version: '0'
|
100
58
|
- !ruby/object:Gem::Dependency
|
101
59
|
name: pry-byebug
|
102
60
|
requirement: !ruby/object:Gem::Requirement
|
@@ -205,12 +163,9 @@ files:
|
|
205
163
|
- bin/run_all_tests
|
206
164
|
- config/boot.rb
|
207
165
|
- lib/assembly-objectfile.rb
|
208
|
-
- lib/assembly
|
209
|
-
- lib/assembly
|
210
|
-
-
|
211
|
-
- profiles/DotGain20.icc
|
212
|
-
- profiles/sRGBIEC6196621.icc
|
213
|
-
- spec/object_file_spec.rb
|
166
|
+
- lib/assembly/object_file.rb
|
167
|
+
- lib/assembly/object_file/version.rb
|
168
|
+
- spec/assembly/object_file_spec.rb
|
214
169
|
- spec/spec_helper.rb
|
215
170
|
- spec/test_data/empty.txt
|
216
171
|
- spec/test_data/input/.empty
|
@@ -247,6 +202,6 @@ requirements: []
|
|
247
202
|
rubygems_version: 3.3.7
|
248
203
|
signing_key:
|
249
204
|
specification_version: 4
|
250
|
-
summary: Ruby
|
251
|
-
|
205
|
+
summary: Ruby implementation of file services needed to prepare objects to be accessioned
|
206
|
+
into the Stanford Digital Repository
|
252
207
|
test_files: []
|
@@ -1,279 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'mini_exiftool'
|
4
|
-
require 'mime/types'
|
5
|
-
require 'active_support/core_ext/object/blank'
|
6
|
-
|
7
|
-
module Assembly
|
8
|
-
# This class contains generic methods to operate on any file.
|
9
|
-
class ObjectFile
|
10
|
-
# Class level method that given an array of strings, return the longest common initial path. Useful for removing a common path from a set of filenames when producing content metadata
|
11
|
-
#
|
12
|
-
# @param [Array] strings Array of filenames with paths to operate on
|
13
|
-
# @return [String] longest common initial part of path of filenames passed in
|
14
|
-
#
|
15
|
-
# Example:
|
16
|
-
# puts Assembly::ObjectFile.common_prefix(['/Users/peter/00/test.tif','/Users/peter/05/test.jp2']) # '/Users/peter/0'
|
17
|
-
def self.common_path(strings)
|
18
|
-
return nil if strings.empty?
|
19
|
-
|
20
|
-
n = 0
|
21
|
-
x = strings.last
|
22
|
-
n += 1 while strings.all? { |s| s[n] && (s[n] == x[n]) }
|
23
|
-
common_prefix = x[0...n]
|
24
|
-
if common_prefix[-1, 1] == '/' # check if last element of the common string is the end of a directory
|
25
|
-
common_prefix # if not, split string along directories, and reject last one
|
26
|
-
else
|
27
|
-
"#{common_prefix.split('/')[0..-2].join('/')}/" # if it was, then return the common prefix directly
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
|
32
|
-
|
33
|
-
VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
|
34
|
-
|
35
|
-
# @param [String] path full path to the file to be worked with
|
36
|
-
# @param [Hash<Symbol => Object>] params options used during content metadata generation
|
37
|
-
# @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g. {:preserve=>'yes',:shelve=>'no',:publish=>'no'}, defaults pulled from mimetype
|
38
|
-
# @option params [String] :label a resource label (files bundlded together will just get the first file's label attribute if set)
|
39
|
-
# @option params [String] :provider_md5 pre-computed MD5 checksum
|
40
|
-
# @option params [String] :provider_sha1 pre-computed SHA1 checksum
|
41
|
-
# @option params [String] :relative_path if you want the file ids in the content metadata it can be set, otherwise content metadata will get the full path
|
42
|
-
# @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
|
43
|
-
# options are :override (from manual overide mapping if exists), :exif (from exif if exists),
|
44
|
-
# :extension (from file extension), and :file (from unix file system command)
|
45
|
-
# the default is defined in the private `default_mime_type_order` method but you can override to set your own order
|
46
|
-
# @example
|
47
|
-
# Assembly::ObjectFile.new('/input/path_to_file.tif')
|
48
|
-
def initialize(path, params = {})
|
49
|
-
@path = path
|
50
|
-
@label = params[:label]
|
51
|
-
@file_attributes = params[:file_attributes]
|
52
|
-
@relative_path = params[:relative_path]
|
53
|
-
@provider_md5 = params[:provider_md5]
|
54
|
-
@provider_sha1 = params[:provider_sha1]
|
55
|
-
@mime_type_order = params[:mime_type_order] || default_mime_type_order
|
56
|
-
end
|
57
|
-
|
58
|
-
# @return [String] base filename
|
59
|
-
# @example
|
60
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
61
|
-
# puts source_file.filename # "path_to_file.tif"
|
62
|
-
def filename
|
63
|
-
File.basename(path)
|
64
|
-
end
|
65
|
-
|
66
|
-
# @return [String] base directory
|
67
|
-
# @example
|
68
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
69
|
-
# puts source_file.dirname # "/input"
|
70
|
-
def dirname
|
71
|
-
File.dirname(path)
|
72
|
-
end
|
73
|
-
|
74
|
-
# @return [String] filename extension
|
75
|
-
# @example
|
76
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
77
|
-
# puts source_file.ext # ".tif"
|
78
|
-
def ext
|
79
|
-
File.extname(path)
|
80
|
-
end
|
81
|
-
|
82
|
-
# @return [String] base filename without extension
|
83
|
-
# @example
|
84
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
85
|
-
# puts source_file.filename # "path_to_file"
|
86
|
-
def filename_without_ext
|
87
|
-
File.basename(path, ext)
|
88
|
-
end
|
89
|
-
|
90
|
-
# @return [MiniExiftool] exif information stored as a hash and an object
|
91
|
-
# @example
|
92
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
93
|
-
# puts source_file.exif # hash with exif information
|
94
|
-
def exif
|
95
|
-
@exif ||= begin
|
96
|
-
check_for_file
|
97
|
-
MiniExiftool.new(path, replace_invalid_chars: '?')
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
# Computes md5 checksum or returns cached value
|
102
|
-
# @return [String] md5 checksum
|
103
|
-
# @example
|
104
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
105
|
-
# puts source_file.md5 # 'XXX123XXX1243XX1243'
|
106
|
-
def md5
|
107
|
-
check_for_file unless @md5
|
108
|
-
@md5 ||= Digest::MD5.file(path).hexdigest
|
109
|
-
end
|
110
|
-
|
111
|
-
# Computes sha1 checksum or return cached value
|
112
|
-
# @return [String] sha1 checksum
|
113
|
-
# @example
|
114
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
115
|
-
# puts source_file.sha1 # 'XXX123XXX1243XX1243'
|
116
|
-
def sha1
|
117
|
-
check_for_file unless @sha1
|
118
|
-
@sha1 ||= Digest::SHA1.file(path).hexdigest
|
119
|
-
end
|
120
|
-
|
121
|
-
# Returns mimetype information for the current file based on the ordering set in default_mime_type_order
|
122
|
-
# We stop computing mimetypes as soon as we have a method that returns a value
|
123
|
-
# @return [String] mime type
|
124
|
-
# @example
|
125
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
126
|
-
# puts source_file.mimetype # 'text/plain'
|
127
|
-
def mimetype
|
128
|
-
@mimetype ||= begin
|
129
|
-
check_for_file
|
130
|
-
mimetype = ''
|
131
|
-
mime_type_order.each do |mime_type_method|
|
132
|
-
mimetype = send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
|
133
|
-
break if mimetype.present?
|
134
|
-
end
|
135
|
-
mimetype
|
136
|
-
end
|
137
|
-
end
|
138
|
-
|
139
|
-
# @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
|
140
|
-
# @example
|
141
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
142
|
-
# puts source_file.object_type # :image
|
143
|
-
def object_type
|
144
|
-
lookup = MIME::Types[mimetype][0]
|
145
|
-
lookup.nil? ? :other : lookup.media_type.to_sym
|
146
|
-
end
|
147
|
-
|
148
|
-
# @return [Boolean] if object is an image
|
149
|
-
# @example
|
150
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
151
|
-
# puts source_file.image? # true
|
152
|
-
def image?
|
153
|
-
object_type == :image
|
154
|
-
end
|
155
|
-
|
156
|
-
# Examines the input image for validity. Used to determine if image is a valid and useful image.
|
157
|
-
# If image is not a jp2, also checks if it is jp2able?
|
158
|
-
# @return [Boolean] true if image is valid, false if not.
|
159
|
-
# @example
|
160
|
-
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
161
|
-
# puts source_img.valid_image? # true
|
162
|
-
def valid_image?
|
163
|
-
return false unless image?
|
164
|
-
|
165
|
-
mimetype == 'image/jp2' || jp2able?
|
166
|
-
end
|
167
|
-
|
168
|
-
# @return [Boolean] true if image has a color profile, false if not.
|
169
|
-
# @example
|
170
|
-
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
171
|
-
# puts source_img.has_color_profile? # true
|
172
|
-
def has_color_profile?
|
173
|
-
return false unless exif
|
174
|
-
|
175
|
-
exif['profiledescription'] || exif['colorspace'] ? true : false
|
176
|
-
end
|
177
|
-
|
178
|
-
# Examines the input image for validity to create a jp2. Same as valid_image? but also confirms the existence of a profile description and further restricts mimetypes.
|
179
|
-
# It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
|
180
|
-
# @return [Boolean] true if image should have a jp2 created, false if not.
|
181
|
-
# @example
|
182
|
-
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
183
|
-
# puts source_img.jp2able? # true
|
184
|
-
def jp2able?
|
185
|
-
return false unless exif
|
186
|
-
|
187
|
-
Assembly::VALID_IMAGE_MIMETYPES.include?(mimetype)
|
188
|
-
end
|
189
|
-
|
190
|
-
# Returns file size information for the current file in bytes.
|
191
|
-
# @return [Integer] file size in bytes
|
192
|
-
# @example
|
193
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
194
|
-
# puts source_file.filesize # 1345
|
195
|
-
def filesize
|
196
|
-
check_for_file
|
197
|
-
@filesize ||= File.size(path)
|
198
|
-
end
|
199
|
-
|
200
|
-
# Determines if the file exists (and is not a directory)
|
201
|
-
# @return [Boolean] file exists
|
202
|
-
# @example
|
203
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
204
|
-
# puts source_file.file_exists? # true
|
205
|
-
def file_exists?
|
206
|
-
@file_exists ||= (File.exist?(path) && !File.directory?(path))
|
207
|
-
end
|
208
|
-
|
209
|
-
private
|
210
|
-
|
211
|
-
# private method to check for file existence before operating on it
|
212
|
-
def check_for_file
|
213
|
-
raise "input file #{path} does not exist or is a directory" unless file_exists?
|
214
|
-
end
|
215
|
-
|
216
|
-
# prive method defining default preferred ordering of how mimetypes are determined
|
217
|
-
def default_mime_type_order
|
218
|
-
%i[override exif file extension]
|
219
|
-
end
|
220
|
-
|
221
|
-
# Returns mimetype information using the mime-types gem (based on a file extension lookup)
|
222
|
-
# @return [String] mime type for supplied file
|
223
|
-
# @example
|
224
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
225
|
-
# puts source_file.extension_mimetype # 'text/plain'
|
226
|
-
def extension_mimetype
|
227
|
-
@extension_mimetype ||= begin
|
228
|
-
mtype = MIME::Types.type_for(path).first
|
229
|
-
mtype ? mtype.content_type : ''
|
230
|
-
end
|
231
|
-
end
|
232
|
-
|
233
|
-
# Returns mimetype information for the current file based on unix file system command.
|
234
|
-
# @return [String] mime type for supplied file
|
235
|
-
# @example
|
236
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
237
|
-
# puts source_file.file_mimetype # 'text/plain'
|
238
|
-
def file_mimetype
|
239
|
-
@file_mimetype ||= begin
|
240
|
-
check_for_file
|
241
|
-
`file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
|
242
|
-
end
|
243
|
-
end
|
244
|
-
|
245
|
-
# Returns mimetype information for the current file based on exif data (if available and not a trusted source that we'd rather get from the file system command)
|
246
|
-
# @return [String] mime type for supplied file
|
247
|
-
# @example
|
248
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
249
|
-
# puts source_file.exif_mimetype # 'text/plain'
|
250
|
-
def exif_mimetype
|
251
|
-
@exif_mimetype ||= begin
|
252
|
-
check_for_file
|
253
|
-
prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype) # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
|
254
|
-
exif.mimetype if exif&.mimetype && prefer_exif
|
255
|
-
end
|
256
|
-
end
|
257
|
-
|
258
|
-
# Returns mimetype information using the manual override mapping (based on a file extension lookup)
|
259
|
-
# @return [String] mime type for supplied file if a mapping exists for the file's extension
|
260
|
-
# @example
|
261
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
|
262
|
-
# puts source_file.override_mimetype # 'application/json'
|
263
|
-
def override_mimetype
|
264
|
-
@override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
|
265
|
-
end
|
266
|
-
|
267
|
-
# @note Uses shell call to "file", only expected to work on unix based systems
|
268
|
-
# @return [String] encoding for supplied file
|
269
|
-
# @example
|
270
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
271
|
-
# puts source_file.encoding # 'us-ascii'
|
272
|
-
def encoding
|
273
|
-
@encoding ||= begin
|
274
|
-
check_for_file
|
275
|
-
`file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
|
276
|
-
end
|
277
|
-
end
|
278
|
-
end
|
279
|
-
end
|
data/profiles/AdobeRGB1998.icc
DELETED
Binary file
|
data/profiles/DotGain20.icc
DELETED
Binary file
|
data/profiles/sRGBIEC6196621.icc
DELETED
Binary file
|