assembly-objectfile 1.13.0 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +1 -1
- data/.gitignore +0 -1
- data/.rubocop.yml +34 -1
- data/.rubocop_todo.yml +3 -120
- data/Gemfile.lock +97 -0
- data/README.md +6 -6
- data/assembly-objectfile.gemspec +4 -8
- data/config/boot.rb +0 -1
- data/lib/{assembly-objectfile → assembly/object_file}/version.rb +1 -1
- data/lib/{assembly-objectfile/object_fileable.rb → assembly/object_file.rb} +116 -101
- data/lib/assembly-objectfile.rb +14 -19
- data/spec/assembly/object_file_spec.rb +452 -0
- data/spec/spec_helper.rb +3 -37
- metadata +10 -143
- data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
- data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
- data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
- data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
- data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
- data/lib/assembly-objectfile/content_metadata.rb +0 -117
- data/lib/assembly-objectfile/object_file.rb +0 -29
- data/profiles/AdobeRGB1998.icc +0 -0
- data/profiles/DotGain20.icc +0 -0
- data/profiles/sRGBIEC6196621.icc +0 -0
- data/spec/content_metadata_spec.rb +0 -809
- data/spec/object_file_spec.rb +0 -222
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
- data/spec/test_data/input/res1_image1.jp2 +0 -0
- data/spec/test_data/input/res1_image2.jp2 +0 -0
- data/spec/test_data/input/res1_image2.tif +0 -0
- data/spec/test_data/input/res1_teifile.txt +0 -1
- data/spec/test_data/input/res2_image1.jp2 +0 -0
- data/spec/test_data/input/res2_image1.tif +0 -0
- data/spec/test_data/input/res2_image2.jp2 +0 -0
- data/spec/test_data/input/res2_image2.tif +0 -0
- data/spec/test_data/input/res2_teifile.txt +0 -1
- data/spec/test_data/input/res2_textfile.txt +0 -1
- data/spec/test_data/input/res3_image1.jp2 +0 -0
- data/spec/test_data/input/res3_image1.tif +0 -0
- data/spec/test_data/input/res3_teifile.txt +0 -1
- data/spec/test_data/input/test.pdf +0 -1
- data/spec/test_data/input/test.svg +0 -2
- data/spec/test_data/input/test2.jp2 +0 -0
- data/spec/test_data/input/test2.tif +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 76dac08191a4c118cf16e4736b9538c9d2a60811cf9547e0ad395ab468b1b51a
|
4
|
+
data.tar.gz: b9fec1e99d03941f658cd6716e1c00db37ce2f2042816e4ba8f77246b102d1f4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '082fc021ca4f4dde1f3eaaac937e66862d8ea36f5198bdece9866a72e5be068021665e10bd40b8ea2835d848c062886982ed41033935120525c8d4e57efadafd'
|
7
|
+
data.tar.gz: cf2e4facd4248532c5f92c741f80b5e8d27e04b8cc23e3b7959b59118cceaf8122d03aa92af8faf99c8000bb4298b74d9ab86001744c6f852cd148ceda21d675
|
data/.circleci/config.yml
CHANGED
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -9,12 +9,32 @@ AllCops:
|
|
9
9
|
- '**/*.md'
|
10
10
|
- 'vendor/**/*' # avoid running rubocop on cached bundler
|
11
11
|
|
12
|
+
Layout/LineLength:
|
13
|
+
Max: 120
|
14
|
+
|
15
|
+
Naming/FileName:
|
16
|
+
Exclude:
|
17
|
+
- 'lib/assembly-objectfile.rb'
|
18
|
+
|
19
|
+
Naming/PredicateName:
|
20
|
+
NamePrefix:
|
21
|
+
- 'is_'
|
22
|
+
|
12
23
|
Metrics/BlockLength:
|
13
24
|
Exclude:
|
14
25
|
- 'spec/**/*.rb'
|
15
26
|
- '**/*.gemspec'
|
16
27
|
|
17
|
-
|
28
|
+
RSpec/ExampleLength:
|
29
|
+
Max: 10
|
30
|
+
|
31
|
+
RSpec/MultipleExpectations:
|
32
|
+
Max: 10
|
33
|
+
|
34
|
+
RSpec/NestedGroups:
|
35
|
+
Max: 4
|
36
|
+
|
37
|
+
Gemspec/DeprecatedAttributeAssignment: # (new in 1.10)
|
18
38
|
Enabled: true
|
19
39
|
|
20
40
|
Layout/SpaceAroundMethodCallOperator:
|
@@ -207,3 +227,16 @@ RSpec/ChangeByZero: # new in 2.11.0
|
|
207
227
|
Enabled: true
|
208
228
|
RSpec/VerifiedDoubleReference: # new in 2.10.0
|
209
229
|
Enabled: true
|
230
|
+
|
231
|
+
Layout/LineContinuationLeadingSpace: # new in 1.31
|
232
|
+
Enabled: true
|
233
|
+
Layout/LineContinuationSpacing: # new in 1.31
|
234
|
+
Enabled: true
|
235
|
+
Lint/ConstantOverwrittenInRescue: # new in 1.31
|
236
|
+
Enabled: true
|
237
|
+
Lint/NonAtomicFileOperation: # new in 1.31
|
238
|
+
Enabled: true
|
239
|
+
RSpec/Capybara/SpecificMatcher: # new in 2.12
|
240
|
+
Enabled: true
|
241
|
+
RSpec/Rails/HaveHttpStatus: # new in 2.12
|
242
|
+
Enabled: true
|
data/.rubocop_todo.yml
CHANGED
@@ -1,129 +1,12 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2022-
|
3
|
+
# on 2022-07-20 17:16:46 UTC using RuboCop version 1.31.2.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
-
# Offense count: 1
|
10
|
-
Lint/UselessAssignment:
|
11
|
-
Exclude:
|
12
|
-
- 'config/boot.rb'
|
13
|
-
|
14
|
-
# Offense count: 3
|
15
|
-
# Configuration parameters: IgnoredMethods, CountRepeatedAttributes.
|
16
|
-
Metrics/AbcSize:
|
17
|
-
Max: 55
|
18
|
-
|
19
|
-
# Offense count: 1
|
20
|
-
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
|
21
|
-
# IgnoredMethods: refine
|
22
|
-
Metrics/BlockLength:
|
23
|
-
Max: 27
|
24
|
-
|
25
|
-
# Offense count: 2
|
26
|
-
# Configuration parameters: IgnoredMethods.
|
27
|
-
Metrics/CyclomaticComplexity:
|
28
|
-
Max: 14
|
29
|
-
|
30
|
-
# Offense count: 4
|
31
|
-
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
|
32
|
-
Metrics/MethodLength:
|
33
|
-
Max: 31
|
34
|
-
|
35
9
|
# Offense count: 1
|
36
10
|
# Configuration parameters: CountComments, CountAsOne.
|
37
|
-
Metrics/
|
38
|
-
Max:
|
39
|
-
|
40
|
-
# Offense count: 1
|
41
|
-
# Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
|
42
|
-
Metrics/ParameterLists:
|
43
|
-
Max: 12
|
44
|
-
|
45
|
-
# Offense count: 2
|
46
|
-
# Configuration parameters: IgnoredMethods.
|
47
|
-
Metrics/PerceivedComplexity:
|
48
|
-
Max: 15
|
49
|
-
|
50
|
-
# Offense count: 1
|
51
|
-
# Configuration parameters: ExpectMatchingDefinition, CheckDefinitionPathHierarchy, CheckDefinitionPathHierarchyRoots, Regex, IgnoreExecutableScripts, AllowedAcronyms.
|
52
|
-
# CheckDefinitionPathHierarchyRoots: lib, spec, test, src
|
53
|
-
# AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
|
54
|
-
Naming/FileName:
|
55
|
-
Exclude:
|
56
|
-
- 'lib/assembly-objectfile.rb'
|
57
|
-
|
58
|
-
# Offense count: 1
|
59
|
-
# Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros.
|
60
|
-
# NamePrefix: is_, has_, have_
|
61
|
-
# ForbiddenPrefixes: is_, has_, have_
|
62
|
-
# AllowedMethods: is_a?
|
63
|
-
# MethodDefinitionMacros: define_method, define_singleton_method
|
64
|
-
Naming/PredicateName:
|
65
|
-
Exclude:
|
66
|
-
- 'spec/**/*'
|
67
|
-
- 'lib/assembly-objectfile/object_fileable.rb'
|
68
|
-
|
69
|
-
# Offense count: 1
|
70
|
-
# Configuration parameters: Prefixes.
|
71
|
-
# Prefixes: when, with, without
|
72
|
-
RSpec/ContextWording:
|
73
|
-
Exclude:
|
74
|
-
- 'spec/content_metadata_spec.rb'
|
75
|
-
|
76
|
-
# Offense count: 32
|
77
|
-
# Configuration parameters: CountAsOne.
|
78
|
-
RSpec/ExampleLength:
|
79
|
-
Max: 34
|
80
|
-
|
81
|
-
# Offense count: 2
|
82
|
-
# Configuration parameters: Include, CustomTransform, IgnoreMethods, SpecSuffixOnly.
|
83
|
-
# Include: **/*_spec*rb*, **/spec/**/*
|
84
|
-
RSpec/FilePath:
|
85
|
-
Exclude:
|
86
|
-
- 'spec/content_metadata_spec.rb'
|
87
|
-
- 'spec/object_file_spec.rb'
|
88
|
-
|
89
|
-
# Offense count: 74
|
90
|
-
# Configuration parameters: AssignmentOnly.
|
91
|
-
RSpec/InstanceVariable:
|
92
|
-
Exclude:
|
93
|
-
- 'spec/object_file_spec.rb'
|
94
|
-
|
95
|
-
# Offense count: 41
|
96
|
-
RSpec/MultipleExpectations:
|
97
|
-
Max: 29
|
98
|
-
|
99
|
-
# Offense count: 20
|
100
|
-
RSpec/NestedGroups:
|
101
|
-
Max: 4
|
102
|
-
|
103
|
-
# Offense count: 2
|
104
|
-
RSpec/RepeatedDescription:
|
105
|
-
Exclude:
|
106
|
-
- 'spec/object_file_spec.rb'
|
107
|
-
|
108
|
-
# Offense count: 2
|
109
|
-
RSpec/RepeatedExample:
|
110
|
-
Exclude:
|
111
|
-
- 'spec/object_file_spec.rb'
|
112
|
-
|
113
|
-
# Offense count: 5
|
114
|
-
RSpec/RepeatedExampleGroupDescription:
|
115
|
-
Exclude:
|
116
|
-
- 'spec/content_metadata_spec.rb'
|
117
|
-
|
118
|
-
# Offense count: 2
|
119
|
-
# Cop supports --auto-correct.
|
120
|
-
Style/CommentedKeyword:
|
121
|
-
Exclude:
|
122
|
-
- 'lib/assembly-objectfile/content_metadata.rb'
|
123
|
-
|
124
|
-
# Offense count: 123
|
125
|
-
# Cop supports --auto-correct.
|
126
|
-
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
|
127
|
-
# URISchemes: http, https
|
128
|
-
Layout/LineLength:
|
129
|
-
Max: 277
|
11
|
+
Metrics/ClassLength:
|
12
|
+
Max: 122
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
assembly-objectfile (2.1.1)
|
5
|
+
activesupport (>= 5.2.0)
|
6
|
+
mime-types (> 3)
|
7
|
+
mini_exiftool
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: http://rubygems.org/
|
11
|
+
specs:
|
12
|
+
activesupport (7.0.3.1)
|
13
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
14
|
+
i18n (>= 1.6, < 2)
|
15
|
+
minitest (>= 5.1)
|
16
|
+
tzinfo (~> 2.0)
|
17
|
+
ast (2.4.2)
|
18
|
+
byebug (11.1.3)
|
19
|
+
coderay (1.1.3)
|
20
|
+
concurrent-ruby (1.1.10)
|
21
|
+
diff-lcs (1.5.0)
|
22
|
+
docile (1.4.0)
|
23
|
+
i18n (1.12.0)
|
24
|
+
concurrent-ruby (~> 1.0)
|
25
|
+
json (2.6.2)
|
26
|
+
method_source (1.0.0)
|
27
|
+
mime-types (3.4.1)
|
28
|
+
mime-types-data (~> 3.2015)
|
29
|
+
mime-types-data (3.2022.0105)
|
30
|
+
mini_exiftool (2.10.2)
|
31
|
+
minitest (5.16.2)
|
32
|
+
parallel (1.22.1)
|
33
|
+
parser (3.1.2.0)
|
34
|
+
ast (~> 2.4.1)
|
35
|
+
pry (0.13.1)
|
36
|
+
coderay (~> 1.1)
|
37
|
+
method_source (~> 1.0)
|
38
|
+
pry-byebug (3.9.0)
|
39
|
+
byebug (~> 11.0)
|
40
|
+
pry (~> 0.13.0)
|
41
|
+
rainbow (3.1.1)
|
42
|
+
rake (13.0.6)
|
43
|
+
regexp_parser (2.5.0)
|
44
|
+
rexml (3.2.5)
|
45
|
+
rspec (3.11.0)
|
46
|
+
rspec-core (~> 3.11.0)
|
47
|
+
rspec-expectations (~> 3.11.0)
|
48
|
+
rspec-mocks (~> 3.11.0)
|
49
|
+
rspec-core (3.11.0)
|
50
|
+
rspec-support (~> 3.11.0)
|
51
|
+
rspec-expectations (3.11.0)
|
52
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
53
|
+
rspec-support (~> 3.11.0)
|
54
|
+
rspec-mocks (3.11.1)
|
55
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
56
|
+
rspec-support (~> 3.11.0)
|
57
|
+
rspec-support (3.11.0)
|
58
|
+
rubocop (1.31.2)
|
59
|
+
json (~> 2.3)
|
60
|
+
parallel (~> 1.10)
|
61
|
+
parser (>= 3.1.0.0)
|
62
|
+
rainbow (>= 2.2.2, < 4.0)
|
63
|
+
regexp_parser (>= 1.8, < 3.0)
|
64
|
+
rexml (>= 3.2.5, < 4.0)
|
65
|
+
rubocop-ast (>= 1.18.0, < 2.0)
|
66
|
+
ruby-progressbar (~> 1.7)
|
67
|
+
unicode-display_width (>= 1.4.0, < 3.0)
|
68
|
+
rubocop-ast (1.19.1)
|
69
|
+
parser (>= 3.1.1.0)
|
70
|
+
rubocop-rspec (2.12.1)
|
71
|
+
rubocop (~> 1.31)
|
72
|
+
ruby-progressbar (1.11.0)
|
73
|
+
simplecov (0.21.2)
|
74
|
+
docile (~> 1.1)
|
75
|
+
simplecov-html (~> 0.11)
|
76
|
+
simplecov_json_formatter (~> 0.1)
|
77
|
+
simplecov-html (0.12.3)
|
78
|
+
simplecov_json_formatter (0.1.4)
|
79
|
+
tzinfo (2.0.5)
|
80
|
+
concurrent-ruby (~> 1.0)
|
81
|
+
unicode-display_width (2.2.0)
|
82
|
+
|
83
|
+
PLATFORMS
|
84
|
+
ruby
|
85
|
+
|
86
|
+
DEPENDENCIES
|
87
|
+
assembly-objectfile!
|
88
|
+
byebug
|
89
|
+
pry-byebug
|
90
|
+
rake
|
91
|
+
rspec (~> 3.0)
|
92
|
+
rubocop (~> 1.25)
|
93
|
+
rubocop-rspec
|
94
|
+
simplecov
|
95
|
+
|
96
|
+
BUNDLED WITH
|
97
|
+
2.3.17
|
data/README.md
CHANGED
@@ -8,15 +8,15 @@
|
|
8
8
|
## Overview
|
9
9
|
This gem contains classes used by the Stanford University Digital Library to
|
10
10
|
perform file operations necessary for accessioning of content. It is also
|
11
|
-
used by related gems
|
12
|
-
generation).
|
11
|
+
used by related gems that perform content type specific operations (e.g.
|
12
|
+
assembly-image for jp2 generation).
|
13
13
|
|
14
14
|
## Usage
|
15
15
|
|
16
16
|
The gem currently has methods for:
|
17
17
|
* filesize
|
18
|
-
*
|
19
|
-
*
|
18
|
+
* mimetype
|
19
|
+
* exif - consumers use ExifTool to get file information
|
20
20
|
|
21
21
|
## Running tests
|
22
22
|
|
@@ -34,8 +34,8 @@ rake release
|
|
34
34
|
|
35
35
|
1. Exiftool
|
36
36
|
|
37
|
-
RHEL: (RPM to install
|
38
|
-
|
37
|
+
RHEL: (RPM to install coming soon) Download latest version from:
|
38
|
+
https://exiftool.org/
|
39
39
|
|
40
40
|
tar -xf Image-ExifTool-#.##.tar.gz
|
41
41
|
cd Image-ExifTool-#.##
|
data/assembly-objectfile.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
$LOAD_PATH.push File.expand_path('lib', __dir__)
|
4
|
-
require 'assembly
|
4
|
+
require 'assembly/object_file/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = 'assembly-objectfile'
|
@@ -10,13 +10,13 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.authors = ['Peter Mangiafico', 'Renzo Sanchez-Silva', 'Monty Hindman', 'Tony Calavano']
|
11
11
|
s.email = ['pmangiafico@stanford.edu']
|
12
12
|
s.homepage = 'https://github.com/sul-dlss/assembly-objectfile'
|
13
|
-
s.summary = 'Ruby
|
13
|
+
s.summary = 'Ruby implementation of file services needed to prepare objects to be accessioned ' \
|
14
|
+
'into the Stanford Digital Repository'
|
14
15
|
s.description = 'Get exif data, file sizes and more.'
|
15
16
|
s.license = 'ALv2'
|
16
17
|
s.metadata['rubygems_mfa_required'] = 'true'
|
17
18
|
|
18
19
|
s.files = `git ls-files`.split("\n")
|
19
|
-
s.test_files = `git ls-files -- spec/*`.split("\n")
|
20
20
|
s.bindir = 'exe'
|
21
21
|
s.executables = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
22
22
|
s.require_paths = ['lib']
|
@@ -24,14 +24,10 @@ Gem::Specification.new do |s|
|
|
24
24
|
s.required_ruby_version = '>= 3.0'
|
25
25
|
|
26
26
|
s.add_dependency 'activesupport', '>= 5.2.0'
|
27
|
-
s.add_dependency 'deprecation'
|
28
|
-
s.add_dependency 'dry-struct', '~> 1.0'
|
29
|
-
s.add_dependency 'dry-types', '~> 1.1'
|
30
27
|
s.add_dependency 'mime-types', '> 3'
|
31
28
|
s.add_dependency 'mini_exiftool'
|
32
|
-
s.add_dependency 'nokogiri'
|
33
29
|
|
34
|
-
s.add_development_dependency '
|
30
|
+
s.add_development_dependency 'pry-byebug'
|
35
31
|
s.add_development_dependency 'rake'
|
36
32
|
s.add_development_dependency 'rspec', '~> 3.0'
|
37
33
|
s.add_development_dependency 'rubocop', '~> 1.25'
|
data/config/boot.rb
CHANGED
@@ -2,25 +2,56 @@
|
|
2
2
|
|
3
3
|
require 'mini_exiftool'
|
4
4
|
require 'mime/types'
|
5
|
+
require 'active_support/core_ext/object/blank'
|
5
6
|
|
6
7
|
module Assembly
|
7
|
-
#
|
8
|
-
|
8
|
+
# This class contains generic methods to operate on any file.
|
9
|
+
class ObjectFile
|
10
|
+
# Class-level method that, given an array of strings, returns the longest common initial path.
|
11
|
+
# Useful for removing a common path from a set of filenames when producing content metadata
|
12
|
+
#
|
13
|
+
# @param [Array] strings Array of filenames with paths to operate on
|
14
|
+
# @return [String] longest common initial part of path of filenames passed in
|
15
|
+
#
|
16
|
+
# Example:
|
17
|
+
# puts Assembly::ObjectFile.common_path(['/Users/peter/00/test.tif','/Users/peter/05/test.jp2'])
|
18
|
+
# # => '/Users/peter/'
|
19
|
+
def self.common_path(strings)
|
20
|
+
return nil if strings.empty?
|
21
|
+
|
22
|
+
n = 0
|
23
|
+
x = strings.last
|
24
|
+
n += 1 while strings.all? { |s| s[n] && (s[n] == x[n]) }
|
25
|
+
common_prefix = x[0...n]
|
26
|
+
if common_prefix[-1, 1] == '/' # check if last element of the common string is the end of a directory
|
27
|
+
common_prefix # it already ends at a directory boundary, so return the common prefix directly
|
28
|
+
else
|
29
|
+
"#{common_prefix.split('/')[0..-2].join('/')}/" # otherwise, split along directories and reject the partial last one
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
9
33
|
attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
|
10
34
|
|
11
35
|
VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
|
12
36
|
|
13
37
|
# @param [String] path full path to the file to be worked with
|
14
38
|
# @param [Hash<Symbol => Object>] params options used during content metadata generation
|
15
|
-
# @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g
|
16
|
-
#
|
39
|
+
# @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g.:
|
40
|
+
# {:preserve=>'yes',:shelve=>'no',:publish=>'no'},
|
41
|
+
# defaults pulled from mimetype
|
42
|
+
# @option params [String] :label a resource label (files bundled together will just get the first
|
43
|
+
# file's label attribute if set)
|
17
44
|
# @option params [String] :provider_md5 pre-computed MD5 checksum
|
18
45
|
# @option params [String] :provider_sha1 pre-computed SHA1 checksum
|
19
|
-
# @option params [String] :relative_path if you want the file ids in the content metadata it can be set,
|
46
|
+
# @option params [String] :relative_path if you want the file ids in the content metadata it can be set,
|
47
|
+
# otherwise content metadata will get the full path
|
20
48
|
# @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
|
21
|
-
# options are :override (from manual overide mapping if exists),
|
22
|
-
# :
|
23
|
-
#
|
49
|
+
# options are :override (from manual override mapping if exists),
|
50
|
+
# :exif (from exif if exists)
|
51
|
+
# :extension (from file extension)
|
52
|
+
# :file (from unix file system command)
|
53
|
+
# the default is defined in the private `default_mime_type_order` method
|
54
|
+
# but you can override to set your own order
|
24
55
|
# @example
|
25
56
|
# Assembly::ObjectFile.new('/input/path_to_file.tif')
|
26
57
|
def initialize(path, params = {})
|
@@ -33,24 +64,6 @@ module Assembly
|
|
33
64
|
@mime_type_order = params[:mime_type_order] || default_mime_type_order
|
34
65
|
end
|
35
66
|
|
36
|
-
# @return [String] DPG base filename, removing the extension and the '00','05', etc. placeholders
|
37
|
-
# @example
|
38
|
-
# source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
39
|
-
# puts source_file.dpg_basename # "cy565rm7188_001"
|
40
|
-
def dpg_basename
|
41
|
-
file_parts = File.basename(path, ext).split('_')
|
42
|
-
file_parts.size == 3 ? "#{file_parts[0]}_#{file_parts[2]}" : filename_without_ext
|
43
|
-
end
|
44
|
-
|
45
|
-
# @return [String] DPG subfolder for the given filename, i.e. '00','05', etc.
|
46
|
-
# @example
|
47
|
-
# source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
48
|
-
# puts source_file.dpg_folder # "00"
|
49
|
-
def dpg_folder
|
50
|
-
file_parts = File.basename(path, ext).split('_')
|
51
|
-
file_parts.size == 3 ? file_parts[1] : ''
|
52
|
-
end
|
53
|
-
|
54
67
|
# @return [String] base filename
|
55
68
|
# @example
|
56
69
|
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
@@ -91,6 +104,13 @@ module Assembly
|
|
91
104
|
@exif ||= begin
|
92
105
|
check_for_file
|
93
106
|
MiniExiftool.new(path, replace_invalid_chars: '?')
|
107
|
+
rescue MiniExiftool::Error
|
108
|
+
# MiniExiftool will throw an exception when it tries to initialize for problematic files,
|
109
|
+
# but the exception it throws does not tell you the file that caused the problem.
|
110
|
+
# Instead, we will raise our own exception with more context in logging/reporting upstream.
|
111
|
+
# Note: if the file that causes the problem should NOT use exiftool to determine mimetype, add it to the skipped
|
112
|
+
# mimetypes in Assembly::TRUSTED_MIMETYPES to bypass initialization of MiniExiftool for mimetype generation
|
113
|
+
raise MiniExiftool::Error, "error initializing MiniExiftool for #{path}"
|
94
114
|
end
|
95
115
|
end
|
96
116
|
|
@@ -125,72 +145,15 @@ module Assembly
|
|
125
145
|
check_for_file
|
126
146
|
mimetype = ''
|
127
147
|
mime_type_order.each do |mime_type_method|
|
128
|
-
mimetype =
|
148
|
+
mimetype = send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
|
129
149
|
break if mimetype.present?
|
130
150
|
end
|
131
151
|
mimetype
|
132
152
|
end
|
133
153
|
end
|
134
154
|
|
135
|
-
#
|
136
|
-
#
|
137
|
-
# @example
|
138
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
|
139
|
-
# puts source_file.override_mimetype # 'application/json'
|
140
|
-
def override_mimetype
|
141
|
-
@override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
|
142
|
-
end
|
143
|
-
|
144
|
-
# Returns mimetype information using the mime-types gem (based on a file extension lookup)
|
145
|
-
# @return [String] mime type for supplied file
|
146
|
-
# @example
|
147
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
148
|
-
# puts source_file.extension_mimetype # 'text/plain'
|
149
|
-
def extension_mimetype
|
150
|
-
@extension_mimetype ||= begin
|
151
|
-
mtype = MIME::Types.type_for(path).first
|
152
|
-
mtype ? mtype.content_type : ''
|
153
|
-
end
|
154
|
-
end
|
155
|
-
|
156
|
-
# Returns mimetype information for the current file based on unix file system command.
|
157
|
-
# @return [String] mime type for supplied file
|
158
|
-
# @example
|
159
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
160
|
-
# puts source_file.file_mimetype # 'text/plain'
|
161
|
-
def file_mimetype
|
162
|
-
@file_mimetype ||= begin
|
163
|
-
check_for_file
|
164
|
-
`file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
|
165
|
-
end
|
166
|
-
end
|
167
|
-
|
168
|
-
# Returns mimetype information for the current file based on exif data (if available and not a trusted source that we'd rather get from the file system command)
|
169
|
-
# @return [String] mime type for supplied file
|
170
|
-
# @example
|
171
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
172
|
-
# puts source_file.exif_mimetype # 'text/plain'
|
173
|
-
def exif_mimetype
|
174
|
-
@exif_mimetype ||= begin
|
175
|
-
check_for_file
|
176
|
-
prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype) # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
|
177
|
-
exif.mimetype if exif&.mimetype && prefer_exif
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
# @note Uses shell call to "file", only expected to work on unix based systems
|
182
|
-
# @return [String] encoding for supplied file
|
183
|
-
# @example
|
184
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
185
|
-
# puts source_file.encoding # 'us-ascii'
|
186
|
-
def encoding
|
187
|
-
@encoding ||= begin
|
188
|
-
check_for_file
|
189
|
-
`file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
|
190
|
-
end
|
191
|
-
end
|
192
|
-
|
193
|
-
# @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
|
155
|
+
# @return [Symbol] the type of object, could be :application (for PDF or Word, etc),
|
156
|
+
# :audio, :image, :message, :model, :multipart, :text or :video
|
194
157
|
# @example
|
195
158
|
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
196
159
|
# puts source_file.object_type # :image
|
@@ -219,18 +182,10 @@ module Assembly
|
|
219
182
|
mimetype == 'image/jp2' || jp2able?
|
220
183
|
end
|
221
184
|
|
222
|
-
#
|
223
|
-
#
|
224
|
-
#
|
225
|
-
#
|
226
|
-
def has_color_profile?
|
227
|
-
return false unless exif
|
228
|
-
|
229
|
-
exif['profiledescription'] || exif['colorspace'] ? true : false
|
230
|
-
end
|
231
|
-
|
232
|
-
# Examines the input image for validity to create a jp2. Same as valid_image? but also confirms the existence of a profile description and further restricts mimetypes.
|
233
|
-
# It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
|
185
|
+
# Examines the input image for validity to create a jp2. Same as valid_image? but also confirms
|
186
|
+
# the existence of a profile description and further restricts mimetypes.
|
187
|
+
# It is used by the assembly robots to decide if a jp2 will be created and is also called before
|
188
|
+
# you create a jp2 using assembly-image.
|
234
189
|
# @return [Boolean] true if image should have a jp2 created, false if not.
|
235
190
|
# @example
|
236
191
|
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
@@ -262,14 +217,74 @@ module Assembly
|
|
262
217
|
|
263
218
|
private
|
264
219
|
|
220
|
+
# private method to check for file existence before operating on it
|
221
|
+
def check_for_file
|
222
|
+
raise "input file #{path} does not exist or is a directory" unless file_exists?
|
223
|
+
end
|
224
|
+
|
265
225
|
# private method defining default preferred ordering of how mimetypes are determined
|
266
226
|
def default_mime_type_order
|
267
227
|
%i[override exif file extension]
|
268
228
|
end
|
269
229
|
|
270
|
-
#
|
271
|
-
|
272
|
-
|
230
|
+
# Returns mimetype information using the mime-types gem (based on a file extension lookup)
|
231
|
+
# @return [String] mime type for supplied file
|
232
|
+
# @example
|
233
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
234
|
+
# puts source_file.extension_mimetype # 'text/plain'
|
235
|
+
def extension_mimetype
|
236
|
+
@extension_mimetype ||= begin
|
237
|
+
mtype = MIME::Types.type_for(path).first
|
238
|
+
mtype ? mtype.content_type : ''
|
239
|
+
end
|
240
|
+
end
|
241
|
+
|
242
|
+
# Returns mimetype information for the current file based on unix file system command.
|
243
|
+
# @return [String] mime type for supplied file
|
244
|
+
# @example
|
245
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
246
|
+
# puts source_file.file_mimetype # 'text/plain'
|
247
|
+
def file_mimetype
|
248
|
+
@file_mimetype ||= begin
|
249
|
+
check_for_file
|
250
|
+
`file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # get the mimetype from the unix file command
|
251
|
+
end
|
252
|
+
end
|
253
|
+
|
254
|
+
# Returns mimetype information for the current file based on exif data
|
255
|
+
# (if available and not a trusted source that we'd rather get from the file system command)
|
256
|
+
# @return [String] mime type for supplied file
|
257
|
+
# @example
|
258
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
259
|
+
# puts source_file.exif_mimetype # 'text/plain'
|
260
|
+
def exif_mimetype
|
261
|
+
@exif_mimetype ||= begin
|
262
|
+
check_for_file
|
263
|
+
# if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
|
264
|
+
prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype)
|
265
|
+
exif.mimetype if prefer_exif && exif&.mimetype
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
# Returns mimetype information using the manual override mapping (based on a file extension lookup)
|
270
|
+
# @return [String] mime type for supplied file if a mapping exists for the file's extension
|
271
|
+
# @example
|
272
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
|
273
|
+
# puts source_file.override_mimetype # 'application/json'
|
274
|
+
def override_mimetype
|
275
|
+
@override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
|
276
|
+
end
|
277
|
+
|
278
|
+
# @note Uses shell call to "file", only expected to work on unix based systems
|
279
|
+
# @return [String] encoding for supplied file
|
280
|
+
# @example
|
281
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
282
|
+
# puts source_file.encoding # 'us-ascii'
|
283
|
+
def encoding
|
284
|
+
@encoding ||= begin
|
285
|
+
check_for_file
|
286
|
+
`file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
|
287
|
+
end
|
273
288
|
end
|
274
289
|
end
|
275
290
|
end
|