assembly-objectfile 1.13.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +1 -1
- data/.gitignore +0 -1
- data/.rubocop.yml +34 -1
- data/.rubocop_todo.yml +3 -120
- data/Gemfile.lock +97 -0
- data/README.md +6 -6
- data/assembly-objectfile.gemspec +4 -8
- data/config/boot.rb +0 -1
- data/lib/{assembly-objectfile → assembly/object_file}/version.rb +1 -1
- data/lib/{assembly-objectfile/object_fileable.rb → assembly/object_file.rb} +116 -101
- data/lib/assembly-objectfile.rb +14 -19
- data/spec/assembly/object_file_spec.rb +452 -0
- data/spec/spec_helper.rb +3 -37
- metadata +10 -143
- data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
- data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
- data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
- data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
- data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
- data/lib/assembly-objectfile/content_metadata.rb +0 -117
- data/lib/assembly-objectfile/object_file.rb +0 -29
- data/profiles/AdobeRGB1998.icc +0 -0
- data/profiles/DotGain20.icc +0 -0
- data/profiles/sRGBIEC6196621.icc +0 -0
- data/spec/content_metadata_spec.rb +0 -809
- data/spec/object_file_spec.rb +0 -222
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
- data/spec/test_data/input/res1_image1.jp2 +0 -0
- data/spec/test_data/input/res1_image2.jp2 +0 -0
- data/spec/test_data/input/res1_image2.tif +0 -0
- data/spec/test_data/input/res1_teifile.txt +0 -1
- data/spec/test_data/input/res2_image1.jp2 +0 -0
- data/spec/test_data/input/res2_image1.tif +0 -0
- data/spec/test_data/input/res2_image2.jp2 +0 -0
- data/spec/test_data/input/res2_image2.tif +0 -0
- data/spec/test_data/input/res2_teifile.txt +0 -1
- data/spec/test_data/input/res2_textfile.txt +0 -1
- data/spec/test_data/input/res3_image1.jp2 +0 -0
- data/spec/test_data/input/res3_image1.tif +0 -0
- data/spec/test_data/input/res3_teifile.txt +0 -1
- data/spec/test_data/input/test.pdf +0 -1
- data/spec/test_data/input/test.svg +0 -2
- data/spec/test_data/input/test2.jp2 +0 -0
- data/spec/test_data/input/test2.tif +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 76dac08191a4c118cf16e4736b9538c9d2a60811cf9547e0ad395ab468b1b51a
|
4
|
+
data.tar.gz: b9fec1e99d03941f658cd6716e1c00db37ce2f2042816e4ba8f77246b102d1f4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '082fc021ca4f4dde1f3eaaac937e66862d8ea36f5198bdece9866a72e5be068021665e10bd40b8ea2835d848c062886982ed41033935120525c8d4e57efadafd'
|
7
|
+
data.tar.gz: cf2e4facd4248532c5f92c741f80b5e8d27e04b8cc23e3b7959b59118cceaf8122d03aa92af8faf99c8000bb4298b74d9ab86001744c6f852cd148ceda21d675
|
data/.circleci/config.yml
CHANGED
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -9,12 +9,32 @@ AllCops:
|
|
9
9
|
- '**/*.md'
|
10
10
|
- 'vendor/**/*' # avoid running rubocop on cached bundler
|
11
11
|
|
12
|
+
Layout/LineLength:
|
13
|
+
Max: 120
|
14
|
+
|
15
|
+
Naming/FileName:
|
16
|
+
Exclude:
|
17
|
+
- 'lib/assembly-objectfile.rb'
|
18
|
+
|
19
|
+
Naming/PredicateName:
|
20
|
+
NamePrefix:
|
21
|
+
- 'is_'
|
22
|
+
|
12
23
|
Metrics/BlockLength:
|
13
24
|
Exclude:
|
14
25
|
- 'spec/**/*.rb'
|
15
26
|
- '**/*.gemspec'
|
16
27
|
|
17
|
-
|
28
|
+
RSpec/ExampleLength:
|
29
|
+
Max: 10
|
30
|
+
|
31
|
+
RSpec/MultipleExpectations:
|
32
|
+
Max: 10
|
33
|
+
|
34
|
+
RSpec/NestedGroups:
|
35
|
+
Max: 4
|
36
|
+
|
37
|
+
Gemspec/DeprecatedAttributeAssignment: # (new in 1.10)
|
18
38
|
Enabled: true
|
19
39
|
|
20
40
|
Layout/SpaceAroundMethodCallOperator:
|
@@ -207,3 +227,16 @@ RSpec/ChangeByZero: # new in 2.11.0
|
|
207
227
|
Enabled: true
|
208
228
|
RSpec/VerifiedDoubleReference: # new in 2.10.0
|
209
229
|
Enabled: true
|
230
|
+
|
231
|
+
Layout/LineContinuationLeadingSpace: # new in 1.31
|
232
|
+
Enabled: true
|
233
|
+
Layout/LineContinuationSpacing: # new in 1.31
|
234
|
+
Enabled: true
|
235
|
+
Lint/ConstantOverwrittenInRescue: # new in 1.31
|
236
|
+
Enabled: true
|
237
|
+
Lint/NonAtomicFileOperation: # new in 1.31
|
238
|
+
Enabled: true
|
239
|
+
RSpec/Capybara/SpecificMatcher: # new in 2.12
|
240
|
+
Enabled: true
|
241
|
+
RSpec/Rails/HaveHttpStatus: # new in 2.12
|
242
|
+
Enabled: true
|
data/.rubocop_todo.yml
CHANGED
@@ -1,129 +1,12 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2022-
|
3
|
+
# on 2022-07-20 17:16:46 UTC using RuboCop version 1.31.2.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
-
# Offense count: 1
|
10
|
-
Lint/UselessAssignment:
|
11
|
-
Exclude:
|
12
|
-
- 'config/boot.rb'
|
13
|
-
|
14
|
-
# Offense count: 3
|
15
|
-
# Configuration parameters: IgnoredMethods, CountRepeatedAttributes.
|
16
|
-
Metrics/AbcSize:
|
17
|
-
Max: 55
|
18
|
-
|
19
|
-
# Offense count: 1
|
20
|
-
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
|
21
|
-
# IgnoredMethods: refine
|
22
|
-
Metrics/BlockLength:
|
23
|
-
Max: 27
|
24
|
-
|
25
|
-
# Offense count: 2
|
26
|
-
# Configuration parameters: IgnoredMethods.
|
27
|
-
Metrics/CyclomaticComplexity:
|
28
|
-
Max: 14
|
29
|
-
|
30
|
-
# Offense count: 4
|
31
|
-
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
|
32
|
-
Metrics/MethodLength:
|
33
|
-
Max: 31
|
34
|
-
|
35
9
|
# Offense count: 1
|
36
10
|
# Configuration parameters: CountComments, CountAsOne.
|
37
|
-
Metrics/
|
38
|
-
Max:
|
39
|
-
|
40
|
-
# Offense count: 1
|
41
|
-
# Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
|
42
|
-
Metrics/ParameterLists:
|
43
|
-
Max: 12
|
44
|
-
|
45
|
-
# Offense count: 2
|
46
|
-
# Configuration parameters: IgnoredMethods.
|
47
|
-
Metrics/PerceivedComplexity:
|
48
|
-
Max: 15
|
49
|
-
|
50
|
-
# Offense count: 1
|
51
|
-
# Configuration parameters: ExpectMatchingDefinition, CheckDefinitionPathHierarchy, CheckDefinitionPathHierarchyRoots, Regex, IgnoreExecutableScripts, AllowedAcronyms.
|
52
|
-
# CheckDefinitionPathHierarchyRoots: lib, spec, test, src
|
53
|
-
# AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
|
54
|
-
Naming/FileName:
|
55
|
-
Exclude:
|
56
|
-
- 'lib/assembly-objectfile.rb'
|
57
|
-
|
58
|
-
# Offense count: 1
|
59
|
-
# Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros.
|
60
|
-
# NamePrefix: is_, has_, have_
|
61
|
-
# ForbiddenPrefixes: is_, has_, have_
|
62
|
-
# AllowedMethods: is_a?
|
63
|
-
# MethodDefinitionMacros: define_method, define_singleton_method
|
64
|
-
Naming/PredicateName:
|
65
|
-
Exclude:
|
66
|
-
- 'spec/**/*'
|
67
|
-
- 'lib/assembly-objectfile/object_fileable.rb'
|
68
|
-
|
69
|
-
# Offense count: 1
|
70
|
-
# Configuration parameters: Prefixes.
|
71
|
-
# Prefixes: when, with, without
|
72
|
-
RSpec/ContextWording:
|
73
|
-
Exclude:
|
74
|
-
- 'spec/content_metadata_spec.rb'
|
75
|
-
|
76
|
-
# Offense count: 32
|
77
|
-
# Configuration parameters: CountAsOne.
|
78
|
-
RSpec/ExampleLength:
|
79
|
-
Max: 34
|
80
|
-
|
81
|
-
# Offense count: 2
|
82
|
-
# Configuration parameters: Include, CustomTransform, IgnoreMethods, SpecSuffixOnly.
|
83
|
-
# Include: **/*_spec*rb*, **/spec/**/*
|
84
|
-
RSpec/FilePath:
|
85
|
-
Exclude:
|
86
|
-
- 'spec/content_metadata_spec.rb'
|
87
|
-
- 'spec/object_file_spec.rb'
|
88
|
-
|
89
|
-
# Offense count: 74
|
90
|
-
# Configuration parameters: AssignmentOnly.
|
91
|
-
RSpec/InstanceVariable:
|
92
|
-
Exclude:
|
93
|
-
- 'spec/object_file_spec.rb'
|
94
|
-
|
95
|
-
# Offense count: 41
|
96
|
-
RSpec/MultipleExpectations:
|
97
|
-
Max: 29
|
98
|
-
|
99
|
-
# Offense count: 20
|
100
|
-
RSpec/NestedGroups:
|
101
|
-
Max: 4
|
102
|
-
|
103
|
-
# Offense count: 2
|
104
|
-
RSpec/RepeatedDescription:
|
105
|
-
Exclude:
|
106
|
-
- 'spec/object_file_spec.rb'
|
107
|
-
|
108
|
-
# Offense count: 2
|
109
|
-
RSpec/RepeatedExample:
|
110
|
-
Exclude:
|
111
|
-
- 'spec/object_file_spec.rb'
|
112
|
-
|
113
|
-
# Offense count: 5
|
114
|
-
RSpec/RepeatedExampleGroupDescription:
|
115
|
-
Exclude:
|
116
|
-
- 'spec/content_metadata_spec.rb'
|
117
|
-
|
118
|
-
# Offense count: 2
|
119
|
-
# Cop supports --auto-correct.
|
120
|
-
Style/CommentedKeyword:
|
121
|
-
Exclude:
|
122
|
-
- 'lib/assembly-objectfile/content_metadata.rb'
|
123
|
-
|
124
|
-
# Offense count: 123
|
125
|
-
# Cop supports --auto-correct.
|
126
|
-
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
|
127
|
-
# URISchemes: http, https
|
128
|
-
Layout/LineLength:
|
129
|
-
Max: 277
|
11
|
+
Metrics/ClassLength:
|
12
|
+
Max: 122
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
assembly-objectfile (2.1.1)
|
5
|
+
activesupport (>= 5.2.0)
|
6
|
+
mime-types (> 3)
|
7
|
+
mini_exiftool
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: http://rubygems.org/
|
11
|
+
specs:
|
12
|
+
activesupport (7.0.3.1)
|
13
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
14
|
+
i18n (>= 1.6, < 2)
|
15
|
+
minitest (>= 5.1)
|
16
|
+
tzinfo (~> 2.0)
|
17
|
+
ast (2.4.2)
|
18
|
+
byebug (11.1.3)
|
19
|
+
coderay (1.1.3)
|
20
|
+
concurrent-ruby (1.1.10)
|
21
|
+
diff-lcs (1.5.0)
|
22
|
+
docile (1.4.0)
|
23
|
+
i18n (1.12.0)
|
24
|
+
concurrent-ruby (~> 1.0)
|
25
|
+
json (2.6.2)
|
26
|
+
method_source (1.0.0)
|
27
|
+
mime-types (3.4.1)
|
28
|
+
mime-types-data (~> 3.2015)
|
29
|
+
mime-types-data (3.2022.0105)
|
30
|
+
mini_exiftool (2.10.2)
|
31
|
+
minitest (5.16.2)
|
32
|
+
parallel (1.22.1)
|
33
|
+
parser (3.1.2.0)
|
34
|
+
ast (~> 2.4.1)
|
35
|
+
pry (0.13.1)
|
36
|
+
coderay (~> 1.1)
|
37
|
+
method_source (~> 1.0)
|
38
|
+
pry-byebug (3.9.0)
|
39
|
+
byebug (~> 11.0)
|
40
|
+
pry (~> 0.13.0)
|
41
|
+
rainbow (3.1.1)
|
42
|
+
rake (13.0.6)
|
43
|
+
regexp_parser (2.5.0)
|
44
|
+
rexml (3.2.5)
|
45
|
+
rspec (3.11.0)
|
46
|
+
rspec-core (~> 3.11.0)
|
47
|
+
rspec-expectations (~> 3.11.0)
|
48
|
+
rspec-mocks (~> 3.11.0)
|
49
|
+
rspec-core (3.11.0)
|
50
|
+
rspec-support (~> 3.11.0)
|
51
|
+
rspec-expectations (3.11.0)
|
52
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
53
|
+
rspec-support (~> 3.11.0)
|
54
|
+
rspec-mocks (3.11.1)
|
55
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
56
|
+
rspec-support (~> 3.11.0)
|
57
|
+
rspec-support (3.11.0)
|
58
|
+
rubocop (1.31.2)
|
59
|
+
json (~> 2.3)
|
60
|
+
parallel (~> 1.10)
|
61
|
+
parser (>= 3.1.0.0)
|
62
|
+
rainbow (>= 2.2.2, < 4.0)
|
63
|
+
regexp_parser (>= 1.8, < 3.0)
|
64
|
+
rexml (>= 3.2.5, < 4.0)
|
65
|
+
rubocop-ast (>= 1.18.0, < 2.0)
|
66
|
+
ruby-progressbar (~> 1.7)
|
67
|
+
unicode-display_width (>= 1.4.0, < 3.0)
|
68
|
+
rubocop-ast (1.19.1)
|
69
|
+
parser (>= 3.1.1.0)
|
70
|
+
rubocop-rspec (2.12.1)
|
71
|
+
rubocop (~> 1.31)
|
72
|
+
ruby-progressbar (1.11.0)
|
73
|
+
simplecov (0.21.2)
|
74
|
+
docile (~> 1.1)
|
75
|
+
simplecov-html (~> 0.11)
|
76
|
+
simplecov_json_formatter (~> 0.1)
|
77
|
+
simplecov-html (0.12.3)
|
78
|
+
simplecov_json_formatter (0.1.4)
|
79
|
+
tzinfo (2.0.5)
|
80
|
+
concurrent-ruby (~> 1.0)
|
81
|
+
unicode-display_width (2.2.0)
|
82
|
+
|
83
|
+
PLATFORMS
|
84
|
+
ruby
|
85
|
+
|
86
|
+
DEPENDENCIES
|
87
|
+
assembly-objectfile!
|
88
|
+
byebug
|
89
|
+
pry-byebug
|
90
|
+
rake
|
91
|
+
rspec (~> 3.0)
|
92
|
+
rubocop (~> 1.25)
|
93
|
+
rubocop-rspec
|
94
|
+
simplecov
|
95
|
+
|
96
|
+
BUNDLED WITH
|
97
|
+
2.3.17
|
data/README.md
CHANGED
@@ -8,15 +8,15 @@
|
|
8
8
|
## Overview
|
9
9
|
This gem contains classes used by the Stanford University Digital Library to
|
10
10
|
perform file operations necessary for accessioning of content. It is also
|
11
|
-
used by related gems
|
12
|
-
generation).
|
11
|
+
used by related gems that perform content type specific operations (e.g.
|
12
|
+
assembly-image for jp2 generation).
|
13
13
|
|
14
14
|
## Usage
|
15
15
|
|
16
16
|
The gem currently has methods for:
|
17
17
|
* filesize
|
18
|
-
*
|
19
|
-
*
|
18
|
+
* mimetype
|
19
|
+
* exif - consumers use ExifTool to get file information
|
20
20
|
|
21
21
|
## Running tests
|
22
22
|
|
@@ -34,8 +34,8 @@ rake release
|
|
34
34
|
|
35
35
|
1. Exiftool
|
36
36
|
|
37
|
-
RHEL: (RPM to install
|
38
|
-
|
37
|
+
RHEL: (RPM to install coming soon) Download latest version from:
|
38
|
+
https://exiftool.org/
|
39
39
|
|
40
40
|
tar -xf Image-ExifTool-#.##.tar.gz
|
41
41
|
cd Image-ExifTool-#.##
|
data/assembly-objectfile.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
$LOAD_PATH.push File.expand_path('lib', __dir__)
|
4
|
-
require 'assembly
|
4
|
+
require 'assembly/object_file/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = 'assembly-objectfile'
|
@@ -10,13 +10,13 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.authors = ['Peter Mangiafico', 'Renzo Sanchez-Silva', 'Monty Hindman', 'Tony Calavano']
|
11
11
|
s.email = ['pmangiafico@stanford.edu']
|
12
12
|
s.homepage = 'https://github.com/sul-dlss/assembly-objectfile'
|
13
|
-
s.summary = 'Ruby
|
13
|
+
s.summary = 'Ruby implementation of file services needed to prepare objects to be accessioned ' \
|
14
|
+
'into the Stanford Digital Repository'
|
14
15
|
s.description = 'Get exif data, file sizes and more.'
|
15
16
|
s.license = 'ALv2'
|
16
17
|
s.metadata['rubygems_mfa_required'] = 'true'
|
17
18
|
|
18
19
|
s.files = `git ls-files`.split("\n")
|
19
|
-
s.test_files = `git ls-files -- spec/*`.split("\n")
|
20
20
|
s.bindir = 'exe'
|
21
21
|
s.executables = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
22
22
|
s.require_paths = ['lib']
|
@@ -24,14 +24,10 @@ Gem::Specification.new do |s|
|
|
24
24
|
s.required_ruby_version = '>= 3.0'
|
25
25
|
|
26
26
|
s.add_dependency 'activesupport', '>= 5.2.0'
|
27
|
-
s.add_dependency 'deprecation'
|
28
|
-
s.add_dependency 'dry-struct', '~> 1.0'
|
29
|
-
s.add_dependency 'dry-types', '~> 1.1'
|
30
27
|
s.add_dependency 'mime-types', '> 3'
|
31
28
|
s.add_dependency 'mini_exiftool'
|
32
|
-
s.add_dependency 'nokogiri'
|
33
29
|
|
34
|
-
s.add_development_dependency '
|
30
|
+
s.add_development_dependency 'pry-byebug'
|
35
31
|
s.add_development_dependency 'rake'
|
36
32
|
s.add_development_dependency 'rspec', '~> 3.0'
|
37
33
|
s.add_development_dependency 'rubocop', '~> 1.25'
|
data/config/boot.rb
CHANGED
@@ -2,25 +2,56 @@
|
|
2
2
|
|
3
3
|
require 'mini_exiftool'
|
4
4
|
require 'mime/types'
|
5
|
+
require 'active_support/core_ext/object/blank'
|
5
6
|
|
6
7
|
module Assembly
|
7
|
-
#
|
8
|
-
|
8
|
+
# This class contains generic methods to operate on any file.
|
9
|
+
class ObjectFile
|
10
|
+
# Class-level method that, given an array of strings, returns the longest common initial path.
|
11
|
+
# Useful for removing a common path from a set of filenames when producing content metadata
|
12
|
+
#
|
13
|
+
# @param [Array] strings Array of filenames with paths to operate on
|
14
|
+
# @return [String] longest common initial part of path of filenames passed in
|
15
|
+
#
|
16
|
+
# Example:
|
17
|
+
# puts Assembly::ObjectFile.common_path(['/Users/peter/00/test.tif','/Users/peter/05/test.jp2'])
|
18
|
+
# # => '/Users/peter/'
|
19
|
+
def self.common_path(strings)
|
20
|
+
return nil if strings.empty?
|
21
|
+
|
22
|
+
n = 0
|
23
|
+
x = strings.last
|
24
|
+
n += 1 while strings.all? { |s| s[n] && (s[n] == x[n]) }
|
25
|
+
common_prefix = x[0...n]
|
26
|
+
if common_prefix[-1, 1] == '/' # check if last element of the common string is the end of a directory
|
27
|
+
common_prefix # if it was, then return the common prefix directly
|
28
|
+
else
|
29
|
+
"#{common_prefix.split('/')[0..-2].join('/')}/" # if not, split string along directories, and reject last one
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
9
33
|
attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
|
10
34
|
|
11
35
|
VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
|
12
36
|
|
13
37
|
# @param [String] path full path to the file to be worked with
|
14
38
|
# @param [Hash<Symbol => Object>] params options used during content metadata generation
|
15
|
-
# @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g
|
16
|
-
#
|
39
|
+
# @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g.:
|
40
|
+
# {:preserve=>'yes',:shelve=>'no',:publish=>'no'},
|
41
|
+
# defaults pulled from mimetype
|
42
|
+
# @option params [String] :label a resource label (files bundled together will just get the first
|
43
|
+
# file's label attribute if set)
|
17
44
|
# @option params [String] :provider_md5 pre-computed MD5 checksum
|
18
45
|
# @option params [String] :provider_sha1 pre-computed SHA1 checksum
|
19
|
-
# @option params [String] :relative_path if you want the file ids in the content metadata it can be set,
|
46
|
+
# @option params [String] :relative_path if you want the file ids in the content metadata it can be set,
|
47
|
+
# otherwise content metadata will get the full path
|
20
48
|
# @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
|
21
|
-
# options are :override (from manual overide mapping if exists),
|
22
|
-
# :
|
23
|
-
#
|
49
|
+
# options are :override (from manual override mapping if exists),
|
50
|
+
# :exif (from exif if exists)
|
51
|
+
# :extension (from file extension)
|
52
|
+
# :file (from unix file system command)
|
53
|
+
# the default is defined in the private `default_mime_type_order` method
|
54
|
+
# but you can override to set your own order
|
24
55
|
# @example
|
25
56
|
# Assembly::ObjectFile.new('/input/path_to_file.tif')
|
26
57
|
def initialize(path, params = {})
|
@@ -33,24 +64,6 @@ module Assembly
|
|
33
64
|
@mime_type_order = params[:mime_type_order] || default_mime_type_order
|
34
65
|
end
|
35
66
|
|
36
|
-
# @return [String] DPG base filename, removing the extension and the '00','05', etc. placeholders
|
37
|
-
# @example
|
38
|
-
# source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
39
|
-
# puts source_file.dpg_basename # "cy565rm7188_001"
|
40
|
-
def dpg_basename
|
41
|
-
file_parts = File.basename(path, ext).split('_')
|
42
|
-
file_parts.size == 3 ? "#{file_parts[0]}_#{file_parts[2]}" : filename_without_ext
|
43
|
-
end
|
44
|
-
|
45
|
-
# @return [String] DPG subfolder for the given filename, i.e. '00','05', etc.
|
46
|
-
# @example
|
47
|
-
# source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
|
48
|
-
# puts source_file.dpg_folder # "00"
|
49
|
-
def dpg_folder
|
50
|
-
file_parts = File.basename(path, ext).split('_')
|
51
|
-
file_parts.size == 3 ? file_parts[1] : ''
|
52
|
-
end
|
53
|
-
|
54
67
|
# @return [String] base filename
|
55
68
|
# @example
|
56
69
|
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
@@ -91,6 +104,13 @@ module Assembly
|
|
91
104
|
@exif ||= begin
|
92
105
|
check_for_file
|
93
106
|
MiniExiftool.new(path, replace_invalid_chars: '?')
|
107
|
+
rescue MiniExiftool::Error
|
108
|
+
# MiniExiftool will throw an exception when it tries to initialize for problematic files,
|
109
|
+
# but the exception it throws does not tell you the file that caused the problem.
|
110
|
+
# Instead, we will raise our own exception with more context in logging/reporting upstream.
|
111
|
+
# Note: if the file that causes the problem should NOT use exiftool to determine mimetype, add it to the skipped
|
112
|
+
# mimetypes in Assembly::TRUSTED_MIMETYPES to bypass initialization of MiniExiftool for mimetype generation
|
113
|
+
raise MiniExiftool::Error, "error initializing MiniExiftool for #{path}"
|
94
114
|
end
|
95
115
|
end
|
96
116
|
|
@@ -125,72 +145,15 @@ module Assembly
|
|
125
145
|
check_for_file
|
126
146
|
mimetype = ''
|
127
147
|
mime_type_order.each do |mime_type_method|
|
128
|
-
mimetype =
|
148
|
+
mimetype = send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
|
129
149
|
break if mimetype.present?
|
130
150
|
end
|
131
151
|
mimetype
|
132
152
|
end
|
133
153
|
end
|
134
154
|
|
135
|
-
#
|
136
|
-
#
|
137
|
-
# @example
|
138
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
|
139
|
-
# puts source_file.override_mimetype # 'application/json'
|
140
|
-
def override_mimetype
|
141
|
-
@override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
|
142
|
-
end
|
143
|
-
|
144
|
-
# Returns mimetype information using the mime-types gem (based on a file extension lookup)
|
145
|
-
# @return [String] mime type for supplied file
|
146
|
-
# @example
|
147
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
148
|
-
# puts source_file.extension_mimetype # 'text/plain'
|
149
|
-
def extension_mimetype
|
150
|
-
@extension_mimetype ||= begin
|
151
|
-
mtype = MIME::Types.type_for(path).first
|
152
|
-
mtype ? mtype.content_type : ''
|
153
|
-
end
|
154
|
-
end
|
155
|
-
|
156
|
-
# Returns mimetype information for the current file based on unix file system command.
|
157
|
-
# @return [String] mime type for supplied file
|
158
|
-
# @example
|
159
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
160
|
-
# puts source_file.file_mimetype # 'text/plain'
|
161
|
-
def file_mimetype
|
162
|
-
@file_mimetype ||= begin
|
163
|
-
check_for_file
|
164
|
-
`file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
|
165
|
-
end
|
166
|
-
end
|
167
|
-
|
168
|
-
# Returns mimetype information for the current file based on exif data (if available and not a trusted source that we'd rather get from the file system command)
|
169
|
-
# @return [String] mime type for supplied file
|
170
|
-
# @example
|
171
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
172
|
-
# puts source_file.exif_mimetype # 'text/plain'
|
173
|
-
def exif_mimetype
|
174
|
-
@exif_mimetype ||= begin
|
175
|
-
check_for_file
|
176
|
-
prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype) # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
|
177
|
-
exif.mimetype if exif&.mimetype && prefer_exif
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
# @note Uses shell call to "file", only expected to work on unix based systems
|
182
|
-
# @return [String] encoding for supplied file
|
183
|
-
# @example
|
184
|
-
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
185
|
-
# puts source_file.encoding # 'us-ascii'
|
186
|
-
def encoding
|
187
|
-
@encoding ||= begin
|
188
|
-
check_for_file
|
189
|
-
`file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
|
190
|
-
end
|
191
|
-
end
|
192
|
-
|
193
|
-
# @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
|
155
|
+
# @return [Symbol] the type of object, could be :application (for PDF or Word, etc),
|
156
|
+
# :audio, :image, :message, :model, :multipart, :text or :video
|
194
157
|
# @example
|
195
158
|
# source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
196
159
|
# puts source_file.object_type # :image
|
@@ -219,18 +182,10 @@ module Assembly
|
|
219
182
|
mimetype == 'image/jp2' || jp2able?
|
220
183
|
end
|
221
184
|
|
222
|
-
#
|
223
|
-
#
|
224
|
-
#
|
225
|
-
#
|
226
|
-
def has_color_profile?
|
227
|
-
return false unless exif
|
228
|
-
|
229
|
-
exif['profiledescription'] || exif['colorspace'] ? true : false
|
230
|
-
end
|
231
|
-
|
232
|
-
# Examines the input image for validity to create a jp2. Same as valid_image? but also confirms the existence of a profile description and further restricts mimetypes.
|
233
|
-
# It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
|
185
|
+
# Examines the input image for validity to create a jp2. Same as valid_image? but also confirms
|
186
|
+
# the existence of a profile description and further restricts mimetypes.
|
187
|
+
# It is used by the assembly robots to decide if a jp2 will be created and is also called before
|
188
|
+
# you create a jp2 using assembly-image.
|
234
189
|
# @return [Boolean] true if image should have a jp2 created, false if not.
|
235
190
|
# @example
|
236
191
|
# source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
|
@@ -262,14 +217,74 @@ module Assembly
|
|
262
217
|
|
263
218
|
private
|
264
219
|
|
220
|
+
# private method to check for file existence before operating on it
|
221
|
+
def check_for_file
|
222
|
+
raise "input file #{path} does not exist or is a directory" unless file_exists?
|
223
|
+
end
|
224
|
+
|
265
225
|
# private method defining default preferred ordering of how mimetypes are determined
|
266
226
|
def default_mime_type_order
|
267
227
|
%i[override exif file extension]
|
268
228
|
end
|
269
229
|
|
270
|
-
#
|
271
|
-
|
272
|
-
|
230
|
+
# Returns mimetype information using the mime-types gem (based on a file extension lookup)
|
231
|
+
# @return [String] mime type for supplied file
|
232
|
+
# @example
|
233
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
234
|
+
# puts source_file.extension_mimetype # 'text/plain'
|
235
|
+
def extension_mimetype
|
236
|
+
@extension_mimetype ||= begin
|
237
|
+
mtype = MIME::Types.type_for(path).first
|
238
|
+
mtype ? mtype.content_type : ''
|
239
|
+
end
|
240
|
+
end
|
241
|
+
|
242
|
+
# Returns mimetype information for the current file based on unix file system command.
|
243
|
+
# @return [String] mime type for supplied file
|
244
|
+
# @example
|
245
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
246
|
+
# puts source_file.file_mimetype # 'text/plain'
|
247
|
+
def file_mimetype
|
248
|
+
@file_mimetype ||= begin
|
249
|
+
check_for_file
|
250
|
+
`file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # get the mimetype from the unix file command
|
251
|
+
end
|
252
|
+
end
|
253
|
+
|
254
|
+
# Returns mimetype information for the current file based on exif data
|
255
|
+
# (if available and not a trusted source that we'd rather get from the file system command)
|
256
|
+
# @return [String] mime type for supplied file
|
257
|
+
# @example
|
258
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
259
|
+
# puts source_file.exif_mimetype # 'text/plain'
|
260
|
+
def exif_mimetype
|
261
|
+
@exif_mimetype ||= begin
|
262
|
+
check_for_file
|
263
|
+
# if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
|
264
|
+
prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype)
|
265
|
+
exif.mimetype if prefer_exif && exif&.mimetype
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
# Returns mimetype information using the manual override mapping (based on a file extension lookup)
|
270
|
+
# @return [String] mime type for supplied file if a mapping exists for the file's extension
|
271
|
+
# @example
|
272
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
|
273
|
+
# puts source_file.override_mimetype # 'application/json'
|
274
|
+
def override_mimetype
|
275
|
+
@override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
|
276
|
+
end
|
277
|
+
|
278
|
+
# @note Uses shell call to "file", only expected to work on unix based systems
|
279
|
+
# @return [String] encoding for supplied file
|
280
|
+
# @example
|
281
|
+
# source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
|
282
|
+
# puts source_file.encoding # 'us-ascii'
|
283
|
+
def encoding
|
284
|
+
@encoding ||= begin
|
285
|
+
check_for_file
|
286
|
+
`file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
|
287
|
+
end
|
273
288
|
end
|
274
289
|
end
|
275
290
|
end
|