assembly-objectfile 1.13.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +1 -1
  3. data/.gitignore +0 -1
  4. data/.rubocop.yml +34 -1
  5. data/.rubocop_todo.yml +3 -120
  6. data/Gemfile.lock +97 -0
  7. data/README.md +6 -6
  8. data/assembly-objectfile.gemspec +4 -8
  9. data/config/boot.rb +0 -1
  10. data/lib/{assembly-objectfile → assembly/object_file}/version.rb +1 -1
  11. data/lib/{assembly-objectfile/object_fileable.rb → assembly/object_file.rb} +116 -101
  12. data/lib/assembly-objectfile.rb +14 -19
  13. data/spec/assembly/object_file_spec.rb +452 -0
  14. data/spec/spec_helper.rb +3 -37
  15. metadata +10 -143
  16. data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
  17. data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
  18. data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
  19. data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
  20. data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
  21. data/lib/assembly-objectfile/content_metadata.rb +0 -117
  22. data/lib/assembly-objectfile/object_file.rb +0 -29
  23. data/profiles/AdobeRGB1998.icc +0 -0
  24. data/profiles/DotGain20.icc +0 -0
  25. data/profiles/sRGBIEC6196621.icc +0 -0
  26. data/spec/content_metadata_spec.rb +0 -809
  27. data/spec/object_file_spec.rb +0 -222
  28. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
  29. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
  30. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
  31. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
  32. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
  33. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
  34. data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
  35. data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
  36. data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
  37. data/spec/test_data/input/res1_image1.jp2 +0 -0
  38. data/spec/test_data/input/res1_image2.jp2 +0 -0
  39. data/spec/test_data/input/res1_image2.tif +0 -0
  40. data/spec/test_data/input/res1_teifile.txt +0 -1
  41. data/spec/test_data/input/res2_image1.jp2 +0 -0
  42. data/spec/test_data/input/res2_image1.tif +0 -0
  43. data/spec/test_data/input/res2_image2.jp2 +0 -0
  44. data/spec/test_data/input/res2_image2.tif +0 -0
  45. data/spec/test_data/input/res2_teifile.txt +0 -1
  46. data/spec/test_data/input/res2_textfile.txt +0 -1
  47. data/spec/test_data/input/res3_image1.jp2 +0 -0
  48. data/spec/test_data/input/res3_image1.tif +0 -0
  49. data/spec/test_data/input/res3_teifile.txt +0 -1
  50. data/spec/test_data/input/test.pdf +0 -1
  51. data/spec/test_data/input/test.svg +0 -2
  52. data/spec/test_data/input/test2.jp2 +0 -0
  53. data/spec/test_data/input/test2.tif +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8c85bb6c05fe0d46e9b2f37e90710043efce16322c5d58e0b0a9578fd21ece77
4
- data.tar.gz: c7e3ac5e3f0a0aa2124f8ce3d6587de2d02923c733ee94b1c299ae171101f5a5
3
+ metadata.gz: 76dac08191a4c118cf16e4736b9538c9d2a60811cf9547e0ad395ab468b1b51a
4
+ data.tar.gz: b9fec1e99d03941f658cd6716e1c00db37ce2f2042816e4ba8f77246b102d1f4
5
5
  SHA512:
6
- metadata.gz: fbad307e9a103af61b4a6b05b35bb6b98953774a2c9bd32cb1404f1a9ff6e3404fe78cfc48f9061bfffaa23c3610f2a6f19df5e08975a20c6ee387073cc1010d
7
- data.tar.gz: 717e174ccf88a94880f4a832b2d281b351774299fb1273a96b4a1bb942e360cd20b0ba3ac68b7ad4f113e3bd3c78652c7670efcf7bcf8f9bcf068972ba5cd42d
6
+ metadata.gz: '082fc021ca4f4dde1f3eaaac937e66862d8ea36f5198bdece9866a72e5be068021665e10bd40b8ea2835d848c062886982ed41033935120525c8d4e57efadafd'
7
+ data.tar.gz: cf2e4facd4248532c5f92c741f80b5e8d27e04b8cc23e3b7959b59118cceaf8122d03aa92af8faf99c8000bb4298b74d9ab86001744c6f852cd148ceda21d675
data/.circleci/config.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  version: 2.1
2
2
  orbs:
3
- ruby-rails: sul-dlss/ruby-rails@3.0.1
3
+ ruby-rails: sul-dlss/ruby-rails@3.1.2
4
4
  workflows:
5
5
  build:
6
6
  jobs:
data/.gitignore CHANGED
@@ -10,4 +10,3 @@ log/*
10
10
  pkg/*
11
11
  tags
12
12
  tmp
13
- Gemfile.lock
data/.rubocop.yml CHANGED
@@ -9,12 +9,32 @@ AllCops:
9
9
  - '**/*.md'
10
10
  - 'vendor/**/*' # avoid running rubocop on cached bundler
11
11
 
12
+ Layout/LineLength:
13
+ Max: 120
14
+
15
+ Naming/FileName:
16
+ Exclude:
17
+ - 'lib/assembly-objectfile.rb'
18
+
19
+ Naming/PredicateName:
20
+ NamePrefix:
21
+ - 'is_'
22
+
12
23
  Metrics/BlockLength:
13
24
  Exclude:
14
25
  - 'spec/**/*.rb'
15
26
  - '**/*.gemspec'
16
27
 
17
- Gemspec/DateAssignment: # (new in 1.10)
28
+ RSpec/ExampleLength:
29
+ Max: 10
30
+
31
+ RSpec/MultipleExpectations:
32
+ Max: 10
33
+
34
+ RSpec/NestedGroups:
35
+ Max: 4
36
+
37
+ Gemspec/DeprecatedAttributeAssignment: # (new in 1.10)
18
38
  Enabled: true
19
39
 
20
40
  Layout/SpaceAroundMethodCallOperator:
@@ -207,3 +227,16 @@ RSpec/ChangeByZero: # new in 2.11.0
207
227
  Enabled: true
208
228
  RSpec/VerifiedDoubleReference: # new in 2.10.0
209
229
  Enabled: true
230
+
231
+ Layout/LineContinuationLeadingSpace: # new in 1.31
232
+ Enabled: true
233
+ Layout/LineContinuationSpacing: # new in 1.31
234
+ Enabled: true
235
+ Lint/ConstantOverwrittenInRescue: # new in 1.31
236
+ Enabled: true
237
+ Lint/NonAtomicFileOperation: # new in 1.31
238
+ Enabled: true
239
+ RSpec/Capybara/SpecificMatcher: # new in 2.12
240
+ Enabled: true
241
+ RSpec/Rails/HaveHttpStatus: # new in 2.12
242
+ Enabled: true
data/.rubocop_todo.yml CHANGED
@@ -1,129 +1,12 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2022-02-28 21:11:02 UTC using RuboCop version 1.25.1.
3
+ # on 2022-07-20 17:16:46 UTC using RuboCop version 1.31.2.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 1
10
- Lint/UselessAssignment:
11
- Exclude:
12
- - 'config/boot.rb'
13
-
14
- # Offense count: 3
15
- # Configuration parameters: IgnoredMethods, CountRepeatedAttributes.
16
- Metrics/AbcSize:
17
- Max: 55
18
-
19
- # Offense count: 1
20
- # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
21
- # IgnoredMethods: refine
22
- Metrics/BlockLength:
23
- Max: 27
24
-
25
- # Offense count: 2
26
- # Configuration parameters: IgnoredMethods.
27
- Metrics/CyclomaticComplexity:
28
- Max: 14
29
-
30
- # Offense count: 4
31
- # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
32
- Metrics/MethodLength:
33
- Max: 31
34
-
35
9
  # Offense count: 1
36
10
  # Configuration parameters: CountComments, CountAsOne.
37
- Metrics/ModuleLength:
38
- Max: 120
39
-
40
- # Offense count: 1
41
- # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
42
- Metrics/ParameterLists:
43
- Max: 12
44
-
45
- # Offense count: 2
46
- # Configuration parameters: IgnoredMethods.
47
- Metrics/PerceivedComplexity:
48
- Max: 15
49
-
50
- # Offense count: 1
51
- # Configuration parameters: ExpectMatchingDefinition, CheckDefinitionPathHierarchy, CheckDefinitionPathHierarchyRoots, Regex, IgnoreExecutableScripts, AllowedAcronyms.
52
- # CheckDefinitionPathHierarchyRoots: lib, spec, test, src
53
- # AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
54
- Naming/FileName:
55
- Exclude:
56
- - 'lib/assembly-objectfile.rb'
57
-
58
- # Offense count: 1
59
- # Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros.
60
- # NamePrefix: is_, has_, have_
61
- # ForbiddenPrefixes: is_, has_, have_
62
- # AllowedMethods: is_a?
63
- # MethodDefinitionMacros: define_method, define_singleton_method
64
- Naming/PredicateName:
65
- Exclude:
66
- - 'spec/**/*'
67
- - 'lib/assembly-objectfile/object_fileable.rb'
68
-
69
- # Offense count: 1
70
- # Configuration parameters: Prefixes.
71
- # Prefixes: when, with, without
72
- RSpec/ContextWording:
73
- Exclude:
74
- - 'spec/content_metadata_spec.rb'
75
-
76
- # Offense count: 32
77
- # Configuration parameters: CountAsOne.
78
- RSpec/ExampleLength:
79
- Max: 34
80
-
81
- # Offense count: 2
82
- # Configuration parameters: Include, CustomTransform, IgnoreMethods, SpecSuffixOnly.
83
- # Include: **/*_spec*rb*, **/spec/**/*
84
- RSpec/FilePath:
85
- Exclude:
86
- - 'spec/content_metadata_spec.rb'
87
- - 'spec/object_file_spec.rb'
88
-
89
- # Offense count: 74
90
- # Configuration parameters: AssignmentOnly.
91
- RSpec/InstanceVariable:
92
- Exclude:
93
- - 'spec/object_file_spec.rb'
94
-
95
- # Offense count: 41
96
- RSpec/MultipleExpectations:
97
- Max: 29
98
-
99
- # Offense count: 20
100
- RSpec/NestedGroups:
101
- Max: 4
102
-
103
- # Offense count: 2
104
- RSpec/RepeatedDescription:
105
- Exclude:
106
- - 'spec/object_file_spec.rb'
107
-
108
- # Offense count: 2
109
- RSpec/RepeatedExample:
110
- Exclude:
111
- - 'spec/object_file_spec.rb'
112
-
113
- # Offense count: 5
114
- RSpec/RepeatedExampleGroupDescription:
115
- Exclude:
116
- - 'spec/content_metadata_spec.rb'
117
-
118
- # Offense count: 2
119
- # Cop supports --auto-correct.
120
- Style/CommentedKeyword:
121
- Exclude:
122
- - 'lib/assembly-objectfile/content_metadata.rb'
123
-
124
- # Offense count: 123
125
- # Cop supports --auto-correct.
126
- # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
127
- # URISchemes: http, https
128
- Layout/LineLength:
129
- Max: 277
11
+ Metrics/ClassLength:
12
+ Max: 122
data/Gemfile.lock ADDED
@@ -0,0 +1,97 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ assembly-objectfile (2.1.1)
5
+ activesupport (>= 5.2.0)
6
+ mime-types (> 3)
7
+ mini_exiftool
8
+
9
+ GEM
10
+ remote: http://rubygems.org/
11
+ specs:
12
+ activesupport (7.0.3.1)
13
+ concurrent-ruby (~> 1.0, >= 1.0.2)
14
+ i18n (>= 1.6, < 2)
15
+ minitest (>= 5.1)
16
+ tzinfo (~> 2.0)
17
+ ast (2.4.2)
18
+ byebug (11.1.3)
19
+ coderay (1.1.3)
20
+ concurrent-ruby (1.1.10)
21
+ diff-lcs (1.5.0)
22
+ docile (1.4.0)
23
+ i18n (1.12.0)
24
+ concurrent-ruby (~> 1.0)
25
+ json (2.6.2)
26
+ method_source (1.0.0)
27
+ mime-types (3.4.1)
28
+ mime-types-data (~> 3.2015)
29
+ mime-types-data (3.2022.0105)
30
+ mini_exiftool (2.10.2)
31
+ minitest (5.16.2)
32
+ parallel (1.22.1)
33
+ parser (3.1.2.0)
34
+ ast (~> 2.4.1)
35
+ pry (0.13.1)
36
+ coderay (~> 1.1)
37
+ method_source (~> 1.0)
38
+ pry-byebug (3.9.0)
39
+ byebug (~> 11.0)
40
+ pry (~> 0.13.0)
41
+ rainbow (3.1.1)
42
+ rake (13.0.6)
43
+ regexp_parser (2.5.0)
44
+ rexml (3.2.5)
45
+ rspec (3.11.0)
46
+ rspec-core (~> 3.11.0)
47
+ rspec-expectations (~> 3.11.0)
48
+ rspec-mocks (~> 3.11.0)
49
+ rspec-core (3.11.0)
50
+ rspec-support (~> 3.11.0)
51
+ rspec-expectations (3.11.0)
52
+ diff-lcs (>= 1.2.0, < 2.0)
53
+ rspec-support (~> 3.11.0)
54
+ rspec-mocks (3.11.1)
55
+ diff-lcs (>= 1.2.0, < 2.0)
56
+ rspec-support (~> 3.11.0)
57
+ rspec-support (3.11.0)
58
+ rubocop (1.31.2)
59
+ json (~> 2.3)
60
+ parallel (~> 1.10)
61
+ parser (>= 3.1.0.0)
62
+ rainbow (>= 2.2.2, < 4.0)
63
+ regexp_parser (>= 1.8, < 3.0)
64
+ rexml (>= 3.2.5, < 4.0)
65
+ rubocop-ast (>= 1.18.0, < 2.0)
66
+ ruby-progressbar (~> 1.7)
67
+ unicode-display_width (>= 1.4.0, < 3.0)
68
+ rubocop-ast (1.19.1)
69
+ parser (>= 3.1.1.0)
70
+ rubocop-rspec (2.12.1)
71
+ rubocop (~> 1.31)
72
+ ruby-progressbar (1.11.0)
73
+ simplecov (0.21.2)
74
+ docile (~> 1.1)
75
+ simplecov-html (~> 0.11)
76
+ simplecov_json_formatter (~> 0.1)
77
+ simplecov-html (0.12.3)
78
+ simplecov_json_formatter (0.1.4)
79
+ tzinfo (2.0.5)
80
+ concurrent-ruby (~> 1.0)
81
+ unicode-display_width (2.2.0)
82
+
83
+ PLATFORMS
84
+ ruby
85
+
86
+ DEPENDENCIES
87
+ assembly-objectfile!
88
+ byebug
89
+ pry-byebug
90
+ rake
91
+ rspec (~> 3.0)
92
+ rubocop (~> 1.25)
93
+ rubocop-rspec
94
+ simplecov
95
+
96
+ BUNDLED WITH
97
+ 2.3.17
data/README.md CHANGED
@@ -8,15 +8,15 @@
8
8
  ## Overview
9
9
  This gem contains classes used by the Stanford University Digital Library to
10
10
  perform file operations necessary for accessioning of content. It is also
11
- used by related gems to perform content type specific operations (such as jp2
12
- generation).
11
+ used by related gems that perform content type specific operations (e.g.
12
+ assembly-image for jp2 generation).
13
13
 
14
14
  ## Usage
15
15
 
16
16
  The gem currently has methods for:
17
17
  * filesize
18
- * exif
19
- * generate content metadata
18
+ * mimetype
19
+ * exif - consumers use ExifTool to get file information
20
20
 
21
21
  ## Running tests
22
22
 
@@ -34,8 +34,8 @@ rake release
34
34
 
35
35
  1. Exiftool
36
36
 
37
- RHEL: (RPM to install comming soon) Download latest version from:
38
- http://www.sno.phy.queensu.ca/~phil/exiftool
37
+ RHEL: (RPM to install coming soon) Download latest version from:
38
+ https://exiftool.org/
39
39
 
40
40
  tar -xf Image-ExifTool-#.##.tar.gz
41
41
  cd Image-ExifTool-#.##
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  $LOAD_PATH.push File.expand_path('lib', __dir__)
4
- require 'assembly-objectfile/version'
4
+ require 'assembly/object_file/version'
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = 'assembly-objectfile'
@@ -10,13 +10,13 @@ Gem::Specification.new do |s|
10
10
  s.authors = ['Peter Mangiafico', 'Renzo Sanchez-Silva', 'Monty Hindman', 'Tony Calavano']
11
11
  s.email = ['pmangiafico@stanford.edu']
12
12
  s.homepage = 'https://github.com/sul-dlss/assembly-objectfile'
13
- s.summary = 'Ruby immplementation of file services needed to prepare objects to be accessioned in SULAIR digital library'
13
+ s.summary = 'Ruby implementation of file services needed to prepare objects to be accessioned ' \
14
+ 'into the Stanford Digital Repository'
14
15
  s.description = 'Get exif data, file sizes and more.'
15
16
  s.license = 'ALv2'
16
17
  s.metadata['rubygems_mfa_required'] = 'true'
17
18
 
18
19
  s.files = `git ls-files`.split("\n")
19
- s.test_files = `git ls-files -- spec/*`.split("\n")
20
20
  s.bindir = 'exe'
21
21
  s.executables = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
22
  s.require_paths = ['lib']
@@ -24,14 +24,10 @@ Gem::Specification.new do |s|
24
24
  s.required_ruby_version = '>= 3.0'
25
25
 
26
26
  s.add_dependency 'activesupport', '>= 5.2.0'
27
- s.add_dependency 'deprecation'
28
- s.add_dependency 'dry-struct', '~> 1.0'
29
- s.add_dependency 'dry-types', '~> 1.1'
30
27
  s.add_dependency 'mime-types', '> 3'
31
28
  s.add_dependency 'mini_exiftool'
32
- s.add_dependency 'nokogiri'
33
29
 
34
- s.add_development_dependency 'json'
30
+ s.add_development_dependency 'pry-byebug'
35
31
  s.add_development_dependency 'rake'
36
32
  s.add_development_dependency 'rspec', '~> 3.0'
37
33
  s.add_development_dependency 'rubocop', '~> 1.25'
data/config/boot.rb CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  require 'rubygems'
4
4
 
5
- environment = ENV['ENVIRONMENT'] ||= 'development'
6
5
  project_root = File.expand_path("#{File.dirname(__FILE__)}/..")
7
6
 
8
7
  # Load config for current environment.
@@ -4,6 +4,6 @@
4
4
  module Assembly
5
5
  class ObjectFile
6
6
  # Gem version
7
- VERSION = '1.13.0'
7
+ VERSION = '2.1.1'
8
8
  end
9
9
  end
@@ -2,25 +2,56 @@
2
2
 
3
3
  require 'mini_exiftool'
4
4
  require 'mime/types'
5
+ require 'active_support/core_ext/object/blank'
5
6
 
6
7
  module Assembly
7
- # Common behaviors we need for other classes in the gem
8
- module ObjectFileable
8
+ # This class contains generic methods to operate on any file.
9
+ class ObjectFile
10
+ # Class level method that, given an array of strings, returns the longest common initial path.
11
+ # Useful for removing a common path from a set of filenames when producing content metadata
12
+ #
13
+ # @param [Array] strings Array of filenames with paths to operate on
14
+ # @return [String] longest common initial part of path of filenames passed in
15
+ #
16
+ # Example:
17
+ # puts Assembly::ObjectFile.common_path(['/Users/peter/00/test.tif','/Users/peter/05/test.jp2'])
18
+ # # => '/Users/peter/'
19
+ def self.common_path(strings)
20
+ return nil if strings.empty?
21
+
22
+ n = 0
23
+ x = strings.last
24
+ n += 1 while strings.all? { |s| s[n] && (s[n] == x[n]) }
25
+ common_prefix = x[0...n]
26
+ if common_prefix[-1, 1] == '/' # check if last element of the common string is the end of a directory
27
+ common_prefix # if it is, the common prefix is already a complete directory path, so return it directly
28
+ else
29
+ "#{common_prefix.split('/')[0..-2].join('/')}/" # if not, split string along directories, and reject the partial last one
30
+ end
31
+ end
32
+
9
33
  attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
10
34
 
11
35
  VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
12
36
 
13
37
  # @param [String] path full path to the file to be worked with
14
38
  # @param [Hash<Symbol => Object>] params options used during content metadata generation
15
- # @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g. {:preserve=>'yes',:shelve=>'no',:publish=>'no'}, defaults pulled from mimetype
16
- # @option params [String] :label a resource label (files bundlded together will just get the first file's label attribute if set)
39
+ # @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g.:
40
+ # {:preserve=>'yes',:shelve=>'no',:publish=>'no'},
41
+ # defaults pulled from mimetype
42
+ # @option params [String] :label a resource label (files bundled together will just get the first
43
+ # file's label attribute if set)
17
44
  # @option params [String] :provider_md5 pre-computed MD5 checksum
18
45
  # @option params [String] :provider_sha1 pre-computed SHA1 checksum
19
- # @option params [String] :relative_path if you want the file ids in the content metadata it can be set, otherwise content metadata will get the full path
46
+ # @option params [String] :relative_path if you want the file ids in the content metadata it can be set,
47
+ # otherwise content metadata will get the full path
20
48
  # @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
21
- # options are :override (from manual overide mapping if exists), :exif (from exif if exists),
22
- # :extension (from file extension), and :file (from unix file system command)
23
- # the default is defined in the private `default_mime_type_order` method but you can override to set your own order
49
+ # options are :override (from manual override mapping if exists),
50
+ # :exif (from exif if exists)
51
+ # :extension (from file extension)
52
+ # :file (from unix file system command)
53
+ # the default is defined in the private `default_mime_type_order` method
54
+ # but you can override to set your own order
24
55
  # @example
25
56
  # Assembly::ObjectFile.new('/input/path_to_file.tif')
26
57
  def initialize(path, params = {})
@@ -33,24 +64,6 @@ module Assembly
33
64
  @mime_type_order = params[:mime_type_order] || default_mime_type_order
34
65
  end
35
66
 
36
- # @return [String] DPG base filename, removing the extension and the '00','05', etc. placeholders
37
- # @example
38
- # source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
39
- # puts source_file.dpg_basename # "cy565rm7188_001"
40
- def dpg_basename
41
- file_parts = File.basename(path, ext).split('_')
42
- file_parts.size == 3 ? "#{file_parts[0]}_#{file_parts[2]}" : filename_without_ext
43
- end
44
-
45
- # @return [String] DPG subfolder for the given filename, i.e. '00','05', etc.
46
- # @example
47
- # source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
48
- # puts source_file.dpg_folder # "00"
49
- def dpg_folder
50
- file_parts = File.basename(path, ext).split('_')
51
- file_parts.size == 3 ? file_parts[1] : ''
52
- end
53
-
54
67
  # @return [String] base filename
55
68
  # @example
56
69
  # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
@@ -91,6 +104,13 @@ module Assembly
91
104
  @exif ||= begin
92
105
  check_for_file
93
106
  MiniExiftool.new(path, replace_invalid_chars: '?')
107
+ rescue MiniExiftool::Error
108
+ # MiniExiftool will throw an exception when it tries to initialize for problematic files,
109
+ # but the exception it throws does not tell you the file that caused the problem.
110
+ # Instead, we will raise our own exception with more context in logging/reporting upstream.
111
+ # Note: if the file that causes the problem should NOT use exiftool to determine mimetype, add it to the skipped
112
+ # mimetypes in Assembly::TRUSTED_MIMETYPES to bypass initialization of MiniExiftool for mimetype generation
113
+ raise MiniExiftool::Error, "error initializing MiniExiftool for #{path}"
94
114
  end
95
115
  end
96
116
 
@@ -125,72 +145,15 @@ module Assembly
125
145
  check_for_file
126
146
  mimetype = ''
127
147
  mime_type_order.each do |mime_type_method|
128
- mimetype = public_send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
148
+ mimetype = send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
129
149
  break if mimetype.present?
130
150
  end
131
151
  mimetype
132
152
  end
133
153
  end
134
154
 
135
- # Returns mimetype information using the manual override mapping (based on a file extension lookup)
136
- # @return [String] mime type for supplied file if a mapping exists for the file's extension
137
- # @example
138
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
139
- # puts source_file.override_mimetype # 'application/json'
140
- def override_mimetype
141
- @override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
142
- end
143
-
144
- # Returns mimetype information using the mime-types gem (based on a file extension lookup)
145
- # @return [String] mime type for supplied file
146
- # @example
147
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
148
- # puts source_file.extension_mimetype # 'text/plain'
149
- def extension_mimetype
150
- @extension_mimetype ||= begin
151
- mtype = MIME::Types.type_for(path).first
152
- mtype ? mtype.content_type : ''
153
- end
154
- end
155
-
156
- # Returns mimetype information for the current file based on unix file system command.
157
- # @return [String] mime type for supplied file
158
- # @example
159
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
160
- # puts source_file.file_mimetype # 'text/plain'
161
- def file_mimetype
162
- @file_mimetype ||= begin
163
- check_for_file
164
- `file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
165
- end
166
- end
167
-
168
- # Returns mimetype information for the current file based on exif data (if available and not a trusted source that we'd rather get from the file system command)
169
- # @return [String] mime type for supplied file
170
- # @example
171
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
172
- # puts source_file.exif_mimetype # 'text/plain'
173
- def exif_mimetype
174
- @exif_mimetype ||= begin
175
- check_for_file
176
- prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype) # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
177
- exif.mimetype if exif&.mimetype && prefer_exif
178
- end
179
- end
180
-
181
- # @note Uses shell call to "file", only expected to work on unix based systems
182
- # @return [String] encoding for supplied file
183
- # @example
184
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
185
- # puts source_file.encoding # 'us-ascii'
186
- def encoding
187
- @encoding ||= begin
188
- check_for_file
189
- `file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
190
- end
191
- end
192
-
193
- # @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
155
+ # @return [Symbol] the type of object, could be :application (for PDF or Word, etc),
156
+ # :audio, :image, :message, :model, :multipart, :text or :video
194
157
  # @example
195
158
  # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
196
159
  # puts source_file.object_type # :image
@@ -219,18 +182,10 @@ module Assembly
219
182
  mimetype == 'image/jp2' || jp2able?
220
183
  end
221
184
 
222
- # @return [Boolean] true if image has a color profile, false if not.
223
- # @example
224
- # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
225
- # puts source_img.has_color_profile? # true
226
- def has_color_profile?
227
- return false unless exif
228
-
229
- exif['profiledescription'] || exif['colorspace'] ? true : false
230
- end
231
-
232
- # Examines the input image for validity to create a jp2. Same as valid_image? but also confirms the existence of a profile description and further restricts mimetypes.
233
- # It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
185
+ # Examines the input image for validity to create a jp2. Same as valid_image? but also confirms
186
+ # the existence of a profile description and further restricts mimetypes.
187
+ # It is used by the assembly robots to decide if a jp2 will be created and is also called before
188
+ # you create a jp2 using assembly-image.
234
189
  # @return [Boolean] true if image should have a jp2 created, false if not.
235
190
  # @example
236
191
  # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
@@ -262,14 +217,74 @@ module Assembly
262
217
 
263
218
  private
264
219
 
220
+ # private method to check for file existence before operating on it
221
+ def check_for_file
222
+ raise "input file #{path} does not exist or is a directory" unless file_exists?
223
+ end
224
+
265
225
  # private method defining default preferred ordering of how mimetypes are determined
266
226
  def default_mime_type_order
267
227
  %i[override exif file extension]
268
228
  end
269
229
 
270
- # private method to check for file existence before operating on it
271
- def check_for_file
272
- raise "input file #{path} does not exist or is a directory" unless file_exists?
230
+ # Returns mimetype information using the mime-types gem (based on a file extension lookup)
231
+ # @return [String] mime type for supplied file
232
+ # @example
233
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
234
+ # puts source_file.extension_mimetype # 'text/plain'
235
+ def extension_mimetype
236
+ @extension_mimetype ||= begin
237
+ mtype = MIME::Types.type_for(path).first
238
+ mtype ? mtype.content_type : ''
239
+ end
240
+ end
241
+
242
+ # Returns mimetype information for the current file based on unix file system command.
243
+ # @return [String] mime type for supplied file
244
+ # @example
245
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
246
+ # puts source_file.file_mimetype # 'text/plain'
247
+ def file_mimetype
248
+ @file_mimetype ||= begin
249
+ check_for_file
250
+ `file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # get the mimetype from the unix file command
251
+ end
252
+ end
253
+
254
+ # Returns mimetype information for the current file based on exif data
255
+ # (if available and not a trusted source that we'd rather get from the file system command)
256
+ # @return [String] mime type for supplied file
257
+ # @example
258
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
259
+ # puts source_file.exif_mimetype # 'text/plain'
260
+ def exif_mimetype
261
+ @exif_mimetype ||= begin
262
+ check_for_file
263
+ # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
264
+ prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype)
265
+ exif.mimetype if prefer_exif && exif&.mimetype
266
+ end
267
+ end
268
+
269
+ # Returns mimetype information using the manual override mapping (based on a file extension lookup)
270
+ # @return [String] mime type for supplied file if a mapping exists for the file's extension
271
+ # @example
272
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
273
+ # puts source_file.override_mimetype # 'application/json'
274
+ def override_mimetype
275
+ @override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
276
+ end
277
+
278
+ # @note Uses shell call to "file", only expected to work on unix based systems
279
+ # @return [String] encoding for supplied file
280
+ # @example
281
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
282
+ # puts source_file.encoding # 'us-ascii'
283
+ def encoding
284
+ @encoding ||= begin
285
+ check_for_file
286
+ `file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
287
+ end
273
288
  end
274
289
  end
275
290
  end