assembly-objectfile 1.13.0 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +1 -1
  3. data/.gitignore +0 -1
  4. data/.rubocop.yml +34 -1
  5. data/.rubocop_todo.yml +3 -120
  6. data/Gemfile.lock +97 -0
  7. data/README.md +6 -6
  8. data/assembly-objectfile.gemspec +4 -8
  9. data/config/boot.rb +0 -1
  10. data/lib/{assembly-objectfile → assembly/object_file}/version.rb +1 -1
  11. data/lib/{assembly-objectfile/object_fileable.rb → assembly/object_file.rb} +116 -101
  12. data/lib/assembly-objectfile.rb +14 -19
  13. data/spec/assembly/object_file_spec.rb +452 -0
  14. data/spec/spec_helper.rb +3 -37
  15. metadata +10 -143
  16. data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
  17. data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
  18. data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
  19. data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
  20. data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
  21. data/lib/assembly-objectfile/content_metadata.rb +0 -117
  22. data/lib/assembly-objectfile/object_file.rb +0 -29
  23. data/profiles/AdobeRGB1998.icc +0 -0
  24. data/profiles/DotGain20.icc +0 -0
  25. data/profiles/sRGBIEC6196621.icc +0 -0
  26. data/spec/content_metadata_spec.rb +0 -809
  27. data/spec/object_file_spec.rb +0 -222
  28. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
  29. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
  30. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
  31. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
  32. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
  33. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
  34. data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
  35. data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
  36. data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
  37. data/spec/test_data/input/res1_image1.jp2 +0 -0
  38. data/spec/test_data/input/res1_image2.jp2 +0 -0
  39. data/spec/test_data/input/res1_image2.tif +0 -0
  40. data/spec/test_data/input/res1_teifile.txt +0 -1
  41. data/spec/test_data/input/res2_image1.jp2 +0 -0
  42. data/spec/test_data/input/res2_image1.tif +0 -0
  43. data/spec/test_data/input/res2_image2.jp2 +0 -0
  44. data/spec/test_data/input/res2_image2.tif +0 -0
  45. data/spec/test_data/input/res2_teifile.txt +0 -1
  46. data/spec/test_data/input/res2_textfile.txt +0 -1
  47. data/spec/test_data/input/res3_image1.jp2 +0 -0
  48. data/spec/test_data/input/res3_image1.tif +0 -0
  49. data/spec/test_data/input/res3_teifile.txt +0 -1
  50. data/spec/test_data/input/test.pdf +0 -1
  51. data/spec/test_data/input/test.svg +0 -2
  52. data/spec/test_data/input/test2.jp2 +0 -0
  53. data/spec/test_data/input/test2.tif +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8c85bb6c05fe0d46e9b2f37e90710043efce16322c5d58e0b0a9578fd21ece77
4
- data.tar.gz: c7e3ac5e3f0a0aa2124f8ce3d6587de2d02923c733ee94b1c299ae171101f5a5
3
+ metadata.gz: 76dac08191a4c118cf16e4736b9538c9d2a60811cf9547e0ad395ab468b1b51a
4
+ data.tar.gz: b9fec1e99d03941f658cd6716e1c00db37ce2f2042816e4ba8f77246b102d1f4
5
5
  SHA512:
6
- metadata.gz: fbad307e9a103af61b4a6b05b35bb6b98953774a2c9bd32cb1404f1a9ff6e3404fe78cfc48f9061bfffaa23c3610f2a6f19df5e08975a20c6ee387073cc1010d
7
- data.tar.gz: 717e174ccf88a94880f4a832b2d281b351774299fb1273a96b4a1bb942e360cd20b0ba3ac68b7ad4f113e3bd3c78652c7670efcf7bcf8f9bcf068972ba5cd42d
6
+ metadata.gz: '082fc021ca4f4dde1f3eaaac937e66862d8ea36f5198bdece9866a72e5be068021665e10bd40b8ea2835d848c062886982ed41033935120525c8d4e57efadafd'
7
+ data.tar.gz: cf2e4facd4248532c5f92c741f80b5e8d27e04b8cc23e3b7959b59118cceaf8122d03aa92af8faf99c8000bb4298b74d9ab86001744c6f852cd148ceda21d675
data/.circleci/config.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  version: 2.1
2
2
  orbs:
3
- ruby-rails: sul-dlss/ruby-rails@3.0.1
3
+ ruby-rails: sul-dlss/ruby-rails@3.1.2
4
4
  workflows:
5
5
  build:
6
6
  jobs:
data/.gitignore CHANGED
@@ -10,4 +10,3 @@ log/*
10
10
  pkg/*
11
11
  tags
12
12
  tmp
13
- Gemfile.lock
data/.rubocop.yml CHANGED
@@ -9,12 +9,32 @@ AllCops:
9
9
  - '**/*.md'
10
10
  - 'vendor/**/*' # avoid running rubocop on cached bundler
11
11
 
12
+ Layout/LineLength:
13
+ Max: 120
14
+
15
+ Naming/FileName:
16
+ Exclude:
17
+ - 'lib/assembly-objectfile.rb'
18
+
19
+ Naming/PredicateName:
20
+ NamePrefix:
21
+ - 'is_'
22
+
12
23
  Metrics/BlockLength:
13
24
  Exclude:
14
25
  - 'spec/**/*.rb'
15
26
  - '**/*.gemspec'
16
27
 
17
- Gemspec/DateAssignment: # (new in 1.10)
28
+ RSpec/ExampleLength:
29
+ Max: 10
30
+
31
+ RSpec/MultipleExpectations:
32
+ Max: 10
33
+
34
+ RSpec/NestedGroups:
35
+ Max: 4
36
+
37
+ Gemspec/DeprecatedAttributeAssignment: # (new in 1.30)
18
38
  Enabled: true
19
39
 
20
40
  Layout/SpaceAroundMethodCallOperator:
@@ -207,3 +227,16 @@ RSpec/ChangeByZero: # new in 2.11.0
207
227
  Enabled: true
208
228
  RSpec/VerifiedDoubleReference: # new in 2.10.0
209
229
  Enabled: true
230
+
231
+ Layout/LineContinuationLeadingSpace: # new in 1.31
232
+ Enabled: true
233
+ Layout/LineContinuationSpacing: # new in 1.31
234
+ Enabled: true
235
+ Lint/ConstantOverwrittenInRescue: # new in 1.31
236
+ Enabled: true
237
+ Lint/NonAtomicFileOperation: # new in 1.31
238
+ Enabled: true
239
+ RSpec/Capybara/SpecificMatcher: # new in 2.12
240
+ Enabled: true
241
+ RSpec/Rails/HaveHttpStatus: # new in 2.12
242
+ Enabled: true
data/.rubocop_todo.yml CHANGED
@@ -1,129 +1,12 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2022-02-28 21:11:02 UTC using RuboCop version 1.25.1.
3
+ # on 2022-07-20 17:16:46 UTC using RuboCop version 1.31.2.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 1
10
- Lint/UselessAssignment:
11
- Exclude:
12
- - 'config/boot.rb'
13
-
14
- # Offense count: 3
15
- # Configuration parameters: IgnoredMethods, CountRepeatedAttributes.
16
- Metrics/AbcSize:
17
- Max: 55
18
-
19
- # Offense count: 1
20
- # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
21
- # IgnoredMethods: refine
22
- Metrics/BlockLength:
23
- Max: 27
24
-
25
- # Offense count: 2
26
- # Configuration parameters: IgnoredMethods.
27
- Metrics/CyclomaticComplexity:
28
- Max: 14
29
-
30
- # Offense count: 4
31
- # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
32
- Metrics/MethodLength:
33
- Max: 31
34
-
35
9
  # Offense count: 1
36
10
  # Configuration parameters: CountComments, CountAsOne.
37
- Metrics/ModuleLength:
38
- Max: 120
39
-
40
- # Offense count: 1
41
- # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
42
- Metrics/ParameterLists:
43
- Max: 12
44
-
45
- # Offense count: 2
46
- # Configuration parameters: IgnoredMethods.
47
- Metrics/PerceivedComplexity:
48
- Max: 15
49
-
50
- # Offense count: 1
51
- # Configuration parameters: ExpectMatchingDefinition, CheckDefinitionPathHierarchy, CheckDefinitionPathHierarchyRoots, Regex, IgnoreExecutableScripts, AllowedAcronyms.
52
- # CheckDefinitionPathHierarchyRoots: lib, spec, test, src
53
- # AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
54
- Naming/FileName:
55
- Exclude:
56
- - 'lib/assembly-objectfile.rb'
57
-
58
- # Offense count: 1
59
- # Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros.
60
- # NamePrefix: is_, has_, have_
61
- # ForbiddenPrefixes: is_, has_, have_
62
- # AllowedMethods: is_a?
63
- # MethodDefinitionMacros: define_method, define_singleton_method
64
- Naming/PredicateName:
65
- Exclude:
66
- - 'spec/**/*'
67
- - 'lib/assembly-objectfile/object_fileable.rb'
68
-
69
- # Offense count: 1
70
- # Configuration parameters: Prefixes.
71
- # Prefixes: when, with, without
72
- RSpec/ContextWording:
73
- Exclude:
74
- - 'spec/content_metadata_spec.rb'
75
-
76
- # Offense count: 32
77
- # Configuration parameters: CountAsOne.
78
- RSpec/ExampleLength:
79
- Max: 34
80
-
81
- # Offense count: 2
82
- # Configuration parameters: Include, CustomTransform, IgnoreMethods, SpecSuffixOnly.
83
- # Include: **/*_spec*rb*, **/spec/**/*
84
- RSpec/FilePath:
85
- Exclude:
86
- - 'spec/content_metadata_spec.rb'
87
- - 'spec/object_file_spec.rb'
88
-
89
- # Offense count: 74
90
- # Configuration parameters: AssignmentOnly.
91
- RSpec/InstanceVariable:
92
- Exclude:
93
- - 'spec/object_file_spec.rb'
94
-
95
- # Offense count: 41
96
- RSpec/MultipleExpectations:
97
- Max: 29
98
-
99
- # Offense count: 20
100
- RSpec/NestedGroups:
101
- Max: 4
102
-
103
- # Offense count: 2
104
- RSpec/RepeatedDescription:
105
- Exclude:
106
- - 'spec/object_file_spec.rb'
107
-
108
- # Offense count: 2
109
- RSpec/RepeatedExample:
110
- Exclude:
111
- - 'spec/object_file_spec.rb'
112
-
113
- # Offense count: 5
114
- RSpec/RepeatedExampleGroupDescription:
115
- Exclude:
116
- - 'spec/content_metadata_spec.rb'
117
-
118
- # Offense count: 2
119
- # Cop supports --auto-correct.
120
- Style/CommentedKeyword:
121
- Exclude:
122
- - 'lib/assembly-objectfile/content_metadata.rb'
123
-
124
- # Offense count: 123
125
- # Cop supports --auto-correct.
126
- # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
127
- # URISchemes: http, https
128
- Layout/LineLength:
129
- Max: 277
11
+ Metrics/ClassLength:
12
+ Max: 122
data/Gemfile.lock ADDED
@@ -0,0 +1,97 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ assembly-objectfile (2.1.1)
5
+ activesupport (>= 5.2.0)
6
+ mime-types (> 3)
7
+ mini_exiftool
8
+
9
+ GEM
10
+ remote: http://rubygems.org/
11
+ specs:
12
+ activesupport (7.0.3.1)
13
+ concurrent-ruby (~> 1.0, >= 1.0.2)
14
+ i18n (>= 1.6, < 2)
15
+ minitest (>= 5.1)
16
+ tzinfo (~> 2.0)
17
+ ast (2.4.2)
18
+ byebug (11.1.3)
19
+ coderay (1.1.3)
20
+ concurrent-ruby (1.1.10)
21
+ diff-lcs (1.5.0)
22
+ docile (1.4.0)
23
+ i18n (1.12.0)
24
+ concurrent-ruby (~> 1.0)
25
+ json (2.6.2)
26
+ method_source (1.0.0)
27
+ mime-types (3.4.1)
28
+ mime-types-data (~> 3.2015)
29
+ mime-types-data (3.2022.0105)
30
+ mini_exiftool (2.10.2)
31
+ minitest (5.16.2)
32
+ parallel (1.22.1)
33
+ parser (3.1.2.0)
34
+ ast (~> 2.4.1)
35
+ pry (0.13.1)
36
+ coderay (~> 1.1)
37
+ method_source (~> 1.0)
38
+ pry-byebug (3.9.0)
39
+ byebug (~> 11.0)
40
+ pry (~> 0.13.0)
41
+ rainbow (3.1.1)
42
+ rake (13.0.6)
43
+ regexp_parser (2.5.0)
44
+ rexml (3.2.5)
45
+ rspec (3.11.0)
46
+ rspec-core (~> 3.11.0)
47
+ rspec-expectations (~> 3.11.0)
48
+ rspec-mocks (~> 3.11.0)
49
+ rspec-core (3.11.0)
50
+ rspec-support (~> 3.11.0)
51
+ rspec-expectations (3.11.0)
52
+ diff-lcs (>= 1.2.0, < 2.0)
53
+ rspec-support (~> 3.11.0)
54
+ rspec-mocks (3.11.1)
55
+ diff-lcs (>= 1.2.0, < 2.0)
56
+ rspec-support (~> 3.11.0)
57
+ rspec-support (3.11.0)
58
+ rubocop (1.31.2)
59
+ json (~> 2.3)
60
+ parallel (~> 1.10)
61
+ parser (>= 3.1.0.0)
62
+ rainbow (>= 2.2.2, < 4.0)
63
+ regexp_parser (>= 1.8, < 3.0)
64
+ rexml (>= 3.2.5, < 4.0)
65
+ rubocop-ast (>= 1.18.0, < 2.0)
66
+ ruby-progressbar (~> 1.7)
67
+ unicode-display_width (>= 1.4.0, < 3.0)
68
+ rubocop-ast (1.19.1)
69
+ parser (>= 3.1.1.0)
70
+ rubocop-rspec (2.12.1)
71
+ rubocop (~> 1.31)
72
+ ruby-progressbar (1.11.0)
73
+ simplecov (0.21.2)
74
+ docile (~> 1.1)
75
+ simplecov-html (~> 0.11)
76
+ simplecov_json_formatter (~> 0.1)
77
+ simplecov-html (0.12.3)
78
+ simplecov_json_formatter (0.1.4)
79
+ tzinfo (2.0.5)
80
+ concurrent-ruby (~> 1.0)
81
+ unicode-display_width (2.2.0)
82
+
83
+ PLATFORMS
84
+ ruby
85
+
86
+ DEPENDENCIES
87
+ assembly-objectfile!
88
+ byebug
89
+ pry-byebug
90
+ rake
91
+ rspec (~> 3.0)
92
+ rubocop (~> 1.25)
93
+ rubocop-rspec
94
+ simplecov
95
+
96
+ BUNDLED WITH
97
+ 2.3.17
data/README.md CHANGED
@@ -8,15 +8,15 @@
8
8
  ## Overview
9
9
  This gem contains classes used by the Stanford University Digital Library to
10
10
  perform file operations necessary for accessioning of content. It is also
11
- used by related gems to perform content type specific operations (such as jp2
12
- generation).
11
+ used by related gems that perform content type specific operations (e.g.
12
+ assembly-image for jp2 generation).
13
13
 
14
14
  ## Usage
15
15
 
16
16
  The gem currently has methods for:
17
17
  * filesize
18
- * exif
19
- * generate content metadata
18
+ * mimetype
19
+ * exif - consumers use ExifTool to get file information
20
20
 
21
21
  ## Running tests
22
22
 
@@ -34,8 +34,8 @@ rake release
34
34
 
35
35
  1. Exiftool
36
36
 
37
- RHEL: (RPM to install comming soon) Download latest version from:
38
- http://www.sno.phy.queensu.ca/~phil/exiftool
37
+ RHEL: (RPM to install coming soon) Download latest version from:
38
+ https://exiftool.org/
39
39
 
40
40
  tar -xf Image-ExifTool-#.##.tar.gz
41
41
  cd Image-ExifTool-#.##
data/assembly-objectfile.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  $LOAD_PATH.push File.expand_path('lib', __dir__)
4
- require 'assembly-objectfile/version'
4
+ require 'assembly/object_file/version'
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = 'assembly-objectfile'
@@ -10,13 +10,13 @@ Gem::Specification.new do |s|
10
10
  s.authors = ['Peter Mangiafico', 'Renzo Sanchez-Silva', 'Monty Hindman', 'Tony Calavano']
11
11
  s.email = ['pmangiafico@stanford.edu']
12
12
  s.homepage = 'https://github.com/sul-dlss/assembly-objectfile'
13
- s.summary = 'Ruby immplementation of file services needed to prepare objects to be accessioned in SULAIR digital library'
13
+ s.summary = 'Ruby implementation of file services needed to prepare objects to be accessioned ' \
14
+ 'into the Stanford Digital Repository'
14
15
  s.description = 'Get exif data, file sizes and more.'
15
16
  s.license = 'ALv2'
16
17
  s.metadata['rubygems_mfa_required'] = 'true'
17
18
 
18
19
  s.files = `git ls-files`.split("\n")
19
- s.test_files = `git ls-files -- spec/*`.split("\n")
20
20
  s.bindir = 'exe'
21
21
  s.executables = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
22
  s.require_paths = ['lib']
@@ -24,14 +24,10 @@ Gem::Specification.new do |s|
24
24
  s.required_ruby_version = '>= 3.0'
25
25
 
26
26
  s.add_dependency 'activesupport', '>= 5.2.0'
27
- s.add_dependency 'deprecation'
28
- s.add_dependency 'dry-struct', '~> 1.0'
29
- s.add_dependency 'dry-types', '~> 1.1'
30
27
  s.add_dependency 'mime-types', '> 3'
31
28
  s.add_dependency 'mini_exiftool'
32
- s.add_dependency 'nokogiri'
33
29
 
34
- s.add_development_dependency 'json'
30
+ s.add_development_dependency 'pry-byebug'
35
31
  s.add_development_dependency 'rake'
36
32
  s.add_development_dependency 'rspec', '~> 3.0'
37
33
  s.add_development_dependency 'rubocop', '~> 1.25'
data/config/boot.rb CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  require 'rubygems'
4
4
 
5
- environment = ENV['ENVIRONMENT'] ||= 'development'
6
5
  project_root = File.expand_path("#{File.dirname(__FILE__)}/..")
7
6
 
8
7
  # Load config for current environment.
@@ -4,6 +4,6 @@
4
4
  module Assembly
5
5
  class ObjectFile
6
6
  # Gem version
7
- VERSION = '1.13.0'
7
+ VERSION = '2.1.1'
8
8
  end
9
9
  end
@@ -2,25 +2,56 @@
2
2
 
3
3
  require 'mini_exiftool'
4
4
  require 'mime/types'
5
+ require 'active_support/core_ext/object/blank'
5
6
 
6
7
  module Assembly
7
- # Common behaviors we need for other classes in the gem
8
- module ObjectFileable
8
+ # This class contains generic methods to operate on any file.
9
+ class ObjectFile
10
+ # Class level method that, given an array of strings, returns the longest common initial path.
11
+ # Useful for removing a common path from a set of filenames when producing content metadata
12
+ #
13
+ # @param [Array] strings Array of filenames with paths to operate on
14
+ # @return [String] longest common initial part of path of filenames passed in
15
+ #
16
+ # Example:
17
+ # puts Assembly::ObjectFile.common_path(['/Users/peter/00/test.tif','/Users/peter/05/test.jp2'])
18
+ # # => '/Users/peter/'
19
+ def self.common_path(strings)
20
+ return nil if strings.empty?
21
+
22
+ n = 0
23
+ x = strings.last
24
+ n += 1 while strings.all? { |s| s[n] && (s[n] == x[n]) }
25
+ common_prefix = x[0...n]
26
+ if common_prefix[-1, 1] == '/' # check if last element of the common string is the end of a directory
27
+ common_prefix # if it was, then return the common prefix directly
28
+ else
29
+ "#{common_prefix.split('/')[0..-2].join('/')}/" # if not, split string along directories, and reject last one
30
+ end
31
+ end
32
+
9
33
  attr_accessor :file_attributes, :label, :path, :provider_md5, :provider_sha1, :relative_path, :mime_type_order
10
34
 
11
35
  VALID_MIMETYPE_METHODS = %i[override exif file extension].freeze
12
36
 
13
37
  # @param [String] path full path to the file to be worked with
14
38
  # @param [Hash<Symbol => Object>] params options used during content metadata generation
15
- # @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g. {:preserve=>'yes',:shelve=>'no',:publish=>'no'}, defaults pulled from mimetype
16
- # @option params [String] :label a resource label (files bundlded together will just get the first file's label attribute if set)
39
+ # @option params [Hash<Symbol => ['yes', 'no']>] :file_attributes e.g.:
40
+ # {:preserve=>'yes',:shelve=>'no',:publish=>'no'},
41
+ # defaults pulled from mimetype
42
+ # @option params [String] :label a resource label (files bundled together will just get the first
43
+ # file's label attribute if set)
17
44
  # @option params [String] :provider_md5 pre-computed MD5 checksum
18
45
  # @option params [String] :provider_sha1 pre-computed SHA1 checksum
19
- # @option params [String] :relative_path if you want the file ids in the content metadata it can be set, otherwise content metadata will get the full path
46
+ # @option params [String] :relative_path if you want the file ids in the content metadata it can be set,
47
+ # otherwise content metadata will get the full path
20
48
  # @option params [Array] :mime_type_order can be set to the order in which you want mimetypes to be determined
21
- # options are :override (from manual overide mapping if exists), :exif (from exif if exists),
22
- # :extension (from file extension), and :file (from unix file system command)
23
- # the default is defined in the private `default_mime_type_order` method but you can override to set your own order
49
+ # options are :override (from manual override mapping if exists),
50
+ # :exif (from exif if exists)
51
+ # :extension (from file extension)
52
+ # :file (from unix file system command)
53
+ # the default is defined in the private `default_mime_type_order` method
54
+ # but you can override to set your own order
24
55
  # @example
25
56
  # Assembly::ObjectFile.new('/input/path_to_file.tif')
26
57
  def initialize(path, params = {})
@@ -33,24 +64,6 @@ module Assembly
33
64
  @mime_type_order = params[:mime_type_order] || default_mime_type_order
34
65
  end
35
66
 
36
- # @return [String] DPG base filename, removing the extension and the '00','05', etc. placeholders
37
- # @example
38
- # source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
39
- # puts source_file.dpg_basename # "cy565rm7188_001"
40
- def dpg_basename
41
- file_parts = File.basename(path, ext).split('_')
42
- file_parts.size == 3 ? "#{file_parts[0]}_#{file_parts[2]}" : filename_without_ext
43
- end
44
-
45
- # @return [String] DPG subfolder for the given filename, i.e. '00','05', etc.
46
- # @example
47
- # source_file = Assembly::ObjectFile.new('/input/cy565rm7188_00_001.tif')
48
- # puts source_file.dpg_folder # "00"
49
- def dpg_folder
50
- file_parts = File.basename(path, ext).split('_')
51
- file_parts.size == 3 ? file_parts[1] : ''
52
- end
53
-
54
67
  # @return [String] base filename
55
68
  # @example
56
69
  # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
@@ -91,6 +104,13 @@ module Assembly
91
104
  @exif ||= begin
92
105
  check_for_file
93
106
  MiniExiftool.new(path, replace_invalid_chars: '?')
107
+ rescue MiniExiftool::Error
108
+ # MiniExiftool will throw an exception when it tries to initialize for problematic files,
109
+ # but the exception it throws does not tell you the file that caused the problem.
110
+ # Instead, we will raise our own exception with more context in logging/reporting upstream.
111
+ # Note: if the file that causes the problem should NOT use exiftool to determine mimetype, add it to the skipped
112
+ # mimetypes in Assembly::TRUSTED_MIMETYPES to bypass initialization of MiniExiftool for mimetype generation
113
+ raise MiniExiftool::Error, "error initializing MiniExiftool for #{path}"
94
114
  end
95
115
  end
96
116
 
@@ -125,72 +145,15 @@ module Assembly
125
145
  check_for_file
126
146
  mimetype = ''
127
147
  mime_type_order.each do |mime_type_method|
128
- mimetype = public_send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
148
+ mimetype = send("#{mime_type_method}_mimetype") if VALID_MIMETYPE_METHODS.include?(mime_type_method)
129
149
  break if mimetype.present?
130
150
  end
131
151
  mimetype
132
152
  end
133
153
  end
134
154
 
135
- # Returns mimetype information using the manual override mapping (based on a file extension lookup)
136
- # @return [String] mime type for supplied file if a mapping exists for the file's extension
137
- # @example
138
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
139
- # puts source_file.override_mimetype # 'application/json'
140
- def override_mimetype
141
- @override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
142
- end
143
-
144
- # Returns mimetype information using the mime-types gem (based on a file extension lookup)
145
- # @return [String] mime type for supplied file
146
- # @example
147
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
148
- # puts source_file.extension_mimetype # 'text/plain'
149
- def extension_mimetype
150
- @extension_mimetype ||= begin
151
- mtype = MIME::Types.type_for(path).first
152
- mtype ? mtype.content_type : ''
153
- end
154
- end
155
-
156
- # Returns mimetype information for the current file based on unix file system command.
157
- # @return [String] mime type for supplied file
158
- # @example
159
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
160
- # puts source_file.file_mimetype # 'text/plain'
161
- def file_mimetype
162
- @file_mimetype ||= begin
163
- check_for_file
164
- `file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # first try and get the mimetype from the unix file command
165
- end
166
- end
167
-
168
- # Returns mimetype information for the current file based on exif data (if available and not a trusted source that we'd rather get from the file system command)
169
- # @return [String] mime type for supplied file
170
- # @example
171
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
172
- # puts source_file.exif_mimetype # 'text/plain'
173
- def exif_mimetype
174
- @exif_mimetype ||= begin
175
- check_for_file
176
- prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype) # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
177
- exif.mimetype if exif&.mimetype && prefer_exif
178
- end
179
- end
180
-
181
- # @note Uses shell call to "file", only expected to work on unix based systems
182
- # @return [String] encoding for supplied file
183
- # @example
184
- # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
185
- # puts source_file.encoding # 'us-ascii'
186
- def encoding
187
- @encoding ||= begin
188
- check_for_file
189
- `file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
190
- end
191
- end
192
-
193
- # @return [Symbol] the type of object, could be :application (for PDF or Word, etc), :audio, :image, :message, :model, :multipart, :text or :video
155
+ # @return [Symbol] the type of object, could be :application (for PDF or Word, etc),
156
+ # :audio, :image, :message, :model, :multipart, :text or :video
194
157
  # @example
195
158
  # source_file = Assembly::ObjectFile.new('/input/path_to_file.tif')
196
159
  # puts source_file.object_type # :image
@@ -219,18 +182,10 @@ module Assembly
219
182
  mimetype == 'image/jp2' || jp2able?
220
183
  end
221
184
 
222
- # @return [Boolean] true if image has a color profile, false if not.
223
- # @example
224
- # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
225
- # puts source_img.has_color_profile? # true
226
- def has_color_profile?
227
- return false unless exif
228
-
229
- exif['profiledescription'] || exif['colorspace'] ? true : false
230
- end
231
-
232
- # Examines the input image for validity to create a jp2. Same as valid_image? but also confirms the existence of a profile description and further restricts mimetypes.
233
- # It is used by the assembly robots to decide if a jp2 will be created and is also called before you create a jp2 using assembly-image.
185
+ # Examines the input image for validity to create a jp2. Same as valid_image? but also confirms
186
+ # the existence of a profile description and further restricts mimetypes.
187
+ # It is used by the assembly robots to decide if a jp2 will be created and is also called before
188
+ # you create a jp2 using assembly-image.
234
189
  # @return [Boolean] true if image should have a jp2 created, false if not.
235
190
  # @example
236
191
  # source_img = Assembly::ObjectFile.new('/input/path_to_file.tif')
@@ -262,14 +217,74 @@ module Assembly
262
217
 
263
218
  private
264
219
 
220
+ # private method to check for file existence before operating on it
221
+ def check_for_file
222
+ raise "input file #{path} does not exist or is a directory" unless file_exists?
223
+ end
224
+
265
225
  # prive method defining default preferred ordering of how mimetypes are determined
266
226
  def default_mime_type_order
267
227
  %i[override exif file extension]
268
228
  end
269
229
 
270
- # private method to check for file existence before operating on it
271
- def check_for_file
272
- raise "input file #{path} does not exist or is a directory" unless file_exists?
230
+ # Returns mimetype information using the mime-types gem (based on a file extension lookup)
231
+ # @return [String] mime type for supplied file
232
+ # @example
233
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
234
+ # puts source_file.extension_mimetype # 'text/plain'
235
+ def extension_mimetype
236
+ @extension_mimetype ||= begin
237
+ mtype = MIME::Types.type_for(path).first
238
+ mtype ? mtype.content_type : ''
239
+ end
240
+ end
241
+
242
+ # Returns mimetype information for the current file based on unix file system command.
243
+ # @return [String] mime type for supplied file
244
+ # @example
245
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
246
+ # puts source_file.file_mimetype # 'text/plain'
247
+ def file_mimetype
248
+ @file_mimetype ||= begin
249
+ check_for_file
250
+ `file --mime-type "#{path}"`.delete("\n").split(':')[1].strip # get the mimetype from the unix file command
251
+ end
252
+ end
253
+
254
+ # Returns mimetype information for the current file based on exif data
255
+ # (if available and not a trusted source that we'd rather get from the file system command)
256
+ # @return [String] mime type for supplied file
257
+ # @example
258
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
259
+ # puts source_file.exif_mimetype # 'text/plain'
260
+ def exif_mimetype
261
+ @exif_mimetype ||= begin
262
+ check_for_file
263
+ # if it's not a "trusted" mimetype and there is exif data; get the mimetype from the exif
264
+ prefer_exif = !Assembly::TRUSTED_MIMETYPES.include?(file_mimetype)
265
+ exif.mimetype if prefer_exif && exif&.mimetype
266
+ end
267
+ end
268
+
269
+ # Returns mimetype information using the manual override mapping (based on a file extension lookup)
270
+ # @return [String] mime type for supplied file if a mapping exists for the file's extension
271
+ # @example
272
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.json')
273
+ # puts source_file.override_mimetype # 'application/json'
274
+ def override_mimetype
275
+ @override_mimetype ||= Assembly::OVERRIDE_MIMETYPES.fetch(ext.to_sym, '')
276
+ end
277
+
278
+ # @note Uses shell call to "file", only expected to work on unix based systems
279
+ # @return [String] encoding for supplied file
280
+ # @example
281
+ # source_file = Assembly::ObjectFile.new('/input/path_to_file.txt')
282
+ # puts source_file.encoding # 'us-ascii'
283
+ def encoding
284
+ @encoding ||= begin
285
+ check_for_file
286
+ `file --mime-encoding "#{path}"`.delete("\n").split(':')[1].strip
287
+ end
273
288
  end
274
289
  end
275
290
  end