assembly-objectfile 1.7.1 → 1.7.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 4ef16d322b5b114f0108703d029612111aafd60d
4
- data.tar.gz: 4ce48229272115d47ed4a8b60ec60bc1ad3b953d
2
+ SHA256:
3
+ metadata.gz: 833c781110e681cf9555ef036857efd1a4758902c8aea55f55f4e712e78470d1
4
+ data.tar.gz: 83111a9c4145aac66c8e7ba24a07336c13cbf851407d05ad9a9d45d7bbf138d8
5
5
  SHA512:
6
- metadata.gz: 56d140f88b2f50e7c4d690bd4453facb5d79d52036e8d15781bcb954e9d2139ee0050d85336d24aca123a78e4ddc76bd3d765e6de934053b60b3e42a77e65bd3
7
- data.tar.gz: 4afc43a58e962182ca2badfd994aeaee676a2461ef65f28db5e1b62da2e60dae74d9fb02d95e61946258fb3eb7d6f3fb4cd6316c6a2b71b4bbf2782cc098fc8e
6
+ metadata.gz: 9346400c4e01abece34de372d086037a60f3f0cf78479630986d0d25a346042e51dc8d6e8a6835f0b9dd5bf9e70fdbd4385a8689cbd9d9ddbd899142b60534ae
7
+ data.tar.gz: 80cf1fa0011f28e08975d401c8d2ce3156485b4b4b3107b665d628679f1e9e4d4032313fb2434f5c7d24997f7004af6533f36b3f561560f284095264ff262a25
data/.rubocop.yml ADDED
@@ -0,0 +1,22 @@
1
+ inherit_from: .rubocop_todo.yml
2
+ require: rubocop-rspec
3
+
4
+ # Configuration parameters: AllowURI, URISchemes.
5
+ Metrics/LineLength:
6
+ Max: 200
7
+
8
+ RSpec/ContextWording:
9
+ Enabled: false # too dogmatic
10
+
11
+ RSpec/ExampleLength:
12
+ Max: 25
13
+
14
+ # we like 'expect(x).to receive' better than 'have_received'
15
+ RSpec/MessageSpies:
16
+ Enabled: false
17
+
18
+ RSpec/MultipleExpectations:
19
+ Max: 5
20
+
21
+ RSpec/NestedGroups:
22
+ Max: 4 # default: 3
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,132 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2018-09-13 12:44:21 -0700 using RuboCop version 0.59.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ # Cop supports --auto-correct.
11
+ # Configuration parameters: EnforcedStyleAlignWith, AutoCorrect, Severity.
12
+ # SupportedStylesAlignWith: keyword, variable, start_of_line
13
+ Layout/EndAlignment:
14
+ Exclude:
15
+ - 'lib/assembly-objectfile/content_metadata.rb'
16
+
17
+ # Offense count: 1
18
+ Lint/UselessAssignment:
19
+ Exclude:
20
+ - 'config/boot.rb'
21
+
22
+ # Offense count: 5
23
+ # Configuration parameters: CheckForMethodsWithNoSideEffects.
24
+ Lint/Void:
25
+ Exclude:
26
+ - 'spec/content_metadata_spec.rb'
27
+
28
+ # Offense count: 3
29
+ Metrics/AbcSize:
30
+ Max: 170
31
+
32
+ # Offense count: 15
33
+ # Configuration parameters: CountComments, ExcludedMethods.
34
+ # ExcludedMethods: refine
35
+ Metrics/BlockLength:
36
+ Max: 549
37
+
38
+ # Offense count: 1
39
+ # Configuration parameters: CountComments.
40
+ Metrics/ClassLength:
41
+ Max: 137
42
+
43
+ # Offense count: 1
44
+ Metrics/CyclomaticComplexity:
45
+ Max: 63
46
+
47
+ # Offense count: 26
48
+ # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
49
+ # URISchemes: http, https
50
+ Metrics/LineLength:
51
+ Max: 304
52
+
53
+ # Offense count: 1
54
+ # Configuration parameters: CountComments.
55
+ Metrics/MethodLength:
56
+ Max: 131
57
+
58
+ # Offense count: 1
59
+ # Configuration parameters: CountComments.
60
+ Metrics/ModuleLength:
61
+ Max: 106
62
+
63
+ # Offense count: 1
64
+ Metrics/PerceivedComplexity:
65
+ Max: 59
66
+
67
+ # Offense count: 2
68
+ # Configuration parameters: ExpectMatchingDefinition, Regex, IgnoreExecutableScripts, AllowedAcronyms.
69
+ # AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
70
+ Naming/FileName:
71
+ Exclude:
72
+ - 'assembly-objectfile.gemspec'
73
+ - 'lib/assembly-objectfile.rb'
74
+
75
+ # Offense count: 2
76
+ # Configuration parameters: NamePrefix, NamePrefixBlacklist, NameWhitelist, MethodDefinitionMacros.
77
+ # NamePrefix: is_, has_, have_
78
+ # NamePrefixBlacklist: is_, has_, have_
79
+ # NameWhitelist: is_a?
80
+ # MethodDefinitionMacros: define_method, define_singleton_method
81
+ Naming/PredicateName:
82
+ Exclude:
83
+ - 'spec/**/*'
84
+ - 'lib/assembly-objectfile/content_metadata.rb'
85
+ - 'lib/assembly-objectfile/object_fileable.rb'
86
+
87
+ # Offense count: 8
88
+ # Configuration parameters: Max.
89
+ RSpec/ExampleLength:
90
+ Exclude:
91
+ - 'spec/content_metadata_spec.rb'
92
+
93
+ # Offense count: 2
94
+ # Configuration parameters: CustomTransform, IgnoreMethods.
95
+ RSpec/FilePath:
96
+ Exclude:
97
+ - 'spec/content_metadata_spec.rb'
98
+ - 'spec/object_file_spec.rb'
99
+
100
+ # Offense count: 60
101
+ # Configuration parameters: AssignmentOnly.
102
+ RSpec/InstanceVariable:
103
+ Exclude:
104
+ - 'spec/object_file_spec.rb'
105
+
106
+ # Offense count: 28
107
+ # Configuration parameters: AggregateFailuresByDefault.
108
+ RSpec/MultipleExpectations:
109
+ Max: 29
110
+
111
+ # Offense count: 2
112
+ RSpec/RepeatedDescription:
113
+ Exclude:
114
+ - 'spec/object_file_spec.rb'
115
+
116
+ # Offense count: 6
117
+ Style/CommentedKeyword:
118
+ Exclude:
119
+ - 'lib/assembly-objectfile/content_metadata.rb'
120
+
121
+ # Offense count: 1
122
+ Style/Documentation:
123
+ Exclude:
124
+ - 'spec/**/*'
125
+ - 'test/**/*'
126
+ - 'lib/assembly-objectfile.rb'
127
+
128
+ # Offense count: 1
129
+ # Configuration parameters: MinBodyLength.
130
+ Style/GuardClause:
131
+ Exclude:
132
+ - 'lib/assembly-objectfile/object_file.rb'
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source "http://rubygems.org"
1
+ source 'http://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in .gemspec
4
4
  gemspec
data/Rakefile CHANGED
@@ -1,17 +1,17 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
2
2
 
3
- #require 'dlss/rake/dlss_release'
4
- #Dlss::Release.new
3
+ # require 'dlss/rake/dlss_release'
4
+ # Dlss::Release.new
5
5
 
6
- desc "Run console with irb (default), pry, etc."
7
- task :console, :irb do |t, args|
8
- irb = args[:irb].nil?? 'irb' : args[:irb]
9
- sh irb, "-r", "#{File.dirname(__FILE__)}/config/boot.rb"
6
+ desc 'Run console with irb (default), pry, etc.'
7
+ task :console, :irb do |_t, args|
8
+ irb = args[:irb].nil? ? 'irb' : args[:irb]
9
+ sh irb, '-r', "#{File.dirname(__FILE__)}/config/boot.rb"
10
10
  end
11
11
 
12
12
  require 'rspec/core/rake_task'
13
13
 
14
- desc "Run specs"
14
+ desc 'Run specs'
15
15
  RSpec::Core::RakeTask.new(:spec)
16
16
 
17
- task :default => :spec
17
+ task default: :spec
@@ -1,15 +1,15 @@
1
- $LOAD_PATH.push File.expand_path("../lib", __FILE__)
1
+ $LOAD_PATH.push File.expand_path('lib', __dir__)
2
2
  require 'assembly-objectfile/version'
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'assembly-objectfile'
6
6
  s.version = Assembly::ObjectFile::VERSION
7
7
  s.platform = Gem::Platform::RUBY
8
- s.authors = ["Peter Mangiafico", "Renzo Sanchez-Silva","Monty Hindman","Tony Calavano"]
9
- s.email = ["pmangiafico@stanford.edu"]
8
+ s.authors = ['Peter Mangiafico', 'Renzo Sanchez-Silva', 'Monty Hindman', 'Tony Calavano']
9
+ s.email = ['pmangiafico@stanford.edu']
10
10
  s.homepage = 'https://github.com/sul-dlss/assembly-objectfile'
11
- s.summary = %q{Ruby immplementation of file services needed to prepare objects to be accessioned in SULAIR digital library}
12
- s.description = %q{Get exif data, file sizes and more.}
11
+ s.summary = 'Ruby immplementation of file services needed to prepare objects to be accessioned in SULAIR digital library'
12
+ s.description = 'Get exif data, file sizes and more.'
13
13
  s.license = 'ALv2'
14
14
 
15
15
  s.rubyforge_project = 'assembly-objectfile'
@@ -20,13 +20,14 @@ Gem::Specification.new do |s|
20
20
  s.executables = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
21
  s.require_paths = ['lib']
22
22
 
23
+ s.add_dependency 'mime-types', '> 3'
23
24
  s.add_dependency 'mini_exiftool'
24
- s.add_dependency 'mime-types'
25
25
  s.add_dependency 'nokogiri'
26
26
 
27
- s.add_development_dependency 'rake'
28
27
  s.add_development_dependency 'json'
29
- s.add_development_dependency "rspec", "~> 3.0"
30
- s.add_development_dependency "yard"
31
-
28
+ s.add_development_dependency 'rake'
29
+ s.add_development_dependency 'rspec', '~> 3.0'
30
+ s.add_development_dependency 'rubocop'
31
+ s.add_development_dependency 'rubocop-rspec'
32
+ s.add_development_dependency 'yard'
32
33
  end
@@ -1,37 +1,39 @@
1
1
  module Assembly
2
-
3
2
  # the path to the gem, used to access profiles stored with the gem
4
3
  PATH_TO_GEM = File.expand_path(File.dirname(__FILE__) + '/..')
5
4
 
6
5
  # if input image is not one of these mime types, it will not be regarded as a valid image for the purpose of generating a JP2 derivative
7
- VALID_IMAGE_MIMETYPES=["image/jpeg","image/tiff","image/tif","image/png"]
8
-
6
+ VALID_IMAGE_MIMETYPES = ['image/jpeg', 'image/tiff', 'image/tif', 'image/png'].freeze
7
+
8
+ # if input file has one of these extensions in a 3D object, it will get the 3d resource type
9
+ VALID_THREE_DIMENSION_EXTENTIONS = ['.obj', '.ply', '.threejs', '.gltf'].freeze
10
+
9
11
  # the list of mimetypes that will be "trusted" by the unix file command; if a mimetype other than one of these is returned
10
12
  # by the file command, then a check will be made to see if exif data exists...if so, the mimetype returned by the exif data will be used
11
13
  # if no exif data exists, then the mimetype returned by the unix file command will be used
12
- TRUSTED_MIMETYPES=["text/plain","plain/text","application/pdf","text/html","application/xml"]
13
-
14
+ TRUSTED_MIMETYPES = ['text/plain', 'plain/text', 'application/pdf', 'text/html', 'application/xml'].freeze
15
+
14
16
  # default publish/preserve/shelve attributes used in content metadata
15
- FILE_ATTRIBUTES=Hash.new
17
+ FILE_ATTRIBUTES = {}
16
18
  # if no mimetype specific attributes are specified for a given file, define some defaults, and override for specific mimetypes below
17
- FILE_ATTRIBUTES['default']={:preserve=>'yes',:shelve=>'no',:publish=>'no'}
18
- FILE_ATTRIBUTES['image/tif']={:preserve=>'yes',:shelve=>'no',:publish=>'no'}
19
- FILE_ATTRIBUTES['image/tiff']={:preserve=>'yes',:shelve=>'no',:publish=>'no'}
20
- FILE_ATTRIBUTES['image/jp2']={:preserve=>'no',:shelve=>'yes',:publish=>'yes'}
21
- FILE_ATTRIBUTES['image/jpeg']={:preserve=>'yes',:shelve=>'no',:publish=>'no'}
22
- FILE_ATTRIBUTES['audio/wav']={:preserve=>'yes',:shelve=>'no',:publish=>'no'}
23
- FILE_ATTRIBUTES['audio/x-wav']={:preserve=>'yes',:shelve=>'no',:publish=>'no'}
24
- FILE_ATTRIBUTES['audio/mp3']={:preserve=>'no',:shelve=>'yes',:publish=>'yes'}
25
- FILE_ATTRIBUTES['audio/mpeg']={:preserve=>'no',:shelve=>'yes',:publish=>'yes'}
26
- FILE_ATTRIBUTES['application/pdf']={:preserve=>'yes',:shelve=>'yes',:publish=>'yes'}
27
- FILE_ATTRIBUTES['plain/text']={:preserve=>'yes',:shelve=>'yes',:publish=>'yes'}
28
- FILE_ATTRIBUTES['text/plain']={:preserve=>'yes',:shelve=>'yes',:publish=>'yes'}
29
- FILE_ATTRIBUTES['image/png']={:preserve=>'no',:shelve=>'yes',:publish=>'yes'}
30
- FILE_ATTRIBUTES['application/zip']={:preserve=>'yes',:shelve=>'no',:publish=>'no'}
31
-
19
+ FILE_ATTRIBUTES['default'] = { preserve: 'yes', shelve: 'no', publish: 'no' }
20
+ FILE_ATTRIBUTES['image/tif'] = { preserve: 'yes', shelve: 'no', publish: 'no' }
21
+ FILE_ATTRIBUTES['image/tiff'] = { preserve: 'yes', shelve: 'no', publish: 'no' }
22
+ FILE_ATTRIBUTES['image/jp2'] = { preserve: 'no', shelve: 'yes', publish: 'yes' }
23
+ FILE_ATTRIBUTES['image/jpeg'] = { preserve: 'yes', shelve: 'no', publish: 'no' }
24
+ FILE_ATTRIBUTES['audio/wav'] = { preserve: 'yes', shelve: 'no', publish: 'no' }
25
+ FILE_ATTRIBUTES['audio/x-wav'] = { preserve: 'yes', shelve: 'no', publish: 'no' }
26
+ FILE_ATTRIBUTES['audio/mp3'] = { preserve: 'no', shelve: 'yes', publish: 'yes' }
27
+ FILE_ATTRIBUTES['audio/mpeg'] = { preserve: 'no', shelve: 'yes', publish: 'yes' }
28
+ FILE_ATTRIBUTES['application/pdf'] = { preserve: 'yes', shelve: 'yes', publish: 'yes' }
29
+ FILE_ATTRIBUTES['plain/text'] = { preserve: 'yes', shelve: 'yes', publish: 'yes' }
30
+ FILE_ATTRIBUTES['text/plain'] = { preserve: 'yes', shelve: 'yes', publish: 'yes' }
31
+ FILE_ATTRIBUTES['image/png'] = { preserve: 'yes', shelve: 'yes', publish: 'no'}
32
+ FILE_ATTRIBUTES['application/zip'] = { preserve: 'yes', shelve: 'no', publish: 'no' }
33
+ FILE_ATTRIBUTES['application/json'] = { preserve: 'yes', shelve: 'yes', publish: 'yes' }
32
34
  end
33
35
 
34
36
  require 'assembly-objectfile/content_metadata'
35
37
  require 'assembly-objectfile/object_fileable'
36
38
  require 'assembly-objectfile/object_file'
37
- require 'assembly-objectfile/version'
39
+ require 'assembly-objectfile/version'
@@ -1,232 +1,237 @@
1
1
  require 'nokogiri'
2
2
 
3
3
  module Assembly
4
+ SPECIAL_DPG_FOLDERS = %w[31 44 50].freeze # these special dpg folders will force any files contained in them into their own resources, regardless of filenaming convention
5
+ # these are used when :bundle=>:dpg only
6
+
7
+ DEPRECATED_STYLES = %i[book_with_pdf book_as_image].freeze
4
8
 
5
- SPECIAL_DPG_FOLDERS = ['31', '44', '50'] # these special dpg folders will force any files contained in them into their own resources, regardless of filenaming convention
6
- # these are used when :bundle=>:dpg only
7
-
8
- DEPRECATED_STYLES = [:book_with_pdf, :book_as_image]
9
-
10
9
  # This class generates content metadata for image files
11
10
  class ContentMetadata
12
-
13
- # Generates image content XML metadata for a repository object.
14
- # This method only produces content metadata for images
15
- # and does not depend on a specific folder structure. Note that it is class level method.
16
- #
17
- # @param [Hash] params a hash containg parameters needed to produce content metadata
18
- # :druid = required - a string of druid of the repository object's druid id (with or without 'druid:' prefix)
19
- # :objects = required - an array of Assembly::ObjectFile objects containing the list of files to add to content metadata
20
- # NOTE: if you set the :bundle option to :prebundled, you will need to pass in an array of arrays, and not a flat array, as noted below
21
- # :style = optional - a symbol containing the style of metadata to create, allowed values are
22
- # :simple_image (default), contentMetadata type="image", resource type="image"
23
- # :file, contentMetadata type="file", resource type="file"
24
- # :simple_book, contentMetadata type="book", resource type="page", but any resource which has file(s) other than an image, and also contains no images at all, will be resource type="object"
25
- # :book_with_pdf, contentMetadata type="book", resource type="page", but any resource which has any file(s) other than an image will be resource type="object" - NOTE: THIS IS DEPRECATED
26
- # :book_as_image, as simple_book, but with contentMetadata type="book", resource type="image" (same rule applies for resources with non images) - NOTE: THIS IS DEPRECATED
27
- # :map, like simple_image, but with contentMetadata type="map", resource type="image"
28
- # :bundle = optional - a symbol containing the method of bundling files into resources, allowed values are
29
- # :default = all files get their own resources (default)
30
- # :filename = files with the same filename but different extensions get bundled together in a single resource
31
- # :dpg = files representing the same image but of different mimetype that use the SULAIR DPG filenaming standard (00 vs 05) get bundled together in a single resource
32
- # :prebundlded = this option requires you to prebundled the files passed in as an array of arrays, indicating how files are bundlded into resources; this is the most flexible option since it gives you full control
33
- # :add_exif = optional - a boolean to indicate if exif data should be added (mimetype, filesize, image height/width, etc.) to each file, defaults to false and is not required if project goes through assembly
34
- # :add_file_attributes = optional - a boolean to indicate if publish/preserve/shelve/role attributes should be added using defaults or by supplied override by mime/type, defaults to false and is not required if project goes through assembly
35
- # :file_attributes = optional - a hash of file attributes by mimetype to use instead of defaults, only used if add_file_attributes is also true,
36
- # If a mimetype match is not found in your hash, the default is used (either your supplied default or the gems).
37
- # e.g. {'default'=>{:preserve=>'yes',:shelve=>'yes',:publish=>'yes'},'image/tif'=>{:preserve=>'yes',:shelve=>'no',:publish=>'no'},'application/pdf'=>{:preserve=>'yes',:shelve=>'yes',:publish=>'yes'}}
38
- # :include_root_xml = optional - a boolean to indicate if the contentMetadata returned includes a root <?xml version="1.0"?> tag, defaults to true
39
- # :preserve_common_paths = optional - When creating the file "id" attribute, content metadata uses the "relative_path" attribute of the ObjectFile objects passed in. If the "relative_path" attribute is not set, the "path" attribute is used instead,
40
- # which includes a full path to the file. If the "preserve_common_paths" parameter is set to false or left off, then the common paths of all of the ObjectFile's passed in are removed from any "path" attributes. This should turn full paths into
41
- # the relative paths that are required in content metadata file id nodes. If you do not want this behavior, set "preserve_common_paths" to true. The default is false.
42
- # :flatten_folder_structure = optional - Will remove *all* folder structure when genearting file IDs (e.g. DPG subfolders like '00','05' will be removed) when generating file IDs. This is useful if the folder structure is flattened when staging files (like for DPG).
43
- # The default is false. If set to true, will override the "preserve_common_paths" parameter.
44
- # :auto_labels = optional - Will add automated resource labels (e.g. "File 1") when labels are not provided by the user. The default is true.
45
- # Example:
46
- # Assembly::ContentMetadata.create_content_metadata(:druid=>'druid:nx288wh8889',:style=>:simple_image,:objects=>object_files,:add_file_attributes=>false)
47
- def self.create_content_metadata(params={})
48
-
49
- druid=params[:druid]
50
- objects=params[:objects]
51
-
52
- raise "No objects and/or druid supplied" if druid.nil? || objects.nil?
53
-
54
- pid=druid.gsub('druid:','') # remove druid prefix when creating IDs
55
-
56
- style=params[:style] || :simple_image
57
- bundle=params[:bundle] || :default
58
- add_exif=params[:add_exif] || false
59
- auto_labels=(params[:auto_labels].nil? ? true : params[:auto_labels])
60
- add_file_attributes=params[:add_file_attributes] || false
61
- file_attributes=params[:file_attributes] || {}
62
- preserve_common_paths=params[:preserve_common_paths] || false
63
- flatten_folder_structure=params[:flatten_folder_structure] || false
64
- include_root_xml=params[:include_root_xml]
65
-
66
- all_paths=[]
67
- objects.flatten.each do |obj|
68
- raise "File '#{obj.path}' not found" unless obj.file_exists?
69
- all_paths << obj.path unless preserve_common_paths # collect all of the filenames into an array
70
- end
71
-
72
- common_path=Assembly::ObjectFile.common_path(all_paths) unless preserve_common_paths # find common paths to all files provided if needed
73
-
74
- # these are the valid strings for each type of document to be use contentMetadata type and resourceType
75
- content_type_descriptions={:file=>'file',:image=>'image',:book=>'book',:map=>'map'}
76
- resource_type_descriptions={:object=>'object',:file=>'file',:image=>'image',:book=>'page',:map=>'image'}
77
-
78
- # global sequence for resource IDs
79
- sequence = 0
80
-
81
- # a counter to use when creating auto-labels for resources, with incremenets for each type
82
- resource_type_counters=Hash.new(0)
83
-
84
- # set the object level content type id
85
- case style
86
- when :simple_image
87
- content_type_description = content_type_descriptions[:image]
88
- when :file
89
- content_type_description = content_type_descriptions[:file]
90
- when :simple_book,:book_with_pdf,:book_as_image
91
- content_type_description = content_type_descriptions[:book]
92
- when :map
93
- content_type_description = content_type_descriptions[:map]
94
- else
95
- raise "Supplied style not valid"
11
+ # Generates image content XML metadata for a repository object.
12
+ # This method only produces content metadata for images
13
+ # and does not depend on a specific folder structure. Note that it is a class-level method.
14
+ #
15
+ # @param [Hash] params a hash containing parameters needed to produce content metadata
16
+ # :druid = required - a string of druid of the repository object's druid id (with or without 'druid:' prefix)
17
+ # :objects = required - an array of Assembly::ObjectFile objects containing the list of files to add to content metadata
18
+ # NOTE: if you set the :bundle option to :prebundled, you will need to pass in an array of arrays, and not a flat array, as noted below
19
+ # :style = optional - a symbol containing the style of metadata to create, allowed values are
20
+ # :simple_image (default), contentMetadata type="image", resource type="image"
21
+ # :file, contentMetadata type="file", resource type="file"
22
+ # :simple_book, contentMetadata type="book", resource type="page", but any resource which has file(s) other than an image, and also contains no images at all, will be resource type="object"
23
+ # :book_with_pdf, contentMetadata type="book", resource type="page", but any resource which has any file(s) other than an image will be resource type="object" - NOTE: THIS IS DEPRECATED
24
+ # :book_as_image, as simple_book, but with contentMetadata type="book", resource type="image" (same rule applies for resources with non images) - NOTE: THIS IS DEPRECATED
25
+ # :map, like simple_image, but with contentMetadata type="map", resource type="image"
26
+ # :3d, contentMetadata type="3d", ".obj" and other configured 3d extension files go into resource_type="3d", everything else into resource_type="file"
27
+ # :bundle = optional - a symbol containing the method of bundling files into resources, allowed values are
28
+ # :default = all files get their own resources (default)
29
+ # :filename = files with the same filename but different extensions get bundled together in a single resource
30
+ # :dpg = files representing the same image but of different mimetype that use the SULAIR DPG filenaming standard (00 vs 05) get bundled together in a single resource
31
+ # :prebundled = this option requires you to prebundle the files passed in as an array of arrays, indicating how files are bundled into resources; this is the most flexible option since it gives you full control
32
+ # :add_exif = optional - a boolean to indicate if exif data should be added (mimetype, filesize, image height/width, etc.) to each file, defaults to false and is not required if project goes through assembly
33
+ # :add_file_attributes = optional - a boolean to indicate if publish/preserve/shelve/role attributes should be added using defaults or by supplied override by mime/type, defaults to false and is not required if project goes through assembly
34
+ # :file_attributes = optional - a hash of file attributes by mimetype to use instead of defaults, only used if add_file_attributes is also true,
35
+ # If a mimetype match is not found in your hash, the default is used (either your supplied default or the gem's).
36
+ # e.g. {'default'=>{:preserve=>'yes',:shelve=>'yes',:publish=>'yes'},'image/tif'=>{:preserve=>'yes',:shelve=>'no',:publish=>'no'},'application/pdf'=>{:preserve=>'yes',:shelve=>'yes',:publish=>'yes'}}
37
+ # :include_root_xml = optional - a boolean to indicate if the contentMetadata returned includes a root <?xml version="1.0"?> tag, defaults to true
38
+ # :preserve_common_paths = optional - When creating the file "id" attribute, content metadata uses the "relative_path" attribute of the ObjectFile objects passed in. If the "relative_path" attribute is not set, the "path" attribute is used instead,
39
+ # which includes a full path to the file. If the "preserve_common_paths" parameter is set to false or left off, then the common paths of all of the ObjectFile's passed in are removed from any "path" attributes. This should turn full paths into
40
+ # the relative paths that are required in content metadata file id nodes. If you do not want this behavior, set "preserve_common_paths" to true. The default is false.
41
+ # :flatten_folder_structure = optional - Will remove *all* folder structure when generating file IDs (e.g. DPG subfolders like '00','05' will be removed). This is useful if the folder structure is flattened when staging files (like for DPG).
42
+ # The default is false. If set to true, will override the "preserve_common_paths" parameter.
43
+ # :auto_labels = optional - Will add automated resource labels (e.g. "File 1") when labels are not provided by the user. The default is true.
44
+ # Example:
45
+ # Assembly::ContentMetadata.create_content_metadata(:druid=>'druid:nx288wh8889',:style=>:simple_image,:objects=>object_files,:add_file_attributes=>false)
46
+ def self.create_content_metadata(params = {})
47
+ druid = params[:druid]
48
+ objects = params[:objects]
49
+
50
+ raise 'No objects and/or druid supplied' if druid.nil? || objects.nil?
51
+
52
+ pid = druid.gsub('druid:', '') # remove druid prefix when creating IDs
53
+
54
+ style = params[:style] || :simple_image
55
+ bundle = params[:bundle] || :default
56
+ add_exif = params[:add_exif] || false
57
+ auto_labels = (params[:auto_labels].nil? ? true : params[:auto_labels])
58
+ add_file_attributes = params[:add_file_attributes] || false
59
+ file_attributes = params[:file_attributes] || {}
60
+ preserve_common_paths = params[:preserve_common_paths] || false
61
+ flatten_folder_structure = params[:flatten_folder_structure] || false
62
+ include_root_xml = params[:include_root_xml]
63
+
64
+ all_paths = []
65
+ objects.flatten.each do |obj|
66
+ raise "File '#{obj.path}' not found" unless obj.file_exists?
67
+
68
+ all_paths << obj.path unless preserve_common_paths # collect all of the filenames into an array
69
+ end
70
+
71
+ common_path = Assembly::ObjectFile.common_path(all_paths) unless preserve_common_paths # find common paths to all files provided if needed
72
+
73
+ # these are the valid strings for each type of document, to be used for the contentMetadata type and resourceType
74
+ content_type_descriptions = { file: 'file', image: 'image', book: 'book', map: 'map', '3d': '3d' }
75
+ resource_type_descriptions = { object: 'object', file: 'file', image: 'image', book: 'page', map: 'image', '3d': '3d' }
76
+
77
+ # global sequence for resource IDs
78
+ sequence = 0
79
+
80
+ # a counter to use when creating auto-labels for resources, with incremenets for each type
81
+ resource_type_counters = Hash.new(0)
82
+
83
+ # set the object level content type id
84
+ case style
85
+ when :simple_image
86
+ content_type_description = content_type_descriptions[:image]
87
+ when :file
88
+ content_type_description = content_type_descriptions[:file]
89
+ when :simple_book, :book_with_pdf, :book_as_image
90
+ content_type_description = content_type_descriptions[:book]
91
+ when :map
92
+ content_type_description = content_type_descriptions[:map]
93
+ when :'3d'
94
+ content_type_description = content_type_descriptions[:'3d']
95
+ else
96
+ raise 'Supplied style not valid'
97
+ end
98
+
99
+ puts "WARNING - the style #{style} is now deprecated and should not be used." if DEPRECATED_STYLES.include? style
100
+
101
+ # determine how many resources to create
102
+ # set up an array of arrays, where the outer array has one entry per resource, and each inner array holds the object files contained in that resource
103
+ case bundle
104
+ when :default # one resource per object
105
+ resources = objects.collect { |obj| [obj] }
106
+ when :filename # one resource per distinct filename (excluding extension)
107
+ # loop over distinct filenames, this determines how many resources we will have and
108
+ # create one resource node per distinct filename, collecting the relevant objects with the distinct filename into that resource
109
+ resources = []
110
+ distinct_filenames = objects.collect(&:filename_without_ext).uniq # find all the unique filenames in the set of objects, leaving off extensions and base paths
111
+ distinct_filenames.each { |distinct_filename| resources << objects.collect { |obj| obj if obj.filename_without_ext == distinct_filename }.compact }
112
+ when :dpg # group by DPG filename
113
+ # loop over distinct dpg base names, this determines how many resources we will have and
114
+ # create one resource node per distinct dpg base name, collecting the relevant objects with the distinct names into that resource
115
+ resources = []
116
+ distinct_filenames = objects.collect(&:dpg_basename).uniq # find all the unique DPG filenames in the set of objects
117
+ distinct_filenames.each do |distinct_filename|
118
+ resources << objects.collect { |obj| obj if obj.dpg_basename == distinct_filename && !is_special_dpg_folder?(obj.dpg_folder) }.compact
96
119
  end
97
-
98
- puts "WARNING - the style #{style} is now deprecated and should not be used." if DEPRECATED_STYLES.include? style
99
-
100
- # determine how many resources to create
101
- # setup an array of arrays, where the first array is the number of resources, and the second array is the object files containined in that resource
102
- case bundle
103
- when :default # one resource per object
104
- resources=objects.collect {|obj| [obj]}
105
- when :filename # one resource per distinct filename (excluding extension)
106
- # loop over distinct filenames, this determines how many resources we will have and
107
- # create one resource node per distinct filename, collecting the relevant objects with the distinct filename into that resource
108
- resources=[]
109
- distinct_filenames=objects.collect {|obj| obj.filename_without_ext}.uniq # find all the unique filenames in the set of objects, leaving off extensions and base paths
110
- distinct_filenames.each {|distinct_filename| resources << objects.collect {|obj| obj if obj.filename_without_ext == distinct_filename}.compact }
111
- when :dpg # group by DPG filename
112
- # loop over distinct dpg base names, this determines how many resources we will have and
113
- # create one resource node per distinct dpg base name, collecting the relevant objects with the distinct names into that resource
114
- resources=[]
115
- distinct_filenames=objects.collect {|obj| obj.dpg_basename}.uniq # find all the unique DPG filenames in the set of objects
116
- distinct_filenames.each do |distinct_filename|
117
- resources << objects.collect {|obj| obj if obj.dpg_basename == distinct_filename && !self.is_special_dpg_folder?(obj.dpg_folder)}.compact
120
+ objects.each { |obj| resources << [obj] if is_special_dpg_folder?(obj.dpg_folder) } # certain subfolders require individual resources for files within them regardless of file-naming convention
121
+ when :prebundled
122
+ # if the user specifies this method, they will pass in an array of arrays, indicating resources, so we don't need to bundle in the gem
123
+ resources = objects
124
+ else
125
+ raise 'Invalid bundle method'
126
+ end
127
+
128
+ resources.delete([]) # delete any empty elements
129
+
130
+ builder = Nokogiri::XML::Builder.new do |xml|
131
+ xml.contentMetadata(objectId: druid.to_s, type: content_type_description) do
132
+ resources.each do |resource_files| # iterate over all the resources
133
+ # start a new resource element
134
+ sequence += 1
135
+ resource_id = "#{pid}_#{sequence}"
136
+
137
+ # grab all of the file types within a resource into an array so we can decide what the resource type should be
138
+ resource_file_types = resource_files.collect(&:object_type)
139
+ resource_has_non_images = !(resource_file_types - [:image]).empty?
140
+ resource_from_special_dpg_folder = resource_files.collect { |obj| is_special_dpg_folder?(obj.dpg_folder) }.uniq
141
+
142
+ if bundle == :dpg && resource_from_special_dpg_folder.include?(true) # objects in the special DPG folders are always type=object when we are using :bundle=>:dpg
143
+ resource_type_description = resource_type_descriptions[:object]
144
+ else # otherwise look at the style to determine the resource_type_description
145
+ case style
146
+ when :simple_image
147
+ resource_type_description = resource_type_descriptions[:image]
148
+ when :file
149
+ resource_type_description = resource_type_descriptions[:file]
150
+ when :simple_book # in a simple book project, all resources are pages unless they are *all* non-images -- if so, switch the type to object
151
+ resource_type_description = resource_has_non_images && resource_file_types.include?(:image) == false ? resource_type_descriptions[:object] : resource_type_descriptions[:book]
152
+ when :book_as_image # same as simple book, but all resources are images instead of pages, unless we need to switch them to object type
153
+ resource_type_description = resource_has_non_images && resource_file_types.include?(:image) == false ? resource_type_descriptions[:object] : resource_type_descriptions[:image]
154
+ when :book_with_pdf # in book with PDF type, if we find a resource with *any* non images, switch its type from book to object
155
+ resource_type_description = resource_has_non_images ? resource_type_descriptions[:object] : resource_type_descriptions[:book]
156
+ when :map
157
+ resource_type_description = resource_type_descriptions[:map]
158
+ when :'3d'
159
+ resource_extensions = resource_files.collect {|obj| obj.ext}
160
+ if (resource_extensions & VALID_THREE_DIMENSION_EXTENTIONS).empty? # if this resource contains no known 3D file extensions, the resource type is file
161
+ resource_type_description = resource_type_descriptions[:file]
162
+ else # otherwise the resource type is 3d
163
+ resource_type_description = resource_type_descriptions[:'3d']
164
+ end
165
+ end
118
166
  end
119
- objects.each {|obj| resources << [obj] if self.is_special_dpg_folder?(obj.dpg_folder)} # certain subfolders require individual resources for files within them regardless of file-naming convention
120
- when :prebundled
121
- # if the user specifies this method, they will pass in an array of arrays, indicating resources, so we don't need to bundle in the gem
122
- resources=objects
123
- else
124
- raise "Invalid bundle method"
125
- end
126
-
127
- resources.delete([]) # delete any empty elements
128
-
129
- builder = Nokogiri::XML::Builder.new do |xml|
130
- xml.contentMetadata(:objectId => "#{druid}",:type => content_type_description) {
131
- resources.each do |resource_files| # iterate over all the resources
132
-
133
- # start a new resource element
134
- sequence += 1
135
- resource_id = "#{pid}_#{sequence}"
136
-
137
- # grab all of the file types within a resource into an array so we can decide what the resource type should be
138
- resource_file_types=resource_files.collect {|obj| obj.object_type}
139
- resource_has_non_images=((resource_file_types-[:image]).size > 0)
140
- resource_from_special_dpg_folder=resource_files.collect {|obj| self.is_special_dpg_folder?(obj.dpg_folder)}.uniq
141
-
142
- if bundle == :dpg && resource_from_special_dpg_folder.include?(true) # objects in the special DPG folders are always type=object when we using :bundle=>:dpg
143
- resource_type_description = resource_type_descriptions[:object]
144
- else # otherwise look at the style to determine the resource_type_description
145
- case style
146
- when :simple_image
147
- resource_type_description = resource_type_descriptions[:image]
148
- when :file
149
- resource_type_description = resource_type_descriptions[:file]
150
- when :simple_book # in a simple book project, all resources are pages unless they are *all* non-images -- if so, switch the type to object
151
- resource_type_description = (resource_has_non_images && resource_file_types.include?(:image) == false) ? resource_type_descriptions[:object] : resource_type_descriptions[:book]
152
- when :book_as_image # same as simple book, but all resources are images instead of pages, unless we need to switch them to object type
153
- resource_type_description = (resource_has_non_images && resource_file_types.include?(:image) == false) ? resource_type_descriptions[:object] : resource_type_descriptions[:image]
154
- when :book_with_pdf # in book with PDF type, if we find a resource with *any* non images, switch it's type from book to object
155
- resource_type_description = resource_has_non_images ? resource_type_descriptions[:object] : resource_type_descriptions[:book]
156
- when :map #
157
- resource_type_description = resource_type_descriptions[:map]
158
- end
159
- end
160
-
161
- resource_type_counters[resource_type_description.to_sym]+=1 # each resource type description gets its own incrementing counter
162
-
163
- xml.resource(:id => resource_id,:sequence => sequence,:type => resource_type_description) {
164
-
165
- # create a generic resource label if needed
166
- resource_label = (auto_labels == true ? "#{resource_type_description.capitalize} #{resource_type_counters[resource_type_description.to_sym]}" : "")
167
-
168
- # but if one of the files has a label, use it instead
169
- resource_files.each {|obj| resource_label = obj.label unless obj.label.nil? || obj.label.empty? }
170
-
171
- xml.label(resource_label) unless resource_label.empty?
172
-
173
- resource_files.each do |obj| # iterate over all the files in a resource
174
-
175
- mimetype = obj.mimetype if (add_file_attributes || add_exif) # we only need to compute the mimetype if we are adding file attributes or exif info, otherwise skip it for performance reasons
176
-
177
- # set file id attribute, first check the relative_path parameter on the object, and if it is set, just use that
178
- if obj.relative_path
179
- file_id=obj.relative_path
180
- else
181
- # if the relative_path attribute is not set, then use the path attribute and check to see if we need to remove the common part of the path
182
- file_id=preserve_common_paths ? obj.path : obj.path.gsub(common_path,'')
183
- file_id=File.basename(file_id) if flatten_folder_structure
184
- end
185
-
186
- xml_file_params = {:id=> file_id}
187
-
188
- if add_file_attributes
189
- file_attributes_hash=obj.file_attributes || file_attributes[mimetype] || file_attributes['default'] || Assembly::FILE_ATTRIBUTES[mimetype] || Assembly::FILE_ATTRIBUTES['default']
190
- xml_file_params.merge!({
191
- :preserve => file_attributes_hash[:preserve],
192
- :publish => file_attributes_hash[:publish],
193
- :shelve => file_attributes_hash[:shelve],
194
- :role => file_attributes_hash[:role],
195
- })
196
- xml_file_params.reject! { |k, v| v.nil? || v.empty? }
197
- end
198
-
199
- xml_file_params.merge!({:mimetype => mimetype,:size => obj.filesize}) if add_exif
200
- xml.file(xml_file_params) {
201
- if add_exif # add exif info if the user requested it
202
- xml.checksum(obj.sha1, :type => 'sha1')
203
- xml.checksum(obj.md5, :type => 'md5')
204
- xml.imageData(:height => obj.exif.imageheight, :width => obj.exif.imagewidth) if obj.image? # add image data for an image
205
- elsif obj.provider_md5 || obj.provider_sha1 # if we did not add exif info, see if there are user supplied checksums to add
206
- xml.checksum(obj.provider_sha1, :type => 'sha1') if obj.provider_sha1
207
- xml.checksum(obj.provider_md5, :type => 'md5') if obj.provider_md5
208
- end #add_exif
209
- }
210
- end # end resource_files.each
211
- }
212
- end # resources.each
213
- }
214
- end
215
-
216
- if include_root_xml == false
217
- result = builder.doc.root.to_xml
218
- else
219
- result = builder.to_xml
167
+
168
+ resource_type_counters[resource_type_description.to_sym] += 1 # each resource type description gets its own incrementing counter
169
+
170
+ xml.resource(id: resource_id, sequence: sequence, type: resource_type_description) do
171
+ # create a generic resource label if needed
172
+ resource_label = (auto_labels == true ? "#{resource_type_description.capitalize} #{resource_type_counters[resource_type_description.to_sym]}" : '')
173
+
174
+ # but if one of the files has a label, use it instead
175
+ resource_files.each { |obj| resource_label = obj.label unless obj.label.nil? || obj.label.empty? }
176
+
177
+ xml.label(resource_label) unless resource_label.empty?
178
+
179
+ resource_files.each do |obj| # iterate over all the files in a resource
180
+ mimetype = obj.mimetype if add_file_attributes || add_exif # we only need to compute the mimetype if we are adding file attributes or exif info, otherwise skip it for performance reasons
181
+
182
+ # set file id attribute, first check the relative_path parameter on the object, and if it is set, just use that
183
+ if obj.relative_path
184
+ file_id = obj.relative_path
185
+ else
186
+ # if the relative_path attribute is not set, then use the path attribute and check to see if we need to remove the common part of the path
187
+ file_id = preserve_common_paths ? obj.path : obj.path.gsub(common_path, '')
188
+ file_id = File.basename(file_id) if flatten_folder_structure
189
+ end
190
+
191
+ xml_file_params = { id: file_id }
192
+
193
+ if add_file_attributes
194
+ file_attributes_hash = obj.file_attributes || file_attributes[mimetype] || file_attributes['default'] || Assembly::FILE_ATTRIBUTES[mimetype] || Assembly::FILE_ATTRIBUTES['default']
195
+ xml_file_params.merge!(
196
+ preserve: file_attributes_hash[:preserve],
197
+ publish: file_attributes_hash[:publish],
198
+ shelve: file_attributes_hash[:shelve],
199
+ role: file_attributes_hash[:role]
200
+ )
201
+ xml_file_params.reject! { |_k, v| v.nil? || v.empty? }
202
+ end
203
+
204
+ if add_exif
205
+ xml_file_params[:mimetype] = mimetype
206
+ xml_file_params[:size] = obj.filesize
207
+ end
208
+ xml.file(xml_file_params) do
209
+ if add_exif # add exif info if the user requested it
210
+ xml.checksum(obj.sha1, type: 'sha1')
211
+ xml.checksum(obj.md5, type: 'md5')
212
+ xml.imageData(height: obj.exif.imageheight, width: obj.exif.imagewidth) if obj.image? # add image data for an image
213
+ elsif obj.provider_md5 || obj.provider_sha1 # if we did not add exif info, see if there are user supplied checksums to add
214
+ xml.checksum(obj.provider_sha1, type: 'sha1') if obj.provider_sha1
215
+ xml.checksum(obj.provider_md5, type: 'md5') if obj.provider_md5
216
+ end # add_exif
217
+ end
218
+ end # end resource_files.each
219
+ end
220
+ end # resources.each
220
221
  end
221
-
222
- return result
223
-
224
- end # create_content_metadata
225
-
226
- def self.is_special_dpg_folder?(folder)
227
- SPECIAL_DPG_FOLDERS.include?(folder)
228
222
  end
229
-
223
+
224
+ result = if include_root_xml == false
225
+ builder.doc.root.to_xml
226
+ else
227
+ builder.to_xml
228
+ end
229
+
230
+ result
231
+ end # create_content_metadata
232
+
233
+ def self.is_special_dpg_folder?(folder)
234
+ SPECIAL_DPG_FOLDERS.include?(folder)
235
+ end
230
236
  end # class
231
-
232
237
  end # module