assembly-objectfile 1.13.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -1
  3. data/.rubocop.yml +1 -1
  4. data/.rubocop_todo.yml +14 -80
  5. data/Gemfile.lock +106 -0
  6. data/assembly-objectfile.gemspec +1 -3
  7. data/lib/assembly-objectfile/object_file.rb +253 -3
  8. data/lib/assembly-objectfile/version.rb +1 -1
  9. data/lib/assembly-objectfile.rb +0 -5
  10. data/spec/object_file_spec.rb +411 -172
  11. data/spec/spec_helper.rb +2 -31
  12. metadata +19 -107
  13. data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
  14. data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
  15. data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
  16. data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
  17. data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
  18. data/lib/assembly-objectfile/content_metadata.rb +0 -117
  19. data/lib/assembly-objectfile/object_fileable.rb +0 -275
  20. data/spec/content_metadata_spec.rb +0 -809
  21. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
  22. data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
  23. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
  24. data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
  25. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
  26. data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
  27. data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
  28. data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
  29. data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
  30. data/spec/test_data/input/res1_image1.jp2 +0 -0
  31. data/spec/test_data/input/res1_image2.jp2 +0 -0
  32. data/spec/test_data/input/res1_image2.tif +0 -0
  33. data/spec/test_data/input/res1_teifile.txt +0 -1
  34. data/spec/test_data/input/res2_image1.jp2 +0 -0
  35. data/spec/test_data/input/res2_image1.tif +0 -0
  36. data/spec/test_data/input/res2_image2.jp2 +0 -0
  37. data/spec/test_data/input/res2_image2.tif +0 -0
  38. data/spec/test_data/input/res2_teifile.txt +0 -1
  39. data/spec/test_data/input/res2_textfile.txt +0 -1
  40. data/spec/test_data/input/res3_image1.jp2 +0 -0
  41. data/spec/test_data/input/res3_image1.tif +0 -0
  42. data/spec/test_data/input/res3_teifile.txt +0 -1
  43. data/spec/test_data/input/test.pdf +0 -1
  44. data/spec/test_data/input/test.svg +0 -2
  45. data/spec/test_data/input/test2.jp2 +0 -0
  46. data/spec/test_data/input/test2.tif +0 -0
data/spec/spec_helper.rb CHANGED
@@ -4,7 +4,7 @@ require 'simplecov'
4
4
  SimpleCov.start
5
5
 
6
6
  require File.expand_path("#{File.dirname(__FILE__)}/../config/boot")
7
- require 'byebug'
7
+ require 'pry-byebug'
8
8
 
9
9
  RSpec.configure do |config|
10
10
  config.order = 'random'
@@ -14,45 +14,16 @@ TEST_DATA_DIR = File.join(Assembly::PATH_TO_GEM, 'spec', 'test_data')
14
14
  TEST_INPUT_DIR = File.join(TEST_DATA_DIR, 'input')
15
15
  TEST_OUTPUT_DIR = File.join(TEST_DATA_DIR, 'output')
16
16
  TEST_TIF_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.tif')
17
- TEST_TIF_INPUT_FILE2 = File.join(TEST_INPUT_DIR, 'test2.tif')
18
- TEST_JPEG_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.jpg')
17
+ TEST_JPEG_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.jpg')
19
18
  TEST_JP2_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.jp2')
20
- TEST_JP2_INPUT_FILE2 = File.join(TEST_INPUT_DIR, 'test2.jp2')
21
- TEST_SVG_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.svg')
22
19
  TEST_JP2_OUTPUT_FILE = File.join(TEST_OUTPUT_DIR, 'test.jp2')
23
- TEST_PDF_FILE = File.join(TEST_INPUT_DIR, 'test.pdf')
24
-
25
- TEST_DPG_TIF = File.join(TEST_INPUT_DIR, 'oo000oo0001', '00', 'oo000oo0001_00_001.tif')
26
- TEST_DPG_TIF2 = File.join(TEST_INPUT_DIR, 'oo000oo0001', '00', 'oo000oo0001_00_002.tif')
27
- TEST_DPG_JP = File.join(TEST_INPUT_DIR, 'oo000oo0001', '05', 'oo000oo0001_05_001.jp2')
28
- TEST_DPG_JP2 = File.join(TEST_INPUT_DIR, 'oo000oo0001', '05', 'oo000oo0001_05_002.jp2')
29
- TEST_DPG_PDF = File.join(TEST_INPUT_DIR, 'oo000oo0001', '15', 'oo000oo0001_15_001.pdf')
30
- TEST_DPG_PDF2 = File.join(TEST_INPUT_DIR, 'oo000oo0001', '15', 'oo000oo0001_15_002.pdf')
31
- TEST_DPG_SPECIAL_PDF1 = File.join(TEST_INPUT_DIR, 'oo000oo0001', 'oo000oo0001_book.pdf')
32
- TEST_DPG_SPECIAL_PDF2 = File.join(TEST_INPUT_DIR, 'oo000oo0001', '31', 'oo000oo0001_31_001.pdf')
33
- TEST_DPG_SPECIAL_TIF = File.join(TEST_INPUT_DIR, 'oo000oo0001', '50', 'oo000oo0001_50_001.tif')
34
20
 
35
21
  TEST_TIFF_NO_COLOR_FILE = File.join(TEST_INPUT_DIR, 'test_no_color_profile.tif')
36
22
 
37
23
  TEST_RES1_TIF1 = File.join(TEST_INPUT_DIR, 'res1_image1.tif')
38
- TEST_RES1_JP1 = File.join(TEST_INPUT_DIR, 'res1_image1.jp2')
39
- TEST_RES1_TIF2 = File.join(TEST_INPUT_DIR, 'res1_image2.tif')
40
- TEST_RES1_JP2 = File.join(TEST_INPUT_DIR, 'res1_image2.jp2')
41
- TEST_RES1_TEI = File.join(TEST_INPUT_DIR, 'res1_teifile.txt')
42
24
  TEST_RES1_TEXT = File.join(TEST_INPUT_DIR, 'res1_textfile.txt')
43
25
  TEST_RES1_PDF = File.join(TEST_INPUT_DIR, 'res1_transcript.pdf')
44
26
 
45
- TEST_RES2_TIF1 = File.join(TEST_INPUT_DIR, 'res2_image1.tif')
46
- TEST_RES2_JP1 = File.join(TEST_INPUT_DIR, 'res2_image1.jp2')
47
- TEST_RES2_TIF2 = File.join(TEST_INPUT_DIR, 'res2_image2.tif')
48
- TEST_RES2_JP2 = File.join(TEST_INPUT_DIR, 'res2_image2.jp2')
49
- TEST_RES2_TEI = File.join(TEST_INPUT_DIR, 'res2_teifile.txt')
50
- TEST_RES2_TEXT = File.join(TEST_INPUT_DIR, 'res2_textfile.txt')
51
-
52
- TEST_RES3_TIF1 = File.join(TEST_INPUT_DIR, 'res3_image1.tif')
53
- TEST_RES3_JP1 = File.join(TEST_INPUT_DIR, 'res3_image1.jp2')
54
- TEST_RES3_TEI = File.join(TEST_INPUT_DIR, 'res3_teifile.txt')
55
-
56
27
  TEST_FILE_NO_EXIF = File.join(TEST_INPUT_DIR, 'file_with_no_exif.xml')
57
28
 
58
29
  TEST_JSON_FILE = File.join(TEST_INPUT_DIR, 'test.json')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: assembly-objectfile
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.13.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Mangiafico
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: exe
13
13
  cert_chain: []
14
- date: 2022-06-03 00:00:00.000000000 Z
14
+ date: 2022-07-08 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: activesupport
@@ -41,34 +41,6 @@ dependencies:
41
41
  - - ">="
42
42
  - !ruby/object:Gem::Version
43
43
  version: '0'
44
- - !ruby/object:Gem::Dependency
45
- name: dry-struct
46
- requirement: !ruby/object:Gem::Requirement
47
- requirements:
48
- - - "~>"
49
- - !ruby/object:Gem::Version
50
- version: '1.0'
51
- type: :runtime
52
- prerelease: false
53
- version_requirements: !ruby/object:Gem::Requirement
54
- requirements:
55
- - - "~>"
56
- - !ruby/object:Gem::Version
57
- version: '1.0'
58
- - !ruby/object:Gem::Dependency
59
- name: dry-types
60
- requirement: !ruby/object:Gem::Requirement
61
- requirements:
62
- - - "~>"
63
- - !ruby/object:Gem::Version
64
- version: '1.1'
65
- type: :runtime
66
- prerelease: false
67
- version_requirements: !ruby/object:Gem::Requirement
68
- requirements:
69
- - - "~>"
70
- - !ruby/object:Gem::Version
71
- version: '1.1'
72
44
  - !ruby/object:Gem::Dependency
73
45
  name: mime-types
74
46
  requirement: !ruby/object:Gem::Requirement
@@ -125,6 +97,20 @@ dependencies:
125
97
  - - ">="
126
98
  - !ruby/object:Gem::Version
127
99
  version: '0'
100
+ - !ruby/object:Gem::Dependency
101
+ name: pry-byebug
102
+ requirement: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ type: :development
108
+ prerelease: false
109
+ version_requirements: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
128
114
  - !ruby/object:Gem::Dependency
129
115
  name: rake
130
116
  requirement: !ruby/object:Gem::Requirement
@@ -210,6 +196,7 @@ files:
210
196
  - ".rubocop_todo.yml"
211
197
  - ".rvmrc.example"
212
198
  - Gemfile
199
+ - Gemfile.lock
213
200
  - LICENSE
214
201
  - README.md
215
202
  - Rakefile
@@ -218,58 +205,24 @@ files:
218
205
  - bin/run_all_tests
219
206
  - config/boot.rb
220
207
  - lib/assembly-objectfile.rb
221
- - lib/assembly-objectfile/content_metadata.rb
222
- - lib/assembly-objectfile/content_metadata/config.rb
223
- - lib/assembly-objectfile/content_metadata/file.rb
224
- - lib/assembly-objectfile/content_metadata/file_set.rb
225
- - lib/assembly-objectfile/content_metadata/file_set_builder.rb
226
- - lib/assembly-objectfile/content_metadata/nokogiri_builder.rb
227
208
  - lib/assembly-objectfile/object_file.rb
228
- - lib/assembly-objectfile/object_fileable.rb
229
209
  - lib/assembly-objectfile/version.rb
230
210
  - profiles/AdobeRGB1998.icc
231
211
  - profiles/DotGain20.icc
232
212
  - profiles/sRGBIEC6196621.icc
233
- - spec/content_metadata_spec.rb
234
213
  - spec/object_file_spec.rb
235
214
  - spec/spec_helper.rb
236
215
  - spec/test_data/empty.txt
237
216
  - spec/test_data/input/.empty
238
217
  - spec/test_data/input/file_with_no_exif.xml
239
- - spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif
240
- - spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif
241
- - spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2
242
- - spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2
243
- - spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf
244
- - spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf
245
- - spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf
246
- - spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif
247
- - spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf
248
- - spec/test_data/input/res1_image1.jp2
249
218
  - spec/test_data/input/res1_image1.tif
250
- - spec/test_data/input/res1_image2.jp2
251
- - spec/test_data/input/res1_image2.tif
252
- - spec/test_data/input/res1_teifile.txt
253
219
  - spec/test_data/input/res1_textfile.txt
254
220
  - spec/test_data/input/res1_transcript.pdf
255
- - spec/test_data/input/res2_image1.jp2
256
- - spec/test_data/input/res2_image1.tif
257
- - spec/test_data/input/res2_image2.jp2
258
- - spec/test_data/input/res2_image2.tif
259
- - spec/test_data/input/res2_teifile.txt
260
- - spec/test_data/input/res2_textfile.txt
261
- - spec/test_data/input/res3_image1.jp2
262
- - spec/test_data/input/res3_image1.tif
263
- - spec/test_data/input/res3_teifile.txt
264
221
  - spec/test_data/input/someobject.obj
265
222
  - spec/test_data/input/someobject.ply
266
223
  - spec/test_data/input/test.jp2
267
224
  - spec/test_data/input/test.json
268
- - spec/test_data/input/test.pdf
269
- - spec/test_data/input/test.svg
270
225
  - spec/test_data/input/test.tif
271
- - spec/test_data/input/test2.jp2
272
- - spec/test_data/input/test2.tif
273
226
  - spec/test_data/input/test_no_color_profile.tif
274
227
  homepage: https://github.com/sul-dlss/assembly-objectfile
275
228
  licenses:
@@ -291,50 +244,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
291
244
  - !ruby/object:Gem::Version
292
245
  version: '0'
293
246
  requirements: []
294
- rubygems_version: 3.2.32
247
+ rubygems_version: 3.3.7
295
248
  signing_key:
296
249
  specification_version: 4
297
250
  summary: Ruby immplementation of file services needed to prepare objects to be accessioned
298
251
  in SULAIR digital library
299
- test_files:
300
- - spec/content_metadata_spec.rb
301
- - spec/object_file_spec.rb
302
- - spec/spec_helper.rb
303
- - spec/test_data/empty.txt
304
- - spec/test_data/input/.empty
305
- - spec/test_data/input/file_with_no_exif.xml
306
- - spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif
307
- - spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif
308
- - spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2
309
- - spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2
310
- - spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf
311
- - spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf
312
- - spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf
313
- - spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif
314
- - spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf
315
- - spec/test_data/input/res1_image1.jp2
316
- - spec/test_data/input/res1_image1.tif
317
- - spec/test_data/input/res1_image2.jp2
318
- - spec/test_data/input/res1_image2.tif
319
- - spec/test_data/input/res1_teifile.txt
320
- - spec/test_data/input/res1_textfile.txt
321
- - spec/test_data/input/res1_transcript.pdf
322
- - spec/test_data/input/res2_image1.jp2
323
- - spec/test_data/input/res2_image1.tif
324
- - spec/test_data/input/res2_image2.jp2
325
- - spec/test_data/input/res2_image2.tif
326
- - spec/test_data/input/res2_teifile.txt
327
- - spec/test_data/input/res2_textfile.txt
328
- - spec/test_data/input/res3_image1.jp2
329
- - spec/test_data/input/res3_image1.tif
330
- - spec/test_data/input/res3_teifile.txt
331
- - spec/test_data/input/someobject.obj
332
- - spec/test_data/input/someobject.ply
333
- - spec/test_data/input/test.jp2
334
- - spec/test_data/input/test.json
335
- - spec/test_data/input/test.pdf
336
- - spec/test_data/input/test.svg
337
- - spec/test_data/input/test.tif
338
- - spec/test_data/input/test2.jp2
339
- - spec/test_data/input/test2.tif
340
- - spec/test_data/input/test_no_color_profile.tif
252
+ test_files: []
@@ -1,26 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'dry-struct'
4
- require 'dry-types'
5
-
6
- module Assembly
7
- class ContentMetadata
8
- # Types for the configuration
9
- module Types
10
- include Dry.Types()
11
- end
12
-
13
- # Represents a configuration for generating the content metadata
14
- class Config < Dry::Struct
15
- STYLES = %w[image file book map 3d document webarchive-seed].freeze
16
- READING_ORDERS = %w[ltr rtl].freeze
17
- attribute :auto_labels, Types::Strict::Bool.default(true)
18
- attribute :flatten_folder_structure, Types::Strict::Bool.default(false)
19
- attribute :add_file_attributes, Types::Strict::Bool.default(false)
20
- attribute :add_exif, Types::Strict::Bool.default(false)
21
- attribute :file_attributes, Types::Strict::Hash.default({}.freeze)
22
- attribute :type, Types::Strict::String.enum(*STYLES)
23
- attribute :reading_order, Types::Strict::String.default('ltr').enum(*READING_ORDERS)
24
- end
25
- end
26
- end
@@ -1,63 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'active_support/core_ext/module/delegation'
4
-
5
- module Assembly
6
- class ContentMetadata
7
- # Represents a single File
8
- class File
9
- # default publish/preserve/shelve attributes used in content metadata
10
- # if no mimetype specific attributes are specified for a given file, define some defaults, and override for specific mimetypes below
11
- ATTRIBUTES_FOR_TYPE = {
12
- 'default' => { preserve: 'yes', shelve: 'no', publish: 'no' },
13
- 'image/tif' => { preserve: 'yes', shelve: 'no', publish: 'no' },
14
- 'image/tiff' => { preserve: 'yes', shelve: 'no', publish: 'no' },
15
- 'image/jp2' => { preserve: 'no', shelve: 'yes', publish: 'yes' },
16
- 'image/jpeg' => { preserve: 'yes', shelve: 'no', publish: 'no' },
17
- 'audio/wav' => { preserve: 'yes', shelve: 'no', publish: 'no' },
18
- 'audio/x-wav' => { preserve: 'yes', shelve: 'no', publish: 'no' },
19
- 'audio/mp3' => { preserve: 'no', shelve: 'yes', publish: 'yes' },
20
- 'audio/mpeg' => { preserve: 'no', shelve: 'yes', publish: 'yes' },
21
- 'application/pdf' => { preserve: 'yes', shelve: 'yes', publish: 'yes' },
22
- 'plain/text' => { preserve: 'yes', shelve: 'yes', publish: 'yes' },
23
- 'text/plain' => { preserve: 'yes', shelve: 'yes', publish: 'yes' },
24
- 'image/png' => { preserve: 'yes', shelve: 'yes', publish: 'no' },
25
- 'application/zip' => { preserve: 'yes', shelve: 'no', publish: 'no' },
26
- 'application/json' => { preserve: 'yes', shelve: 'yes', publish: 'yes' }
27
- }.freeze
28
-
29
- # @param [Symbol] bundle
30
- # @param [Assembly::ObjectFile] file
31
- # @param style
32
- def initialize(file:, bundle: nil, style: nil)
33
- @bundle = bundle
34
- @file = file
35
- @style = style
36
- end
37
-
38
- delegate :sha1, :md5, :provider_md5, :provider_sha1, :mimetype, :filesize, :image?, :valid_image?, to: :file
39
-
40
- def file_id(common_path:, flatten_folder_structure:)
41
- # set file id attribute, first check the relative_path parameter on the object, and if it is set, just use that
42
- return file.relative_path if file.relative_path
43
-
44
- # if the relative_path attribute is not set, then use the path attribute and check to see if we need to remove the common part of the path
45
- file_id = common_path ? file.path.gsub(common_path, '') : file.path
46
- file_id = ::File.basename(file_id) if flatten_folder_structure
47
- file_id
48
- end
49
-
50
- def file_attributes(provided_file_attributes)
51
- file.file_attributes || provided_file_attributes[mimetype] || provided_file_attributes['default'] || ATTRIBUTES_FOR_TYPE[mimetype] || ATTRIBUTES_FOR_TYPE['default']
52
- end
53
-
54
- def image_data
55
- { height: file.exif.imageheight, width: file.exif.imagewidth }
56
- end
57
-
58
- private
59
-
60
- attr_reader :file
61
- end
62
- end
63
- end
@@ -1,73 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'active_support/core_ext/object/blank'
4
-
5
- module Assembly
6
- class ContentMetadata
7
- # Represents a group of related Files, such as a single master file and the derivatives
8
- class FileSet
9
- # @param [Boolean] dpg (false) is it a dpg bundle?
10
- # @param [Array<Assembly::ObjectFile>] resource_files
11
- # @param style
12
- def initialize(resource_files:, style:, dpg: false)
13
- @dpg = dpg
14
- @resource_files = resource_files
15
- @style = style
16
- end
17
-
18
- # objects in the special DPG folders are always type=object when we are using :bundle=>:dpg
19
- # otherwise look at the style to determine the resource_type_description
20
- def resource_type_description
21
- @resource_type_description ||= special_dpg_resource? ? 'object' : resource_type_descriptions
22
- end
23
-
24
- def label_from_file(default:)
25
- resource_files.find { |obj| obj.label.present? }&.label || default
26
- end
27
-
28
- def files
29
- resource_files.map { |file| File.new(file: file) }
30
- end
31
-
32
- private
33
-
34
- attr_reader :dpg, :resource_files, :style
35
-
36
- def special_dpg_resource?
37
- return false unless dpg
38
-
39
- resource_files.collect { |obj| ContentMetadata.special_dpg_folder?(obj.dpg_folder) }.include?(true)
40
- end
41
-
42
- # rubocop:disable Metrics/CyclomaticComplexity
43
- def resource_type_descriptions
44
- # grab all of the file types within a resource into an array so we can decide what the resource type should be
45
- resource_file_types = resource_files.collect(&:object_type)
46
- resource_has_non_images = !(resource_file_types - [:image]).empty?
47
-
48
- case style
49
- when :simple_image, :map, :'webarchive-seed'
50
- 'image'
51
- when :file
52
- 'file'
53
- when :simple_book # in a simple book project, all resources are pages unless they are *all* non-images -- if so, switch the type to object
54
- resource_has_non_images && resource_file_types.include?(:image) == false ? 'object' : 'page'
55
- when :book_as_image # same as simple book, but all resources are images instead of pages, unless we need to switch them to object type
56
- resource_has_non_images && resource_file_types.include?(:image) == false ? 'object' : 'image'
57
- when :book_with_pdf # in book with PDF type, if we find a resource with *any* non images, switch its type from book to object
58
- resource_has_non_images ? 'object' : 'page'
59
- when :document
60
- 'document'
61
- when :'3d'
62
- resource_extensions = resource_files.collect(&:ext)
63
- if (resource_extensions & VALID_THREE_DIMENSION_EXTENTIONS).empty? # if this resource contains no known 3D file extensions, the resource type is file
64
- 'file'
65
- else # otherwise the resource type is 3d
66
- '3d'
67
- end
68
- end
69
- end
70
- # rubocop:enable Metrics/CyclomaticComplexity
71
- end
72
- end
73
- end
@@ -1,65 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Assembly
4
- class ContentMetadata
5
- # Builds groups of related Files, based on bundle
6
- class FileSetBuilder
7
- # @param [Symbol] bundle one of: :default, :filename, :dpg or :prebundled
8
- # @param [Array<Assembly::ObjectFile>] objects
9
- # @param [Symbol] style one of: :simple_image, :file, :simple_book, :book_as_image, :book_with_pdf, :map, or :'3d'
10
- def self.build(bundle:, objects:, style:)
11
- new(bundle: bundle, objects: objects, style: style).build
12
- end
13
-
14
- def initialize(bundle:, objects:, style:)
15
- @bundle = bundle
16
- @objects = objects
17
- @style = style
18
- end
19
-
20
- # @return [Array<FileSet>] a list of filesets in the object
21
- def build
22
- case bundle
23
- when :default # one resource per object
24
- objects.collect { |obj| FileSet.new(resource_files: [obj], style: style) }
25
- when :filename # one resource per distinct filename (excluding extension)
26
- build_for_filename
27
- when :dpg # group by DPG filename
28
- build_for_dpg
29
- when :prebundled
30
- # if the user specifies this method, they will pass in an array of arrays, indicating resources, so we don't need to bundle in the gem
31
- # This is used by the assemblyWF if you have stubContentMetadata.xml
32
- objects.map { |inner| FileSet.new(resource_files: inner, style: style) }
33
- else
34
- raise 'Invalid bundle method'
35
- end
36
- end
37
-
38
- private
39
-
40
- attr_reader :bundle, :objects, :style
41
-
42
- def build_for_filename
43
- # loop over distinct filenames, this determines how many resources we will have and
44
- # create one resource node per distinct filename, collecting the relevant objects with the distinct filename into that resource
45
- distinct_filenames = objects.collect(&:filename_without_ext).uniq # find all the unique filenames in the set of objects, leaving off extensions and base paths
46
- distinct_filenames.map do |distinct_filename|
47
- FileSet.new(resource_files: objects.collect { |obj| obj if obj.filename_without_ext == distinct_filename }.compact,
48
- style: style)
49
- end
50
- end
51
-
52
- def build_for_dpg
53
- # loop over distinct dpg base names, this determines how many resources we will have and
54
- # create one resource node per distinct dpg base name, collecting the relevant objects with the distinct names into that resource
55
-
56
- distinct_filenames = objects.collect(&:dpg_basename).uniq # find all the unique DPG filenames in the set of objects
57
- resources = distinct_filenames.map do |distinct_filename|
58
- FileSet.new(dpg: true, resource_files: objects.collect { |obj| obj if obj.dpg_basename == distinct_filename && !ContentMetadata.special_dpg_folder?(obj.dpg_folder) }.compact, style: style)
59
- end
60
- objects.each { |obj| resources << FileSet.new(dpg: true, resource_files: [obj], style: style) if ContentMetadata.special_dpg_folder?(obj.dpg_folder) } # certain subfolders require individual resources for files within them regardless of file-naming convention
61
- resources
62
- end
63
- end
64
- end
65
- end
@@ -1,57 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Assembly
4
- class ContentMetadata
5
- # Builds a nokogiri representation of the content metadata
6
- class NokogiriBuilder
7
- # @param [Array<Fileset>] filesets
8
- # @param [String] druid
9
- # @param [String] common_path
10
- # @param [Config] config
11
- def self.build(filesets:, druid:, common_path:, config:)
12
- # a counter to use when creating auto-labels for resources, with incremenets for each type
13
- resource_type_counters = Hash.new(0)
14
- pid = druid.gsub('druid:', '') # remove druid prefix when creating IDs
15
-
16
- Nokogiri::XML::Builder.new do |xml|
17
- xml.contentMetadata(objectId: druid.to_s, type: config.type) do
18
- xml.bookData(readingOrder: config.reading_order) if config.type == 'book'
19
-
20
- filesets.each_with_index do |fileset, index| # iterate over all the resources
21
- # start a new resource element
22
- sequence = index + 1
23
-
24
- resource_type_counters[fileset.resource_type_description] += 1 # each resource type description gets its own incrementing counter
25
-
26
- xml.resource(id: "#{pid}_#{sequence}", sequence: sequence, type: fileset.resource_type_description) do
27
- # create a generic resource label if needed
28
- default_label = config.auto_labels ? "#{fileset.resource_type_description.capitalize} #{resource_type_counters[fileset.resource_type_description]}" : ''
29
-
30
- # but if one of the files has a label, use it instead
31
- resource_label = fileset.label_from_file(default: default_label)
32
-
33
- xml.label(resource_label) unless resource_label.empty?
34
- fileset.files.each do |cm_file| # iterate over all the files in a resource
35
- xml_file_params = { id: cm_file.file_id(common_path: common_path, flatten_folder_structure: config.flatten_folder_structure) }
36
- xml_file_params.merge!(cm_file.file_attributes(config.file_attributes)) if config.add_file_attributes
37
- xml_file_params.merge!(mimetype: cm_file.mimetype, size: cm_file.filesize) if config.add_exif
38
-
39
- xml.file(xml_file_params) do
40
- if config.add_exif # add exif info if the user requested it
41
- xml.checksum(cm_file.sha1, type: 'sha1')
42
- xml.checksum(cm_file.md5, type: 'md5')
43
- xml.imageData(cm_file.image_data) if cm_file.valid_image? # add image data for an image
44
- elsif cm_file.provider_md5 || cm_file.provider_sha1 # if we did not add exif info, see if there are user supplied checksums to add
45
- xml.checksum(cm_file.provider_sha1, type: 'sha1') if cm_file.provider_sha1
46
- xml.checksum(cm_file.provider_md5, type: 'md5') if cm_file.provider_md5
47
- end
48
- end
49
- end
50
- end
51
- end
52
- end
53
- end
54
- end
55
- end
56
- end
57
- end
@@ -1,117 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'nokogiri'
4
- require 'deprecation'
5
- require 'active_support'
6
- require 'assembly-objectfile/content_metadata/file'
7
- require 'assembly-objectfile/content_metadata/file_set'
8
- require 'assembly-objectfile/content_metadata/file_set_builder'
9
- require 'assembly-objectfile/content_metadata/config'
10
- require 'assembly-objectfile/content_metadata/nokogiri_builder'
11
-
12
- module Assembly
13
- SPECIAL_DPG_FOLDERS = %w[31 44 50].freeze # these special dpg folders will force any files contained in them into their own resources, regardless of filenaming convention
14
- # these are used when :bundle=>:dpg only
15
-
16
- DEPRECATED_STYLES = %i[book_with_pdf book_as_image].freeze
17
- VALID_STYLES = %i[simple_image simple_book file map document 3d webarchive-seed].freeze
18
-
19
- # This class generates content metadata for image files
20
- class ContentMetadata
21
- # Generates image content XML metadata for a repository object.
22
- # This method only produces content metadata for images
23
- # and does not depend on a specific folder structure. Note that it is class level method.
24
- #
25
- # @param [Hash] params a hash containg parameters needed to produce content metadata
26
- # :druid = required - a string of druid of the repository object's druid id (with or without 'druid:' prefix)
27
- # :objects = required - an array of Assembly::ObjectFile objects containing the list of files to add to content metadata
28
- # NOTE: if you set the :bundle option to :prebundled, you will need to pass in an array of arrays, and not a flat array, as noted below
29
- # :style = optional - a symbol containing the style of metadata to create, allowed values are
30
- # :simple_image (default), contentMetadata type="image", resource type="image"
31
- # :file, contentMetadata type="file", resource type="file"
32
- # :simple_book, contentMetadata type="book", resource type="page", but any resource which has file(s) other than an image, and also contains no images at all, will be resource type="object"
33
- # :book_with_pdf, contentMetadata type="book", resource type="page", but any resource which has any file(s) other than an image will be resource type="object" - NOTE: THIS IS DEPRECATED
34
- # :book_as_image, as simple_book, but with contentMetadata type="book", resource type="image" (same rule applies for resources with non images) - NOTE: THIS IS DEPRECATED
35
- # :map, like simple_image, but with contentMetadata type="map", resource type="image"
36
- # :3d, contentMetadata type="3d", ".obj" and other configured 3d extension files go into resource_type="3d", everything else into resource_type="file"
37
- # :webarchive-seed, contentMetadata type="webarchive-seed", resource type="image"
38
- # :bundle = optional - a symbol containing the method of bundling files into resources, allowed values are
39
- # :default = all files get their own resources (default)
40
- # :filename = files with the same filename but different extensions get bundled together in a single resource
41
- # :dpg = files representing the same image but of different mimetype that use the SULAIR DPG filenaming standard (00 vs 05) get bundled together in a single resource
42
- # :prebundlded = this option requires you to prebundled the files passed in as an array of arrays, indicating how files are bundlded into resources; this is the most flexible option since it gives you full control
43
- # :add_exif = optional - a boolean to indicate if exif data should be added (mimetype, filesize, image height/width, etc.) to each file, defaults to false and is not required if project goes through assembly
44
- # :add_file_attributes = optional - a boolean to indicate if publish/preserve/shelve/role attributes should be added using defaults or by supplied override by mime/type, defaults to false and is not required if project goes through assembly
45
- # :file_attributes = optional - a hash of file attributes by mimetype to use instead of defaults, only used if add_file_attributes is also true,
46
- # If a mimetype match is not found in your hash, the default is used (either your supplied default or the gems).
47
- # e.g. {'default'=>{:preserve=>'yes',:shelve=>'yes',:publish=>'yes'},'image/tif'=>{:preserve=>'yes',:shelve=>'no',:publish=>'no'},'application/pdf'=>{:preserve=>'yes',:shelve=>'yes',:publish=>'yes'}}
48
- # :include_root_xml = optional - a boolean to indicate if the contentMetadata returned includes a root <?xml version="1.0"?> tag, defaults to true
49
- # :preserve_common_paths = optional - When creating the file "id" attribute, content metadata uses the "relative_path" attribute of the ObjectFile objects passed in. If the "relative_path" attribute is not set, the "path" attribute is used instead,
50
- # which includes a full path to the file. If the "preserve_common_paths" parameter is set to false or left off, then the common paths of all of the ObjectFile's passed in are removed from any "path" attributes. This should turn full paths into
51
- # the relative paths that are required in content metadata file id nodes. If you do not want this behavior, set "preserve_common_paths" to true. The default is false.
52
- # :flatten_folder_structure = optional - Will remove *all* folder structure when genearting file IDs (e.g. DPG subfolders like '00','05' will be removed) when generating file IDs. This is useful if the folder structure is flattened when staging files (like for DPG).
53
- # The default is false. If set to true, will override the "preserve_common_paths" parameter.
54
- # :auto_labels = optional - Will add automated resource labels (e.g. "File 1") when labels are not provided by the user. The default is true.
55
- # See https://consul.stanford.edu/pages/viewpage.action?spaceKey=chimera&title=DOR+content+types%2C+resource+types+and+interpretive+metadata for next two settings
56
- # :reading_order = optional - only valid for simple_book, can be 'rtl' or 'ltr'. The default is 'ltr'.
57
- # Example:
58
- # Assembly::ContentMetadata.create_content_metadata(:druid=>'druid:nx288wh8889',:style=>:simple_image,:objects=>object_files,:add_file_attributes=>false)
59
- def self.create_content_metadata(druid:, objects:, auto_labels: true,
60
- add_exif: false, bundle: :default, style: :simple_image,
61
- add_file_attributes: false, file_attributes: {},
62
- preserve_common_paths: false, flatten_folder_structure: false,
63
- include_root_xml: nil, reading_order: 'ltr')
64
-
65
- common_path = find_common_path(objects) unless preserve_common_paths # find common paths to all files provided if needed
66
-
67
- filesets = FileSetBuilder.build(bundle: bundle, objects: objects, style: style)
68
- config = Config.new(auto_labels: auto_labels,
69
- flatten_folder_structure: flatten_folder_structure,
70
- add_file_attributes: add_file_attributes,
71
- file_attributes: file_attributes,
72
- add_exif: add_exif,
73
- reading_order: reading_order,
74
- type: object_level_type(style))
75
-
76
- builder = NokogiriBuilder.build(druid: druid,
77
- filesets: filesets,
78
- common_path: common_path,
79
- config: config)
80
-
81
- if include_root_xml == false
82
- builder.doc.root.to_xml
83
- else
84
- builder.to_xml
85
- end
86
- end
87
-
88
- def self.special_dpg_folder?(folder)
89
- SPECIAL_DPG_FOLDERS.include?(folder)
90
- end
91
-
92
- def self.find_common_path(objects)
93
- all_paths = objects.flatten.map do |obj|
94
- raise "File '#{obj.path}' not found" unless obj.file_exists?
95
-
96
- obj.path # collect all of the filenames into an array
97
- end
98
-
99
- Assembly::ObjectFile.common_path(all_paths) # find common paths to all files provided if needed
100
- end
101
- private_class_method :find_common_path
102
-
103
- def self.object_level_type(style)
104
- Deprecation.warn(self, "the style #{style} is now deprecated and should not be used. This will be removed in assembly-objectfile 2.0") if DEPRECATED_STYLES.include? style
105
- raise "Supplied style (#{style}) not valid" unless (VALID_STYLES + DEPRECATED_STYLES).include? style
106
-
107
- case style
108
- when :simple_image
109
- 'image'
110
- when :simple_book, :book_with_pdf, :book_as_image
111
- 'book'
112
- else
113
- style.to_s
114
- end
115
- end
116
- end # class
117
- end # module