assembly-objectfile 1.13.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/.rubocop.yml +1 -1
- data/.rubocop_todo.yml +14 -80
- data/Gemfile.lock +106 -0
- data/assembly-objectfile.gemspec +1 -3
- data/lib/assembly-objectfile/object_file.rb +253 -3
- data/lib/assembly-objectfile/version.rb +1 -1
- data/lib/assembly-objectfile.rb +0 -5
- data/spec/object_file_spec.rb +411 -172
- data/spec/spec_helper.rb +2 -31
- metadata +19 -107
- data/lib/assembly-objectfile/content_metadata/config.rb +0 -26
- data/lib/assembly-objectfile/content_metadata/file.rb +0 -63
- data/lib/assembly-objectfile/content_metadata/file_set.rb +0 -73
- data/lib/assembly-objectfile/content_metadata/file_set_builder.rb +0 -65
- data/lib/assembly-objectfile/content_metadata/nokogiri_builder.rb +0 -57
- data/lib/assembly-objectfile/content_metadata.rb +0 -117
- data/lib/assembly-objectfile/object_fileable.rb +0 -275
- data/spec/content_metadata_spec.rb +0 -809
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2 +0 -0
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf +0 -1
- data/spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif +0 -0
- data/spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf +0 -1
- data/spec/test_data/input/res1_image1.jp2 +0 -0
- data/spec/test_data/input/res1_image2.jp2 +0 -0
- data/spec/test_data/input/res1_image2.tif +0 -0
- data/spec/test_data/input/res1_teifile.txt +0 -1
- data/spec/test_data/input/res2_image1.jp2 +0 -0
- data/spec/test_data/input/res2_image1.tif +0 -0
- data/spec/test_data/input/res2_image2.jp2 +0 -0
- data/spec/test_data/input/res2_image2.tif +0 -0
- data/spec/test_data/input/res2_teifile.txt +0 -1
- data/spec/test_data/input/res2_textfile.txt +0 -1
- data/spec/test_data/input/res3_image1.jp2 +0 -0
- data/spec/test_data/input/res3_image1.tif +0 -0
- data/spec/test_data/input/res3_teifile.txt +0 -1
- data/spec/test_data/input/test.pdf +0 -1
- data/spec/test_data/input/test.svg +0 -2
- data/spec/test_data/input/test2.jp2 +0 -0
- data/spec/test_data/input/test2.tif +0 -0
data/spec/spec_helper.rb
CHANGED
@@ -4,7 +4,7 @@ require 'simplecov'
|
|
4
4
|
SimpleCov.start
|
5
5
|
|
6
6
|
require File.expand_path("#{File.dirname(__FILE__)}/../config/boot")
|
7
|
-
require 'byebug'
|
7
|
+
require 'pry-byebug'
|
8
8
|
|
9
9
|
RSpec.configure do |config|
|
10
10
|
config.order = 'random'
|
@@ -14,45 +14,16 @@ TEST_DATA_DIR = File.join(Assembly::PATH_TO_GEM, 'spec', 'test_data')
|
|
14
14
|
TEST_INPUT_DIR = File.join(TEST_DATA_DIR, 'input')
|
15
15
|
TEST_OUTPUT_DIR = File.join(TEST_DATA_DIR, 'output')
|
16
16
|
TEST_TIF_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.tif')
|
17
|
-
|
18
|
-
TEST_JPEG_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.jpg')
|
17
|
+
TEST_JPEG_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.jpg')
|
19
18
|
TEST_JP2_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.jp2')
|
20
|
-
TEST_JP2_INPUT_FILE2 = File.join(TEST_INPUT_DIR, 'test2.jp2')
|
21
|
-
TEST_SVG_INPUT_FILE = File.join(TEST_INPUT_DIR, 'test.svg')
|
22
19
|
TEST_JP2_OUTPUT_FILE = File.join(TEST_OUTPUT_DIR, 'test.jp2')
|
23
|
-
TEST_PDF_FILE = File.join(TEST_INPUT_DIR, 'test.pdf')
|
24
|
-
|
25
|
-
TEST_DPG_TIF = File.join(TEST_INPUT_DIR, 'oo000oo0001', '00', 'oo000oo0001_00_001.tif')
|
26
|
-
TEST_DPG_TIF2 = File.join(TEST_INPUT_DIR, 'oo000oo0001', '00', 'oo000oo0001_00_002.tif')
|
27
|
-
TEST_DPG_JP = File.join(TEST_INPUT_DIR, 'oo000oo0001', '05', 'oo000oo0001_05_001.jp2')
|
28
|
-
TEST_DPG_JP2 = File.join(TEST_INPUT_DIR, 'oo000oo0001', '05', 'oo000oo0001_05_002.jp2')
|
29
|
-
TEST_DPG_PDF = File.join(TEST_INPUT_DIR, 'oo000oo0001', '15', 'oo000oo0001_15_001.pdf')
|
30
|
-
TEST_DPG_PDF2 = File.join(TEST_INPUT_DIR, 'oo000oo0001', '15', 'oo000oo0001_15_002.pdf')
|
31
|
-
TEST_DPG_SPECIAL_PDF1 = File.join(TEST_INPUT_DIR, 'oo000oo0001', 'oo000oo0001_book.pdf')
|
32
|
-
TEST_DPG_SPECIAL_PDF2 = File.join(TEST_INPUT_DIR, 'oo000oo0001', '31', 'oo000oo0001_31_001.pdf')
|
33
|
-
TEST_DPG_SPECIAL_TIF = File.join(TEST_INPUT_DIR, 'oo000oo0001', '50', 'oo000oo0001_50_001.tif')
|
34
20
|
|
35
21
|
TEST_TIFF_NO_COLOR_FILE = File.join(TEST_INPUT_DIR, 'test_no_color_profile.tif')
|
36
22
|
|
37
23
|
TEST_RES1_TIF1 = File.join(TEST_INPUT_DIR, 'res1_image1.tif')
|
38
|
-
TEST_RES1_JP1 = File.join(TEST_INPUT_DIR, 'res1_image1.jp2')
|
39
|
-
TEST_RES1_TIF2 = File.join(TEST_INPUT_DIR, 'res1_image2.tif')
|
40
|
-
TEST_RES1_JP2 = File.join(TEST_INPUT_DIR, 'res1_image2.jp2')
|
41
|
-
TEST_RES1_TEI = File.join(TEST_INPUT_DIR, 'res1_teifile.txt')
|
42
24
|
TEST_RES1_TEXT = File.join(TEST_INPUT_DIR, 'res1_textfile.txt')
|
43
25
|
TEST_RES1_PDF = File.join(TEST_INPUT_DIR, 'res1_transcript.pdf')
|
44
26
|
|
45
|
-
TEST_RES2_TIF1 = File.join(TEST_INPUT_DIR, 'res2_image1.tif')
|
46
|
-
TEST_RES2_JP1 = File.join(TEST_INPUT_DIR, 'res2_image1.jp2')
|
47
|
-
TEST_RES2_TIF2 = File.join(TEST_INPUT_DIR, 'res2_image2.tif')
|
48
|
-
TEST_RES2_JP2 = File.join(TEST_INPUT_DIR, 'res2_image2.jp2')
|
49
|
-
TEST_RES2_TEI = File.join(TEST_INPUT_DIR, 'res2_teifile.txt')
|
50
|
-
TEST_RES2_TEXT = File.join(TEST_INPUT_DIR, 'res2_textfile.txt')
|
51
|
-
|
52
|
-
TEST_RES3_TIF1 = File.join(TEST_INPUT_DIR, 'res3_image1.tif')
|
53
|
-
TEST_RES3_JP1 = File.join(TEST_INPUT_DIR, 'res3_image1.jp2')
|
54
|
-
TEST_RES3_TEI = File.join(TEST_INPUT_DIR, 'res3_teifile.txt')
|
55
|
-
|
56
27
|
TEST_FILE_NO_EXIF = File.join(TEST_INPUT_DIR, 'file_with_no_exif.xml')
|
57
28
|
|
58
29
|
TEST_JSON_FILE = File.join(TEST_INPUT_DIR, 'test.json')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: assembly-objectfile
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.13.0
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Mangiafico
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: exe
|
13
13
|
cert_chain: []
|
14
|
-
date: 2022-
|
14
|
+
date: 2022-07-08 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: activesupport
|
@@ -41,34 +41,6 @@ dependencies:
|
|
41
41
|
- - ">="
|
42
42
|
- !ruby/object:Gem::Version
|
43
43
|
version: '0'
|
44
|
-
- !ruby/object:Gem::Dependency
|
45
|
-
name: dry-struct
|
46
|
-
requirement: !ruby/object:Gem::Requirement
|
47
|
-
requirements:
|
48
|
-
- - "~>"
|
49
|
-
- !ruby/object:Gem::Version
|
50
|
-
version: '1.0'
|
51
|
-
type: :runtime
|
52
|
-
prerelease: false
|
53
|
-
version_requirements: !ruby/object:Gem::Requirement
|
54
|
-
requirements:
|
55
|
-
- - "~>"
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
version: '1.0'
|
58
|
-
- !ruby/object:Gem::Dependency
|
59
|
-
name: dry-types
|
60
|
-
requirement: !ruby/object:Gem::Requirement
|
61
|
-
requirements:
|
62
|
-
- - "~>"
|
63
|
-
- !ruby/object:Gem::Version
|
64
|
-
version: '1.1'
|
65
|
-
type: :runtime
|
66
|
-
prerelease: false
|
67
|
-
version_requirements: !ruby/object:Gem::Requirement
|
68
|
-
requirements:
|
69
|
-
- - "~>"
|
70
|
-
- !ruby/object:Gem::Version
|
71
|
-
version: '1.1'
|
72
44
|
- !ruby/object:Gem::Dependency
|
73
45
|
name: mime-types
|
74
46
|
requirement: !ruby/object:Gem::Requirement
|
@@ -125,6 +97,20 @@ dependencies:
|
|
125
97
|
- - ">="
|
126
98
|
- !ruby/object:Gem::Version
|
127
99
|
version: '0'
|
100
|
+
- !ruby/object:Gem::Dependency
|
101
|
+
name: pry-byebug
|
102
|
+
requirement: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - ">="
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0'
|
107
|
+
type: :development
|
108
|
+
prerelease: false
|
109
|
+
version_requirements: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
111
|
+
- - ">="
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: '0'
|
128
114
|
- !ruby/object:Gem::Dependency
|
129
115
|
name: rake
|
130
116
|
requirement: !ruby/object:Gem::Requirement
|
@@ -210,6 +196,7 @@ files:
|
|
210
196
|
- ".rubocop_todo.yml"
|
211
197
|
- ".rvmrc.example"
|
212
198
|
- Gemfile
|
199
|
+
- Gemfile.lock
|
213
200
|
- LICENSE
|
214
201
|
- README.md
|
215
202
|
- Rakefile
|
@@ -218,58 +205,24 @@ files:
|
|
218
205
|
- bin/run_all_tests
|
219
206
|
- config/boot.rb
|
220
207
|
- lib/assembly-objectfile.rb
|
221
|
-
- lib/assembly-objectfile/content_metadata.rb
|
222
|
-
- lib/assembly-objectfile/content_metadata/config.rb
|
223
|
-
- lib/assembly-objectfile/content_metadata/file.rb
|
224
|
-
- lib/assembly-objectfile/content_metadata/file_set.rb
|
225
|
-
- lib/assembly-objectfile/content_metadata/file_set_builder.rb
|
226
|
-
- lib/assembly-objectfile/content_metadata/nokogiri_builder.rb
|
227
208
|
- lib/assembly-objectfile/object_file.rb
|
228
|
-
- lib/assembly-objectfile/object_fileable.rb
|
229
209
|
- lib/assembly-objectfile/version.rb
|
230
210
|
- profiles/AdobeRGB1998.icc
|
231
211
|
- profiles/DotGain20.icc
|
232
212
|
- profiles/sRGBIEC6196621.icc
|
233
|
-
- spec/content_metadata_spec.rb
|
234
213
|
- spec/object_file_spec.rb
|
235
214
|
- spec/spec_helper.rb
|
236
215
|
- spec/test_data/empty.txt
|
237
216
|
- spec/test_data/input/.empty
|
238
217
|
- spec/test_data/input/file_with_no_exif.xml
|
239
|
-
- spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif
|
240
|
-
- spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif
|
241
|
-
- spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2
|
242
|
-
- spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2
|
243
|
-
- spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf
|
244
|
-
- spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf
|
245
|
-
- spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf
|
246
|
-
- spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif
|
247
|
-
- spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf
|
248
|
-
- spec/test_data/input/res1_image1.jp2
|
249
218
|
- spec/test_data/input/res1_image1.tif
|
250
|
-
- spec/test_data/input/res1_image2.jp2
|
251
|
-
- spec/test_data/input/res1_image2.tif
|
252
|
-
- spec/test_data/input/res1_teifile.txt
|
253
219
|
- spec/test_data/input/res1_textfile.txt
|
254
220
|
- spec/test_data/input/res1_transcript.pdf
|
255
|
-
- spec/test_data/input/res2_image1.jp2
|
256
|
-
- spec/test_data/input/res2_image1.tif
|
257
|
-
- spec/test_data/input/res2_image2.jp2
|
258
|
-
- spec/test_data/input/res2_image2.tif
|
259
|
-
- spec/test_data/input/res2_teifile.txt
|
260
|
-
- spec/test_data/input/res2_textfile.txt
|
261
|
-
- spec/test_data/input/res3_image1.jp2
|
262
|
-
- spec/test_data/input/res3_image1.tif
|
263
|
-
- spec/test_data/input/res3_teifile.txt
|
264
221
|
- spec/test_data/input/someobject.obj
|
265
222
|
- spec/test_data/input/someobject.ply
|
266
223
|
- spec/test_data/input/test.jp2
|
267
224
|
- spec/test_data/input/test.json
|
268
|
-
- spec/test_data/input/test.pdf
|
269
|
-
- spec/test_data/input/test.svg
|
270
225
|
- spec/test_data/input/test.tif
|
271
|
-
- spec/test_data/input/test2.jp2
|
272
|
-
- spec/test_data/input/test2.tif
|
273
226
|
- spec/test_data/input/test_no_color_profile.tif
|
274
227
|
homepage: https://github.com/sul-dlss/assembly-objectfile
|
275
228
|
licenses:
|
@@ -291,50 +244,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
291
244
|
- !ruby/object:Gem::Version
|
292
245
|
version: '0'
|
293
246
|
requirements: []
|
294
|
-
rubygems_version: 3.
|
247
|
+
rubygems_version: 3.3.7
|
295
248
|
signing_key:
|
296
249
|
specification_version: 4
|
297
250
|
summary: Ruby immplementation of file services needed to prepare objects to be accessioned
|
298
251
|
in SULAIR digital library
|
299
|
-
test_files:
|
300
|
-
- spec/content_metadata_spec.rb
|
301
|
-
- spec/object_file_spec.rb
|
302
|
-
- spec/spec_helper.rb
|
303
|
-
- spec/test_data/empty.txt
|
304
|
-
- spec/test_data/input/.empty
|
305
|
-
- spec/test_data/input/file_with_no_exif.xml
|
306
|
-
- spec/test_data/input/oo000oo0001/00/oo000oo0001_00_001.tif
|
307
|
-
- spec/test_data/input/oo000oo0001/00/oo000oo0001_00_002.tif
|
308
|
-
- spec/test_data/input/oo000oo0001/05/oo000oo0001_05_001.jp2
|
309
|
-
- spec/test_data/input/oo000oo0001/05/oo000oo0001_05_002.jp2
|
310
|
-
- spec/test_data/input/oo000oo0001/15/oo000oo0001_15_001.pdf
|
311
|
-
- spec/test_data/input/oo000oo0001/15/oo000oo0001_15_002.pdf
|
312
|
-
- spec/test_data/input/oo000oo0001/31/oo000oo0001_31_001.pdf
|
313
|
-
- spec/test_data/input/oo000oo0001/50/oo000oo0001_50_001.tif
|
314
|
-
- spec/test_data/input/oo000oo0001/oo000oo0001_book.pdf
|
315
|
-
- spec/test_data/input/res1_image1.jp2
|
316
|
-
- spec/test_data/input/res1_image1.tif
|
317
|
-
- spec/test_data/input/res1_image2.jp2
|
318
|
-
- spec/test_data/input/res1_image2.tif
|
319
|
-
- spec/test_data/input/res1_teifile.txt
|
320
|
-
- spec/test_data/input/res1_textfile.txt
|
321
|
-
- spec/test_data/input/res1_transcript.pdf
|
322
|
-
- spec/test_data/input/res2_image1.jp2
|
323
|
-
- spec/test_data/input/res2_image1.tif
|
324
|
-
- spec/test_data/input/res2_image2.jp2
|
325
|
-
- spec/test_data/input/res2_image2.tif
|
326
|
-
- spec/test_data/input/res2_teifile.txt
|
327
|
-
- spec/test_data/input/res2_textfile.txt
|
328
|
-
- spec/test_data/input/res3_image1.jp2
|
329
|
-
- spec/test_data/input/res3_image1.tif
|
330
|
-
- spec/test_data/input/res3_teifile.txt
|
331
|
-
- spec/test_data/input/someobject.obj
|
332
|
-
- spec/test_data/input/someobject.ply
|
333
|
-
- spec/test_data/input/test.jp2
|
334
|
-
- spec/test_data/input/test.json
|
335
|
-
- spec/test_data/input/test.pdf
|
336
|
-
- spec/test_data/input/test.svg
|
337
|
-
- spec/test_data/input/test.tif
|
338
|
-
- spec/test_data/input/test2.jp2
|
339
|
-
- spec/test_data/input/test2.tif
|
340
|
-
- spec/test_data/input/test_no_color_profile.tif
|
252
|
+
test_files: []
|
@@ -1,26 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'dry-struct'
|
4
|
-
require 'dry-types'
|
5
|
-
|
6
|
-
module Assembly
|
7
|
-
class ContentMetadata
|
8
|
-
# Types for the configuration
|
9
|
-
module Types
|
10
|
-
include Dry.Types()
|
11
|
-
end
|
12
|
-
|
13
|
-
# Represents a configuration for generating the content metadata
|
14
|
-
class Config < Dry::Struct
|
15
|
-
STYLES = %w[image file book map 3d document webarchive-seed].freeze
|
16
|
-
READING_ORDERS = %w[ltr rtl].freeze
|
17
|
-
attribute :auto_labels, Types::Strict::Bool.default(true)
|
18
|
-
attribute :flatten_folder_structure, Types::Strict::Bool.default(false)
|
19
|
-
attribute :add_file_attributes, Types::Strict::Bool.default(false)
|
20
|
-
attribute :add_exif, Types::Strict::Bool.default(false)
|
21
|
-
attribute :file_attributes, Types::Strict::Hash.default({}.freeze)
|
22
|
-
attribute :type, Types::Strict::String.enum(*STYLES)
|
23
|
-
attribute :reading_order, Types::Strict::String.default('ltr').enum(*READING_ORDERS)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
@@ -1,63 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'active_support/core_ext/module/delegation'
|
4
|
-
|
5
|
-
module Assembly
|
6
|
-
class ContentMetadata
|
7
|
-
# Represents a single File
|
8
|
-
class File
|
9
|
-
# default publish/preserve/shelve attributes used in content metadata
|
10
|
-
# if no mimetype specific attributes are specified for a given file, define some defaults, and override for specific mimetypes below
|
11
|
-
ATTRIBUTES_FOR_TYPE = {
|
12
|
-
'default' => { preserve: 'yes', shelve: 'no', publish: 'no' },
|
13
|
-
'image/tif' => { preserve: 'yes', shelve: 'no', publish: 'no' },
|
14
|
-
'image/tiff' => { preserve: 'yes', shelve: 'no', publish: 'no' },
|
15
|
-
'image/jp2' => { preserve: 'no', shelve: 'yes', publish: 'yes' },
|
16
|
-
'image/jpeg' => { preserve: 'yes', shelve: 'no', publish: 'no' },
|
17
|
-
'audio/wav' => { preserve: 'yes', shelve: 'no', publish: 'no' },
|
18
|
-
'audio/x-wav' => { preserve: 'yes', shelve: 'no', publish: 'no' },
|
19
|
-
'audio/mp3' => { preserve: 'no', shelve: 'yes', publish: 'yes' },
|
20
|
-
'audio/mpeg' => { preserve: 'no', shelve: 'yes', publish: 'yes' },
|
21
|
-
'application/pdf' => { preserve: 'yes', shelve: 'yes', publish: 'yes' },
|
22
|
-
'plain/text' => { preserve: 'yes', shelve: 'yes', publish: 'yes' },
|
23
|
-
'text/plain' => { preserve: 'yes', shelve: 'yes', publish: 'yes' },
|
24
|
-
'image/png' => { preserve: 'yes', shelve: 'yes', publish: 'no' },
|
25
|
-
'application/zip' => { preserve: 'yes', shelve: 'no', publish: 'no' },
|
26
|
-
'application/json' => { preserve: 'yes', shelve: 'yes', publish: 'yes' }
|
27
|
-
}.freeze
|
28
|
-
|
29
|
-
# @param [Symbol] bundle
|
30
|
-
# @param [Assembly::ObjectFile] file
|
31
|
-
# @param style
|
32
|
-
def initialize(file:, bundle: nil, style: nil)
|
33
|
-
@bundle = bundle
|
34
|
-
@file = file
|
35
|
-
@style = style
|
36
|
-
end
|
37
|
-
|
38
|
-
delegate :sha1, :md5, :provider_md5, :provider_sha1, :mimetype, :filesize, :image?, :valid_image?, to: :file
|
39
|
-
|
40
|
-
def file_id(common_path:, flatten_folder_structure:)
|
41
|
-
# set file id attribute, first check the relative_path parameter on the object, and if it is set, just use that
|
42
|
-
return file.relative_path if file.relative_path
|
43
|
-
|
44
|
-
# if the relative_path attribute is not set, then use the path attribute and check to see if we need to remove the common part of the path
|
45
|
-
file_id = common_path ? file.path.gsub(common_path, '') : file.path
|
46
|
-
file_id = ::File.basename(file_id) if flatten_folder_structure
|
47
|
-
file_id
|
48
|
-
end
|
49
|
-
|
50
|
-
def file_attributes(provided_file_attributes)
|
51
|
-
file.file_attributes || provided_file_attributes[mimetype] || provided_file_attributes['default'] || ATTRIBUTES_FOR_TYPE[mimetype] || ATTRIBUTES_FOR_TYPE['default']
|
52
|
-
end
|
53
|
-
|
54
|
-
def image_data
|
55
|
-
{ height: file.exif.imageheight, width: file.exif.imagewidth }
|
56
|
-
end
|
57
|
-
|
58
|
-
private
|
59
|
-
|
60
|
-
attr_reader :file
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
@@ -1,73 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'active_support/core_ext/object/blank'
|
4
|
-
|
5
|
-
module Assembly
|
6
|
-
class ContentMetadata
|
7
|
-
# Represents a groups of related Files, such as a single master file and the derivatives
|
8
|
-
class FileSet
|
9
|
-
# @param [Boolean] dpg (false) is it a dpg bundle?
|
10
|
-
# @param [Array<Assembly::ObjectFile>] resource_files
|
11
|
-
# @param style
|
12
|
-
def initialize(resource_files:, style:, dpg: false)
|
13
|
-
@dpg = dpg
|
14
|
-
@resource_files = resource_files
|
15
|
-
@style = style
|
16
|
-
end
|
17
|
-
|
18
|
-
# objects in the special DPG folders are always type=object when we using :bundle=>:dpg
|
19
|
-
# otherwise look at the style to determine the resource_type_description
|
20
|
-
def resource_type_description
|
21
|
-
@resource_type_description ||= special_dpg_resource? ? 'object' : resource_type_descriptions
|
22
|
-
end
|
23
|
-
|
24
|
-
def label_from_file(default:)
|
25
|
-
resource_files.find { |obj| obj.label.present? }&.label || default
|
26
|
-
end
|
27
|
-
|
28
|
-
def files
|
29
|
-
resource_files.map { |file| File.new(file: file) }
|
30
|
-
end
|
31
|
-
|
32
|
-
private
|
33
|
-
|
34
|
-
attr_reader :dpg, :resource_files, :style
|
35
|
-
|
36
|
-
def special_dpg_resource?
|
37
|
-
return false unless dpg
|
38
|
-
|
39
|
-
resource_files.collect { |obj| ContentMetadata.special_dpg_folder?(obj.dpg_folder) }.include?(true)
|
40
|
-
end
|
41
|
-
|
42
|
-
# rubocop:disable Metrics/CyclomaticComplexity
|
43
|
-
def resource_type_descriptions
|
44
|
-
# grab all of the file types within a resource into an array so we can decide what the resource type should be
|
45
|
-
resource_file_types = resource_files.collect(&:object_type)
|
46
|
-
resource_has_non_images = !(resource_file_types - [:image]).empty?
|
47
|
-
|
48
|
-
case style
|
49
|
-
when :simple_image, :map, :'webarchive-seed'
|
50
|
-
'image'
|
51
|
-
when :file
|
52
|
-
'file'
|
53
|
-
when :simple_book # in a simple book project, all resources are pages unless they are *all* non-images -- if so, switch the type to object
|
54
|
-
resource_has_non_images && resource_file_types.include?(:image) == false ? 'object' : 'page'
|
55
|
-
when :book_as_image # same as simple book, but all resources are images instead of pages, unless we need to switch them to object type
|
56
|
-
resource_has_non_images && resource_file_types.include?(:image) == false ? 'object' : 'image'
|
57
|
-
when :book_with_pdf # in book with PDF type, if we find a resource with *any* non images, switch it's type from book to object
|
58
|
-
resource_has_non_images ? 'object' : 'page'
|
59
|
-
when :document
|
60
|
-
'document'
|
61
|
-
when :'3d'
|
62
|
-
resource_extensions = resource_files.collect(&:ext)
|
63
|
-
if (resource_extensions & VALID_THREE_DIMENSION_EXTENTIONS).empty? # if this resource contains no known 3D file extensions, the resource type is file
|
64
|
-
'file'
|
65
|
-
else # otherwise the resource type is 3d
|
66
|
-
'3d'
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
# rubocop:enable Metrics/CyclomaticComplexity
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
@@ -1,65 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Assembly
|
4
|
-
class ContentMetadata
|
5
|
-
# Builds a groups of related Files, based on bundle
|
6
|
-
class FileSetBuilder
|
7
|
-
# @param [Symbol] bundle one of: :default, :filename, :dpg or :prebundled
|
8
|
-
# @param [Array<Assembly::ObjectFile>] objects
|
9
|
-
# @param [Symbol] style one of: :simple_image, :file, :simple_book, :book_as_image, :book_with_pdf, :map, or :'3d'
|
10
|
-
def self.build(bundle:, objects:, style:)
|
11
|
-
new(bundle: bundle, objects: objects, style: style).build
|
12
|
-
end
|
13
|
-
|
14
|
-
def initialize(bundle:, objects:, style:)
|
15
|
-
@bundle = bundle
|
16
|
-
@objects = objects
|
17
|
-
@style = style
|
18
|
-
end
|
19
|
-
|
20
|
-
# @return [Array<FileSet>] a list of filesets in the object
|
21
|
-
def build
|
22
|
-
case bundle
|
23
|
-
when :default # one resource per object
|
24
|
-
objects.collect { |obj| FileSet.new(resource_files: [obj], style: style) }
|
25
|
-
when :filename # one resource per distinct filename (excluding extension)
|
26
|
-
build_for_filename
|
27
|
-
when :dpg # group by DPG filename
|
28
|
-
build_for_dpg
|
29
|
-
when :prebundled
|
30
|
-
# if the user specifies this method, they will pass in an array of arrays, indicating resources, so we don't need to bundle in the gem
|
31
|
-
# This is used by the assemblyWF if you have stubContentMetadata.xml
|
32
|
-
objects.map { |inner| FileSet.new(resource_files: inner, style: style) }
|
33
|
-
else
|
34
|
-
raise 'Invalid bundle method'
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
attr_reader :bundle, :objects, :style
|
41
|
-
|
42
|
-
def build_for_filename
|
43
|
-
# loop over distinct filenames, this determines how many resources we will have and
|
44
|
-
# create one resource node per distinct filename, collecting the relevant objects with the distinct filename into that resource
|
45
|
-
distinct_filenames = objects.collect(&:filename_without_ext).uniq # find all the unique filenames in the set of objects, leaving off extensions and base paths
|
46
|
-
distinct_filenames.map do |distinct_filename|
|
47
|
-
FileSet.new(resource_files: objects.collect { |obj| obj if obj.filename_without_ext == distinct_filename }.compact,
|
48
|
-
style: style)
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
def build_for_dpg
|
53
|
-
# loop over distinct dpg base names, this determines how many resources we will have and
|
54
|
-
# create one resource node per distinct dpg base name, collecting the relevant objects with the distinct names into that resource
|
55
|
-
|
56
|
-
distinct_filenames = objects.collect(&:dpg_basename).uniq # find all the unique DPG filenames in the set of objects
|
57
|
-
resources = distinct_filenames.map do |distinct_filename|
|
58
|
-
FileSet.new(dpg: true, resource_files: objects.collect { |obj| obj if obj.dpg_basename == distinct_filename && !ContentMetadata.special_dpg_folder?(obj.dpg_folder) }.compact, style: style)
|
59
|
-
end
|
60
|
-
objects.each { |obj| resources << FileSet.new(dpg: true, resource_files: [obj], style: style) if ContentMetadata.special_dpg_folder?(obj.dpg_folder) } # certain subfolders require individual resources for files within them regardless of file-naming convention
|
61
|
-
resources
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
@@ -1,57 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Assembly
|
4
|
-
class ContentMetadata
|
5
|
-
# Builds a nokogiri representation of the content metadata
|
6
|
-
class NokogiriBuilder
|
7
|
-
# @param [Array<Fileset>] filesets
|
8
|
-
# @param [String] druid
|
9
|
-
# @param [String] common_path
|
10
|
-
# @param [Config] config
|
11
|
-
def self.build(filesets:, druid:, common_path:, config:)
|
12
|
-
# a counter to use when creating auto-labels for resources, with incremenets for each type
|
13
|
-
resource_type_counters = Hash.new(0)
|
14
|
-
pid = druid.gsub('druid:', '') # remove druid prefix when creating IDs
|
15
|
-
|
16
|
-
Nokogiri::XML::Builder.new do |xml|
|
17
|
-
xml.contentMetadata(objectId: druid.to_s, type: config.type) do
|
18
|
-
xml.bookData(readingOrder: config.reading_order) if config.type == 'book'
|
19
|
-
|
20
|
-
filesets.each_with_index do |fileset, index| # iterate over all the resources
|
21
|
-
# start a new resource element
|
22
|
-
sequence = index + 1
|
23
|
-
|
24
|
-
resource_type_counters[fileset.resource_type_description] += 1 # each resource type description gets its own incrementing counter
|
25
|
-
|
26
|
-
xml.resource(id: "#{pid}_#{sequence}", sequence: sequence, type: fileset.resource_type_description) do
|
27
|
-
# create a generic resource label if needed
|
28
|
-
default_label = config.auto_labels ? "#{fileset.resource_type_description.capitalize} #{resource_type_counters[fileset.resource_type_description]}" : ''
|
29
|
-
|
30
|
-
# but if one of the files has a label, use it instead
|
31
|
-
resource_label = fileset.label_from_file(default: default_label)
|
32
|
-
|
33
|
-
xml.label(resource_label) unless resource_label.empty?
|
34
|
-
fileset.files.each do |cm_file| # iterate over all the files in a resource
|
35
|
-
xml_file_params = { id: cm_file.file_id(common_path: common_path, flatten_folder_structure: config.flatten_folder_structure) }
|
36
|
-
xml_file_params.merge!(cm_file.file_attributes(config.file_attributes)) if config.add_file_attributes
|
37
|
-
xml_file_params.merge!(mimetype: cm_file.mimetype, size: cm_file.filesize) if config.add_exif
|
38
|
-
|
39
|
-
xml.file(xml_file_params) do
|
40
|
-
if config.add_exif # add exif info if the user requested it
|
41
|
-
xml.checksum(cm_file.sha1, type: 'sha1')
|
42
|
-
xml.checksum(cm_file.md5, type: 'md5')
|
43
|
-
xml.imageData(cm_file.image_data) if cm_file.valid_image? # add image data for an image
|
44
|
-
elsif cm_file.provider_md5 || cm_file.provider_sha1 # if we did not add exif info, see if there are user supplied checksums to add
|
45
|
-
xml.checksum(cm_file.provider_sha1, type: 'sha1') if cm_file.provider_sha1
|
46
|
-
xml.checksum(cm_file.provider_md5, type: 'md5') if cm_file.provider_md5
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
@@ -1,117 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'nokogiri'
|
4
|
-
require 'deprecation'
|
5
|
-
require 'active_support'
|
6
|
-
require 'assembly-objectfile/content_metadata/file'
|
7
|
-
require 'assembly-objectfile/content_metadata/file_set'
|
8
|
-
require 'assembly-objectfile/content_metadata/file_set_builder'
|
9
|
-
require 'assembly-objectfile/content_metadata/config'
|
10
|
-
require 'assembly-objectfile/content_metadata/nokogiri_builder'
|
11
|
-
|
12
|
-
module Assembly
|
13
|
-
SPECIAL_DPG_FOLDERS = %w[31 44 50].freeze # these special dpg folders will force any files contained in them into their own resources, regardless of filenaming convention
|
14
|
-
# these are used when :bundle=>:dpg only
|
15
|
-
|
16
|
-
DEPRECATED_STYLES = %i[book_with_pdf book_as_image].freeze
|
17
|
-
VALID_STYLES = %i[simple_image simple_book file map document 3d webarchive-seed].freeze
|
18
|
-
|
19
|
-
# This class generates content metadata for image files
|
20
|
-
class ContentMetadata
|
21
|
-
# Generates image content XML metadata for a repository object.
|
22
|
-
# This method only produces content metadata for images
|
23
|
-
# and does not depend on a specific folder structure. Note that it is class level method.
|
24
|
-
#
|
25
|
-
# @param [Hash] params a hash containg parameters needed to produce content metadata
|
26
|
-
# :druid = required - a string of druid of the repository object's druid id (with or without 'druid:' prefix)
|
27
|
-
# :objects = required - an array of Assembly::ObjectFile objects containing the list of files to add to content metadata
|
28
|
-
# NOTE: if you set the :bundle option to :prebundled, you will need to pass in an array of arrays, and not a flat array, as noted below
|
29
|
-
# :style = optional - a symbol containing the style of metadata to create, allowed values are
|
30
|
-
# :simple_image (default), contentMetadata type="image", resource type="image"
|
31
|
-
# :file, contentMetadata type="file", resource type="file"
|
32
|
-
# :simple_book, contentMetadata type="book", resource type="page", but any resource which has file(s) other than an image, and also contains no images at all, will be resource type="object"
|
33
|
-
# :book_with_pdf, contentMetadata type="book", resource type="page", but any resource which has any file(s) other than an image will be resource type="object" - NOTE: THIS IS DEPRECATED
|
34
|
-
# :book_as_image, as simple_book, but with contentMetadata type="book", resource type="image" (same rule applies for resources with non images) - NOTE: THIS IS DEPRECATED
|
35
|
-
# :map, like simple_image, but with contentMetadata type="map", resource type="image"
|
36
|
-
# :3d, contentMetadata type="3d", ".obj" and other configured 3d extension files go into resource_type="3d", everything else into resource_type="file"
|
37
|
-
# :webarchive-seed, contentMetadata type="webarchive-seed", resource type="image"
|
38
|
-
# :bundle = optional - a symbol containing the method of bundling files into resources, allowed values are
|
39
|
-
# :default = all files get their own resources (default)
|
40
|
-
# :filename = files with the same filename but different extensions get bundled together in a single resource
|
41
|
-
# :dpg = files representing the same image but of different mimetype that use the SULAIR DPG filenaming standard (00 vs 05) get bundled together in a single resource
|
42
|
-
# :prebundlded = this option requires you to prebundled the files passed in as an array of arrays, indicating how files are bundlded into resources; this is the most flexible option since it gives you full control
|
43
|
-
# :add_exif = optional - a boolean to indicate if exif data should be added (mimetype, filesize, image height/width, etc.) to each file, defaults to false and is not required if project goes through assembly
|
44
|
-
# :add_file_attributes = optional - a boolean to indicate if publish/preserve/shelve/role attributes should be added using defaults or by supplied override by mime/type, defaults to false and is not required if project goes through assembly
|
45
|
-
# :file_attributes = optional - a hash of file attributes by mimetype to use instead of defaults, only used if add_file_attributes is also true,
|
46
|
-
# If a mimetype match is not found in your hash, the default is used (either your supplied default or the gems).
|
47
|
-
# e.g. {'default'=>{:preserve=>'yes',:shelve=>'yes',:publish=>'yes'},'image/tif'=>{:preserve=>'yes',:shelve=>'no',:publish=>'no'},'application/pdf'=>{:preserve=>'yes',:shelve=>'yes',:publish=>'yes'}}
|
48
|
-
# :include_root_xml = optional - a boolean to indicate if the contentMetadata returned includes a root <?xml version="1.0"?> tag, defaults to true
|
49
|
-
# :preserve_common_paths = optional - When creating the file "id" attribute, content metadata uses the "relative_path" attribute of the ObjectFile objects passed in. If the "relative_path" attribute is not set, the "path" attribute is used instead,
|
50
|
-
# which includes a full path to the file. If the "preserve_common_paths" parameter is set to false or left off, then the common paths of all of the ObjectFile's passed in are removed from any "path" attributes. This should turn full paths into
|
51
|
-
# the relative paths that are required in content metadata file id nodes. If you do not want this behavior, set "preserve_common_paths" to true. The default is false.
|
52
|
-
# :flatten_folder_structure = optional - Will remove *all* folder structure when genearting file IDs (e.g. DPG subfolders like '00','05' will be removed) when generating file IDs. This is useful if the folder structure is flattened when staging files (like for DPG).
|
53
|
-
# The default is false. If set to true, will override the "preserve_common_paths" parameter.
|
54
|
-
# :auto_labels = optional - Will add automated resource labels (e.g. "File 1") when labels are not provided by the user. The default is true.
|
55
|
-
# See https://consul.stanford.edu/pages/viewpage.action?spaceKey=chimera&title=DOR+content+types%2C+resource+types+and+interpretive+metadata for next two settings
|
56
|
-
# :reading_order = optional - only valid for simple_book, can be 'rtl' or 'ltr'. The default is 'ltr'.
|
57
|
-
# Example:
|
58
|
-
# Assembly::ContentMetadata.create_content_metadata(:druid=>'druid:nx288wh8889',:style=>:simple_image,:objects=>object_files,:add_file_attributes=>false)
|
59
|
-
def self.create_content_metadata(druid:, objects:, auto_labels: true,
|
60
|
-
add_exif: false, bundle: :default, style: :simple_image,
|
61
|
-
add_file_attributes: false, file_attributes: {},
|
62
|
-
preserve_common_paths: false, flatten_folder_structure: false,
|
63
|
-
include_root_xml: nil, reading_order: 'ltr')
|
64
|
-
|
65
|
-
common_path = find_common_path(objects) unless preserve_common_paths # find common paths to all files provided if needed
|
66
|
-
|
67
|
-
filesets = FileSetBuilder.build(bundle: bundle, objects: objects, style: style)
|
68
|
-
config = Config.new(auto_labels: auto_labels,
|
69
|
-
flatten_folder_structure: flatten_folder_structure,
|
70
|
-
add_file_attributes: add_file_attributes,
|
71
|
-
file_attributes: file_attributes,
|
72
|
-
add_exif: add_exif,
|
73
|
-
reading_order: reading_order,
|
74
|
-
type: object_level_type(style))
|
75
|
-
|
76
|
-
builder = NokogiriBuilder.build(druid: druid,
|
77
|
-
filesets: filesets,
|
78
|
-
common_path: common_path,
|
79
|
-
config: config)
|
80
|
-
|
81
|
-
if include_root_xml == false
|
82
|
-
builder.doc.root.to_xml
|
83
|
-
else
|
84
|
-
builder.to_xml
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
def self.special_dpg_folder?(folder)
|
89
|
-
SPECIAL_DPG_FOLDERS.include?(folder)
|
90
|
-
end
|
91
|
-
|
92
|
-
def self.find_common_path(objects)
|
93
|
-
all_paths = objects.flatten.map do |obj|
|
94
|
-
raise "File '#{obj.path}' not found" unless obj.file_exists?
|
95
|
-
|
96
|
-
obj.path # collect all of the filenames into an array
|
97
|
-
end
|
98
|
-
|
99
|
-
Assembly::ObjectFile.common_path(all_paths) # find common paths to all files provided if needed
|
100
|
-
end
|
101
|
-
private_class_method :find_common_path
|
102
|
-
|
103
|
-
def self.object_level_type(style)
|
104
|
-
Deprecation.warn(self, "the style #{style} is now deprecated and should not be used. This will be removed in assembly-objectfile 2.0") if DEPRECATED_STYLES.include? style
|
105
|
-
raise "Supplied style (#{style}) not valid" unless (VALID_STYLES + DEPRECATED_STYLES).include? style
|
106
|
-
|
107
|
-
case style
|
108
|
-
when :simple_image
|
109
|
-
'image'
|
110
|
-
when :simple_book, :book_with_pdf, :book_as_image
|
111
|
-
'book'
|
112
|
-
else
|
113
|
-
style.to_s
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end # class
|
117
|
-
end # module
|