cocina-models 0.75.0 → 0.78.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +40 -12
- data/.rubocop_todo.yml +71 -2
- data/README.md +41 -5
- data/cocina-models.gemspec +2 -0
- data/description_types.yml +167 -38
- data/docs/description_types.md +471 -216
- data/lib/cocina/generator/generator.rb +7 -12
- data/lib/cocina/generator/schema.rb +1 -3
- data/lib/cocina/generator/schema_base.rb +0 -8
- data/lib/cocina/generator/schema_ref.rb +1 -1
- data/lib/cocina/generator/schema_value.rb +14 -4
- data/lib/cocina/models/access.rb +4 -4
- data/lib/cocina/models/admin_policy.rb +1 -1
- data/lib/cocina/models/admin_policy_access_template.rb +7 -7
- data/lib/cocina/models/admin_policy_administrative.rb +1 -1
- data/lib/cocina/models/admin_policy_with_metadata.rb +3 -3
- data/lib/cocina/models/builders/name_title_group_builder.rb +0 -4
- data/lib/cocina/models/builders/title_builder.rb +0 -2
- data/lib/cocina/models/citation_only_access.rb +2 -2
- data/lib/cocina/models/collection_access.rb +4 -4
- data/lib/cocina/models/collection_identification.rb +1 -1
- data/lib/cocina/models/collection_with_metadata.rb +2 -2
- data/lib/cocina/models/contributor.rb +4 -4
- data/lib/cocina/models/controlled_digital_lending_access.rb +2 -2
- data/lib/cocina/models/dark_access.rb +4 -4
- data/lib/cocina/models/description.rb +3 -3
- data/lib/cocina/models/descriptive_basic_value.rb +13 -13
- data/lib/cocina/models/descriptive_parallel_contributor.rb +5 -5
- data/lib/cocina/models/descriptive_parallel_event.rb +3 -3
- data/lib/cocina/models/descriptive_value.rb +13 -13
- data/lib/cocina/models/descriptive_value_language.rb +6 -6
- data/lib/cocina/models/dro.rb +1 -1
- data/lib/cocina/models/dro_access.rb +8 -8
- data/lib/cocina/models/dro_with_metadata.rb +3 -3
- data/lib/cocina/models/embargo.rb +5 -5
- data/lib/cocina/models/event.rb +3 -3
- data/lib/cocina/models/file.rb +4 -4
- data/lib/cocina/models/file_access.rb +4 -4
- data/lib/cocina/models/identification.rb +2 -2
- data/lib/cocina/models/language.rb +12 -12
- data/lib/cocina/models/location_based_access.rb +1 -1
- data/lib/cocina/models/location_based_download_access.rb +1 -1
- data/lib/cocina/models/mapping/error_notifier.rb +36 -0
- data/lib/cocina/models/mapping/from_mods/access.rb +177 -0
- data/lib/cocina/models/mapping/from_mods/admin_metadata.rb +217 -0
- data/lib/cocina/models/mapping/from_mods/alt_rep_group.rb +26 -0
- data/lib/cocina/models/mapping/from_mods/authority.rb +51 -0
- data/lib/cocina/models/mapping/from_mods/contributor.rb +161 -0
- data/lib/cocina/models/mapping/from_mods/description.rb +98 -0
- data/lib/cocina/models/mapping/from_mods/description_builder.rb +61 -0
- data/lib/cocina/models/mapping/from_mods/event.rb +543 -0
- data/lib/cocina/models/mapping/from_mods/form.rb +381 -0
- data/lib/cocina/models/mapping/from_mods/geographic.rb +219 -0
- data/lib/cocina/models/mapping/from_mods/hydrus_default_title_builder.rb +28 -0
- data/lib/cocina/models/mapping/from_mods/identifier.rb +51 -0
- data/lib/cocina/models/mapping/from_mods/identifier_builder.rb +71 -0
- data/lib/cocina/models/mapping/from_mods/identifier_type.rb +292 -0
- data/lib/cocina/models/mapping/from_mods/language.rb +36 -0
- data/lib/cocina/models/mapping/from_mods/language_script.rb +30 -0
- data/lib/cocina/models/mapping/from_mods/language_term.rb +106 -0
- data/lib/cocina/models/mapping/from_mods/name_builder.rb +307 -0
- data/lib/cocina/models/mapping/from_mods/note.rb +162 -0
- data/lib/cocina/models/mapping/from_mods/part_builder.rb +147 -0
- data/lib/cocina/models/mapping/from_mods/primary.rb +27 -0
- data/lib/cocina/models/mapping/from_mods/purl.rb +53 -0
- data/lib/cocina/models/mapping/from_mods/related_resource.rb +105 -0
- data/lib/cocina/models/mapping/from_mods/subject.rb +413 -0
- data/lib/cocina/models/mapping/from_mods/subject_authority_codes.rb +794 -0
- data/lib/cocina/models/mapping/from_mods/title.rb +160 -0
- data/lib/cocina/models/mapping/from_mods/title_builder.rb +106 -0
- data/lib/cocina/models/mapping/from_mods/title_builder_strategy.rb +19 -0
- data/lib/cocina/models/mapping/from_mods/value_uri.rb +25 -0
- data/lib/cocina/models/mapping/normalizers/base.rb +16 -0
- data/lib/cocina/models/mapping/normalizers/mods/geo_extension_normalizer.rb +69 -0
- data/lib/cocina/models/mapping/normalizers/mods/name_normalizer.rb +191 -0
- data/lib/cocina/models/mapping/normalizers/mods/origin_info_normalizer.rb +157 -0
- data/lib/cocina/models/mapping/normalizers/mods/subject_normalizer.rb +296 -0
- data/lib/cocina/models/mapping/normalizers/mods/title_normalizer.rb +91 -0
- data/lib/cocina/models/mapping/normalizers/mods_normalizer.rb +409 -0
- data/lib/cocina/models/mapping/purl.rb +27 -0
- data/lib/cocina/models/mapping/to_mods/access.rb +155 -0
- data/lib/cocina/models/mapping/to_mods/admin_metadata.rb +129 -0
- data/lib/cocina/models/mapping/to_mods/contributor.rb +49 -0
- data/lib/cocina/models/mapping/to_mods/description.rb +63 -0
- data/lib/cocina/models/mapping/to_mods/event.rb +200 -0
- data/lib/cocina/models/mapping/to_mods/form.rb +292 -0
- data/lib/cocina/models/mapping/to_mods/geographic.rb +151 -0
- data/lib/cocina/models/mapping/to_mods/id_generator.rb +25 -0
- data/lib/cocina/models/mapping/to_mods/identifier.rb +57 -0
- data/lib/cocina/models/mapping/to_mods/language.rb +82 -0
- data/lib/cocina/models/mapping/to_mods/mods_writer.rb +38 -0
- data/lib/cocina/models/mapping/to_mods/name_title_group.rb +29 -0
- data/lib/cocina/models/mapping/to_mods/name_writer.rb +228 -0
- data/lib/cocina/models/mapping/to_mods/note.rb +105 -0
- data/lib/cocina/models/mapping/to_mods/part_writer.rb +115 -0
- data/lib/cocina/models/mapping/to_mods/related_resource.rb +108 -0
- data/lib/cocina/models/mapping/to_mods/role_writer.rb +50 -0
- data/lib/cocina/models/mapping/to_mods/subject.rb +486 -0
- data/lib/cocina/models/mapping/to_mods/title.rb +260 -0
- data/lib/cocina/models/object_metadata.rb +2 -2
- data/lib/cocina/models/presentation.rb +2 -2
- data/lib/cocina/models/related_resource.rb +9 -9
- data/lib/cocina/models/release_tag.rb +4 -4
- data/lib/cocina/models/request_admin_policy.rb +1 -1
- data/lib/cocina/models/request_administrative.rb +1 -1
- data/lib/cocina/models/request_collection.rb +2 -2
- data/lib/cocina/models/request_description.rb +3 -3
- data/lib/cocina/models/request_dro.rb +4 -4
- data/lib/cocina/models/request_file.rb +5 -5
- data/lib/cocina/models/request_identification.rb +1 -1
- data/lib/cocina/models/sequence.rb +1 -1
- data/lib/cocina/models/source.rb +4 -4
- data/lib/cocina/models/standard.rb +5 -5
- data/lib/cocina/models/stanford_access.rb +2 -2
- data/lib/cocina/models/title.rb +13 -13
- data/lib/cocina/models/validators/dark_validator.rb +4 -2
- data/lib/cocina/models/validators/description_values_validator.rb +77 -0
- data/lib/cocina/models/validators/open_api_validator.rb +0 -4
- data/lib/cocina/models/validators/validator.rb +2 -1
- data/lib/cocina/models/version.rb +1 -1
- data/lib/cocina/models/world_access.rb +2 -2
- data/lib/cocina/models.rb +4 -0
- data/lib/cocina/rspec/factories.rb +205 -0
- data/lib/cocina/rspec.rb +2 -0
- data/openapi.yml +5 -5
- metadata +89 -17
- data/docs/_config.yml +0 -1
- data/docs/maps/Agent.json +0 -18
- data/docs/maps/Collection.json +0 -240
- data/docs/maps/DRO.json +0 -316
- data/docs/maps/Description.json +0 -17
- data/docs/maps/File.json +0 -196
- data/docs/maps/Fileset.json +0 -143
- data/docs/maps/README.md +0 -7
- data/docs/maps/ReleaseTag.json +0 -39
- data/docs/maps/Sequence.json +0 -46
- data/docs/maps/Title.json +0 -18
- data/docs/sampleETD/foxml-export.xml +0 -935
- data/docs/sampleETD/foxml.xml +0 -3475
- data/docs/sampleETD/xn109qc9773_bibframe.ttl +0 -95
- data/docs/sampleETD/xn109qc9773_taco.json +0 -158
@@ -0,0 +1,409 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
|
4
|
+
module Models
|
5
|
+
module Mapping
|
6
|
+
module Normalizers
|
7
|
+
# Normalizes a Fedora MODS document, accounting for differences between Fedora MODS and MODS generated from Cocina.
|
8
|
+
# These adjustments have been approved by our metadata authority, Arcadia.
|
9
|
+
class ModsNormalizer # rubocop:disable Metrics/ClassLength
|
10
|
+
include Cocina::Models::Mapping::Normalizers::Base
|
11
|
+
|
12
|
+
MODS_NS = Cocina::Models::Mapping::FromMods::Description::DESC_METADATA_NS
|
13
|
+
XLINK_NS = Cocina::Models::Mapping::FromMods::Description::XLINK_NS
|
14
|
+
|
15
|
+
# Builds a normalizer for the given document and runs the full #normalize pipeline.
#
# @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
# @param [String] druid
# @param [String] label
# @return [Nokogiri::Document] normalized MODS
def self.normalize(mods_ng_xml:, druid:, label:)
  normalizer = new(mods_ng_xml: mods_ng_xml, druid: druid, label: label)
  normalizer.normalize
end
|
22
|
+
|
23
|
+
# Builds a normalizer (without a label) and runs only the purl normalization.
#
# @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
# @param [String] druid
# @return [Nokogiri::Document] normalized MODS
def self.normalize_purl(mods_ng_xml:, druid:)
  normalizer = new(mods_ng_xml: mods_ng_xml, druid: druid)
  normalizer.normalize_purl
end
|
29
|
+
|
30
|
+
# Builds a normalizer and runs the purl normalization followed by the
# missing-title normalization.
#
# @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
# @param [String] druid
# @param [String] label
# @return [Nokogiri::Document] normalized MODS
def self.normalize_purl_and_missing_title(mods_ng_xml:, druid:, label:)
  normalizer = new(mods_ng_xml: mods_ng_xml, druid: druid, label: label)
  normalizer.normalize_purl_and_missing_title
end
|
37
|
+
|
38
|
+
# Builds a normalizer with no druid and runs only the identifier-type normalization.
#
# @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
# @return [Nokogiri::Document] normalized MODS
def self.normalize_identifier_type(mods_ng_xml:)
  normalizer = new(mods_ng_xml: mods_ng_xml, druid: nil)
  normalizer.normalize_identifier_type
end
|
43
|
+
|
44
|
+
# Works on a duplicate of the incoming document, or on a blank MODS document
# when the input has no root element. The working copy is forced to UTF-8.
#
# @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
# @param [String, nil] druid
# @param [String, nil] label
def initialize(mods_ng_xml:, druid:, label: nil)
  @druid = druid
  @label = label
  @ng_xml = mods_ng_xml.root.nil? ? blank_ng_xml : mods_ng_xml.dup
  @ng_xml.encoding = 'UTF-8'
end
|
50
|
+
|
51
|
+
# Runs every normalization step, in a fixed order, against the working document.
# Several steps replace @ng_xml with the document returned by a dedicated
# normalizer class, so later steps always read the current document via ng_xml.
# The order is significant; the inline comments record the known constraints.
#
# @return [Nokogiri::Document] normalized MODS
def normalize
  normalize_default_namespace
  normalize_xsi
  normalize_version
  normalize_empty_attributes
  normalize_authority_uris # must be called before OriginInfoNormalizer
  @ng_xml = Cocina::Models::Mapping::Normalizers::Mods::OriginInfoNormalizer.normalize(mods_ng_xml: ng_xml)
  @ng_xml = Cocina::Models::Mapping::Normalizers::Mods::SubjectNormalizer.normalize(mods_ng_xml: ng_xml)
  @ng_xml = Cocina::Models::Mapping::Normalizers::Mods::NameNormalizer.normalize(mods_ng_xml: ng_xml)
  normalize_related_item_other_type
  normalize_unmatched_altrepgroup
  normalize_unmatched_nametitlegroup
  normalize_xml_space
  normalize_language_term_type
  normalize_access_condition
  normalize_identifier_type_attr
  normalize_location_physical_location
  normalize_purl_location
  normalize_empty_notes
  @ng_xml = Cocina::Models::Mapping::Normalizers::Mods::TitleNormalizer.normalize(mods_ng_xml: ng_xml, label: label)
  @ng_xml = Cocina::Models::Mapping::Normalizers::Mods::GeoExtensionNormalizer.normalize(mods_ng_xml: ng_xml, druid: druid)
  normalize_empty_type_of_resource # Must be after normalize_empty_attributes
  normalize_notes
  normalize_abstracts
  normalize_usage_primary
  normalize_related_item_attributes
  # This should be last-ish.
  normalize_empty_related_items
  remove_empty_elements(ng_xml.root) # this must be last
  ng_xml
end
|
82
|
+
|
83
|
+
# Applies only the purl location normalization.
#
# @return [Nokogiri::Document] the working document
def normalize_purl
  normalize_purl_location
  @ng_xml
end
|
87
|
+
|
88
|
+
# Applies the purl location normalization, then lets TitleNormalizer handle a
# missing title using the label.
#
# @return [Nokogiri::Document] the working document
def normalize_purl_and_missing_title
  normalize_purl_location
  title_normalizer = Cocina::Models::Mapping::Normalizers::Mods::TitleNormalizer
  @ng_xml = title_normalizer.normalize_missing_title(mods_ng_xml: ng_xml, label: label)
  ng_xml
end
|
93
|
+
|
94
|
+
# Applies only the identifier type/source attribute normalization.
#
# @return [Nokogiri::Document] the working document
def normalize_identifier_type
  normalize_identifier_type_attr
  @ng_xml
end
|
98
|
+
|
99
|
+
private
|
100
|
+
|
101
|
+
attr_reader :ng_xml, :druid, :label
|
102
|
+
|
103
|
+
# Recursively prunes elements that have no element children, no non-blank text
# and no attributes. Elements named 'etal' are always kept.
#
# @param [Nokogiri::XML::Node, nil] start_node node to start pruning from
def remove_empty_elements(start_node)
  return unless start_node

  prunable = start_node.elements.empty? &&
             start_node.text.blank? &&
             start_node.attributes.empty? &&
             start_node.name != 'etal'

  if prunable
    container = start_node.parent
    start_node.remove
    # The parent may have just become empty, so it is examined again.
    remove_empty_elements(container)
  else
    start_node.element_children.each { |child| remove_empty_elements(child) }
  end
end
|
119
|
+
|
120
|
+
# Works on the serialized XML: when the MODS default-namespace declaration is
# absent, it is added to the first 'mods:mods' occurrence and every 'mods:'
# substring is stripped. The (possibly unchanged) text is then handed to
# regenerate_ng_xml.
def normalize_default_namespace
  serialized = ng_xml.to_s
  declares_default_ns = serialized.include?('xmlns="http://www.loc.gov/mods/v3"')

  unless declares_default_ns
    serialized = serialized
                 .sub('mods:mods', 'mods:mods xmlns="http://www.loc.gov/mods/v3"')
                 .gsub('mods:', '')
  end

  regenerate_ng_xml(serialized)
end
|
130
|
+
|
131
|
+
# Adds the xsi namespace declaration to the opening '<mods ' tag of the
# serialized XML when the document does not already declare 'xmlns:xsi',
# then hands the text to regenerate_ng_xml. Does nothing otherwise.
def normalize_xsi
  return if ng_xml.namespaces.key?('xmlns:xsi')

  with_xsi = ng_xml.to_s.sub('<mods ', '<mods xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" ')
  regenerate_ng_xml(with_xsi)
end
|
139
|
+
|
140
|
+
# Aligns the root's version and xsi:schemaLocation attributes.
# When recordInfo/recordOrigin mentions "MODS version X.Y", that version is
# applied unless the root already carries it; when there is no such mention,
# the root is always set to version 3.7.
def normalize_version
  record_origin = ng_xml.at('//mods:recordInfo/mods:recordOrigin', mods: MODS_NS)&.content
  declared = /MODS version (\d\.\d)/.match(record_origin)
  target_version = declared ? declared[1] : '3.7'

  # A mapped version that already matches the root needs no change.
  return if declared && target_version == ng_xml.root['version']

  ng_xml.root['version'] = target_version
  ng_xml.root['xsi:schemaLocation'] = "http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-#{target_version.sub('.', '-')}.xsd"
end
|
152
|
+
|
153
|
+
# Appends a trailing slash to every authorityURI attribute whose value exactly
# equals one of the URIs listed in Authority::NORMALIZE_AUTHORITY_URIS.
def normalize_authority_uris
  known_uris = Cocina::Models::Mapping::FromMods::Authority::NORMALIZE_AUTHORITY_URIS
  known_uris.each do |authority_uri|
    matching_nodes = ng_xml.xpath("//mods:*[@authorityURI='#{authority_uri}']", mods: MODS_NS)
    matching_nodes.each { |node| node[:authorityURI] = "#{authority_uri}/" }
  end
end
|
160
|
+
|
161
|
+
# Normalizes purl locations on the root (using this object's purl) and then on
# each top-level relatedItem (with no purl supplied).
def normalize_purl_location
  object_purl = Cocina::Models::Mapping::Purl.for(druid: druid)
  normalize_purl_for(ng_xml.root, purl: object_purl)

  # Queried after the root pass, which may itself append relatedItem elements.
  related_items = ng_xml.xpath('/mods:mods/mods:relatedItem', mods: MODS_NS)
  related_items.each { |related_item_node| normalize_purl_for(related_item_node) }
end
|
165
|
+
|
166
|
+
# Normalizes the purl-bearing location/url nodes directly under base_node.
# The passes below mutate the tree and re-query it, so their order matters.
#
# @param [Nokogiri::XML::Node] base_node the root or a relatedItem element
# @param [String, nil] purl the purl expected for this node, if any
def normalize_purl_for(base_node, purl: nil)
  # Pass 1: rewrite each purl url's text with the value computed by FromMods::Purl.purl_value.
  purl_nodes(base_node).each do |purl_node|
    purl_node.content = Cocina::Models::Mapping::FromMods::Purl.purl_value(purl_node)
  end

  # If there is a purl, add a url node if there is not already one.
  if purl && purl_nodes(base_node).to_a.none? { |purl_node| purl_node.content == purl }
    new_location = Nokogiri::XML::Node.new('location', Nokogiri::XML(nil))
    new_url = Nokogiri::XML::Node.new('url', Nokogiri::XML(nil))
    new_url.content = purl
    new_location << new_url
    base_node << new_location
  end

  # Pass 3: every purl url other than the primary one (as chosen by
  # FromMods::Purl.primary_purl_node) is relocated, together with its parent
  # location, into a new relatedItem appended to base_node.
  purl_nodes(base_node).each do |purl_node|
    next if purl_node == Cocina::Models::Mapping::FromMods::Purl.primary_purl_node(base_node, purl)

    # Move into own relatedItem
    new_related_item = Nokogiri::XML::Node.new('relatedItem', Nokogiri::XML(nil))
    location_node = purl_node.parent
    location_node.remove
    new_related_item << location_node
    base_node << new_related_item
    purl_node[:usage] = 'primary display'
  end

  # Pass 4: exactly one url directly under base_node keeps usage="primary display";
  # the flag is removed from any other url that carried it.
  primary_url_node = primary_url_node_for(base_node, purl)
  base_node.xpath('mods:location/mods:url', mods: MODS_NS).each do |url_node|
    if url_node == primary_url_node
      url_node[:usage] = 'primary display'
    elsif url_node[:usage] == 'primary display'
      url_node.delete('usage')
    end
  end
end
|
201
|
+
|
202
|
+
# Lists the location/url nodes directly under base_node whose text is a purl.
#
# @param [Nokogiri::XML::Node] base_node
# @return [Array<Nokogiri::XML::Node>]
def purl_nodes(base_node)
  url_nodes = base_node.xpath('mods:location/mods:url', mods: MODS_NS)
  url_nodes.select { |url_node| ::Cocina::Models::Mapping::Purl.purl?(url_node.text) }
end
|
205
|
+
|
206
|
+
# Chooses the url node that should be flagged as the primary display.
# Preference order: a purl already flagged "primary display", any other url
# already flagged, the purl node whose content equals purl, the first purl node.
#
# @param [Nokogiri::XML::Node] base_node
# @param [String, nil] purl
# @return [Nokogiri::XML::Node, nil]
def primary_url_node_for(base_node, purl)
  flagged_urls = base_node.xpath('mods:location/mods:url[@usage="primary display"]', mods: MODS_NS)
  flagged_purls, flagged_others = flagged_urls.partition do |url_node|
    ::Cocina::Models::Mapping::Purl.purl?(url_node.text)
  end

  candidate_purls = purl_nodes(base_node)
  own_purl_node = purl ? candidate_purls.find { |purl_node| purl_node.content == purl } : nil

  flagged_purls.first || flagged_others.first || own_purl_node || candidate_purls.first
end
|
216
|
+
|
217
|
+
# On relatedItem elements that carry both type and otherType, drops the
# otherType, otherTypeURI and otherTypeAuth attributes.
def normalize_related_item_other_type
  ng_xml.xpath('//mods:relatedItem[@type and @otherType]', mods: MODS_NS).each do |related_node|
    %w[otherType otherTypeURI otherTypeAuth].each { |attr_name| related_node.delete(attr_name) }
  end
end
|
224
|
+
|
225
|
+
# Removes note elements that have neither a text node nor an xlink:href attribute.
def normalize_empty_notes
  empty_notes = ng_xml.xpath('//mods:note[not(text()) and not(@xlink:href)]', mods: MODS_NS, xlink: XLINK_NS)
  empty_notes.each(&:remove)
end
|
228
|
+
|
229
|
+
# Removes typeOfResource elements that have no text node and no attributes.
def normalize_empty_type_of_resource
  empty_types = ng_xml.xpath('//mods:typeOfResource[not(text())][not(@*)]', mods: MODS_NS)
  empty_types.each(&:remove)
end
|
232
|
+
|
233
|
+
# Drops unmatched altRepGroup attributes under the root and under every relatedItem.
def normalize_unmatched_altrepgroup
  normalize_unmatched_altrepgroup_for(ng_xml.root)

  related_items = ng_xml.xpath('//mods:relatedItem', mods: MODS_NS)
  related_items.each do |related_item_node|
    normalize_unmatched_altrepgroup_for(related_item_node)
  end
end
|
237
|
+
|
238
|
+
# Among the direct children of base_node, removes altRepGroup from any element
# that is the only one with its (altRepGroup value, element name) combination.
#
# @param [Nokogiri::XML::Node] base_node
def normalize_unmatched_altrepgroup_for(base_node)
  grouped = base_node.xpath('mods:*[@altRepGroup]', mods: MODS_NS)
                     .group_by { |node| [node['altRepGroup'], node.name] }

  grouped.each_value do |members|
    members.first.delete('altRepGroup') if members.size == 1
  end
end
|
252
|
+
|
253
|
+
# Drops unmatched nameTitleGroup attributes under the root and under every relatedItem.
def normalize_unmatched_nametitlegroup
  normalize_unmatched_nametitlegroup_for(ng_xml.root)

  related_items = ng_xml.xpath('//mods:relatedItem', mods: MODS_NS)
  related_items.each do |related_item_node|
    normalize_unmatched_nametitlegroup_for(related_item_node)
  end
end
|
257
|
+
|
258
|
+
# Among the direct name and titleInfo children of base_node, removes
# nameTitleGroup from any element that is the only one carrying its group value.
#
# @param [Nokogiri::XML::Node] base_node
def normalize_unmatched_nametitlegroup_for(base_node)
  grouped = base_node.xpath('mods:name[@nameTitleGroup] | mods:titleInfo[@nameTitleGroup]', mods: MODS_NS)
                     .group_by { |node| node['nameTitleGroup'] }

  grouped.each_value do |members|
    members.first.delete('nameTitleGroup') if members.size == 1
  end
end
|
272
|
+
|
273
|
+
# For every MODS element with at least one empty-string attribute, deletes all
# of that element's blank attributes.
def normalize_empty_attributes
  candidates = ng_xml.xpath('//mods:*[@*=""]', mods: MODS_NS)
  candidates.each do |node|
    node.each do |attr_name, attr_value|
      node.delete(attr_name) if attr_value.blank?
    end
  end
end
|
278
|
+
|
279
|
+
# Deletes the 'space' attribute from every MODS element selected by @xml:space.
def normalize_xml_space
  spaced_nodes = ng_xml.xpath('//mods:*[@xml:space]', mods: MODS_NS)
  spaced_nodes.each { |node| node.delete('space') }
end
|
284
|
+
|
285
|
+
# Gives every languageTerm that lacks a type attribute the type 'code'.
def normalize_language_term_type
  untyped_terms = ng_xml.xpath('//mods:languageTerm[not(@type)]', mods: MODS_NS)
  untyped_terms.each { |node| node['type'] = 'code' }
end
|
290
|
+
|
291
|
+
# Rewrites camel-cased accessCondition type values to their spaced forms.
def normalize_access_condition
  replacements = {
    'restrictionOnAccess' => 'restriction on access',
    'restrictionsOnAccess' => 'restriction on access',
    'useAndReproduction' => 'use and reproduction'
  }

  replacements.each do |legacy_type, normalized_type|
    ng_xml.xpath("//mods:accessCondition[@type=\"#{legacy_type}\"]", mods: MODS_NS).each do |node|
      node['type'] = normalized_type
    end
  end
end
|
302
|
+
|
303
|
+
# Passes the type of every identifier and nameIdentifier, and the source of
# every recordIdentifier, through normalized_identifier_type_for.
def normalize_identifier_type_attr
  attribute_by_element = {
    'identifier' => 'type',
    'nameIdentifier' => 'type',
    'recordIdentifier' => 'source'
  }

  attribute_by_element.each do |element_name, attribute|
    ng_xml.xpath("//mods:#{element_name}[@#{attribute}]", mods: MODS_NS).each do |node|
      node[attribute] = normalized_identifier_type_for(node[attribute])
    end
  end
end
|
314
|
+
|
315
|
+
# Looks the type up through IdentifierType. When the lookup reports an
# identifier source, the MODS type for the resolved cocina type is returned;
# otherwise the input is returned unchanged.
#
# @param [String] type
# @return [String]
def normalized_identifier_type_for(type)
  identifier_types = Cocina::Models::Mapping::FromMods::IdentifierType
  cocina_type, _mods_type, identifier_source = identifier_types.cocina_type_for_mods_type(type)

  identifier_source ? identifier_types.mods_type_for_cocina_type(cocina_type) : type
end
|
322
|
+
|
323
|
+
# Splits every location element so that each physicalLocation, url and
# shelfLocator child ends up in a location element of its own.
def normalize_location_physical_location
  ng_xml.xpath('//mods:location', mods: MODS_NS).each do |location_node|
    location_node.xpath('mods:physicalLocation|mods:url|mods:shelfLocator', mods: MODS_NS).each do |node|
      # Children with neither content nor an xlink:href are not carried over.
      next unless node.content.present? || node['xlink:href']

      # The child is added to a fresh location appended to the original location's parent.
      new_location = Nokogiri::XML::Node.new('location', ng_xml)
      new_location << node
      location_node.parent << new_location
    end
    # The original container is removed together with whatever is still inside it.
    location_node.remove
  end
end
|
335
|
+
|
336
|
+
# Removes a relatedItem's part when it consists solely of a detail holding a
# single text-less number, then removes relatedItems that have no MODS child
# elements and no xlink:href.
def normalize_empty_related_items
  empty_number_xpath = '//mods:relatedItem/mods:part[count(mods:*)=1]/mods:detail[count(mods:*)=1]/mods:number[not(text())]'
  ng_xml.xpath(empty_number_xpath, mods: MODS_NS).each do |number_node|
    # number -> detail -> part: the whole part element is removed.
    number_node.parent.parent.remove
  end

  empty_related_items = ng_xml.xpath('//mods:relatedItem[not(mods:*) and not(@xlink:href)]', mods: MODS_NS, xlink: XLINK_NS)
  empty_related_items.each(&:remove)
end
|
343
|
+
|
344
|
+
# Renames a note to abstract when its downcased type or its displayLabel is
# listed in ToMods::Note's abstract mappings. The type attribute is dropped on
# renamed notes unless it is 'summary'; a listed displayLabel is capitalized.
def normalize_notes
  note_mapping = Cocina::Models::Mapping::ToMods::Note

  ng_xml.xpath('//mods:note', mods: MODS_NS).each do |note_node|
    note_type = note_node['type']&.downcase
    display_label = note_node['displayLabel']
    abstract_label = note_mapping.display_label_to_abstract_type.include?(display_label)

    if note_mapping.note_type_to_abstract_type.include?(note_type) || abstract_label
      note_node.delete('type') unless note_type == 'summary'
      note_node.name = 'abstract'
    end

    note_node['displayLabel'] = display_label.capitalize if abstract_label
  end
end
|
357
|
+
|
358
|
+
# For top-level abstracts: downcases a type listed in ToMods::Note's type
# mapping, capitalizes a listed displayLabel, and finally drops a type that is
# exactly 'abstract'.
def normalize_abstracts
  note_mapping = Cocina::Models::Mapping::ToMods::Note

  ng_xml.xpath('/mods:mods/mods:abstract', mods: MODS_NS).each do |abstract_node|
    original_type = abstract_node['type']
    abstract_node['type'] = original_type.downcase if note_mapping.note_type_to_abstract_type.include?(original_type&.downcase)

    display_label = abstract_node['displayLabel']
    abstract_node['displayLabel'] = display_label.capitalize if note_mapping.display_label_to_abstract_type.include?(display_label)

    # Re-read the attribute: it may have been downcased just above.
    abstract_node.delete('type') if abstract_node['type'] == 'abstract'
  end
end
|
371
|
+
|
372
|
+
# De-duplicates usage="primary" under the root, under each top-level
# relatedItem, and under every subject.
def normalize_usage_primary
  normalize_usage_primary_for(ng_xml.root)

  related_items = ng_xml.xpath('/mods:mods/mods:relatedItem', mods: MODS_NS)
  related_items.each { |related_item_node| normalize_usage_primary_for(related_item_node) }

  subjects = ng_xml.xpath('//mods:subject', mods: MODS_NS)
  subjects.each { |subject_node| normalize_usage_primary_for(subject_node) }
end
|
379
|
+
|
380
|
+
# For each listed element name, keeps usage="primary" on the first matching
# direct child of base_node and deletes the usage attribute from the rest.
#
# @param [Nokogiri::XML::Node] base_node
def normalize_usage_primary_for(base_node)
  %w[genre language classification subject titleInfo typeOfResource name].each do |node_name|
    flagged = base_node.xpath("mods:#{node_name}[@usage=\"primary\"]", mods: MODS_NS)
    # Everything after the first flagged node loses the flag; no-op for 0 or 1 matches.
    flagged.drop(1).each { |extra_primary| extra_primary.delete('usage') }
  end
end
|
388
|
+
|
389
|
+
# Strips the lang and script attributes from top-level relatedItem elements.
def normalize_related_item_attributes
  tagged_items = ng_xml.xpath('/mods:mods/mods:relatedItem[@lang or @script]', mods: MODS_NS)
  tagged_items.each do |related_item_node|
    %w[lang script].each { |attr_name| related_item_node.delete(attr_name) }
  end
end
|
395
|
+
|
396
|
+
# Builds the stand-in document used when the incoming MODS has no root: an
# empty mods element declaring the MODS and xsi namespaces, version 3.6.
#
# @return [Nokogiri::XML::Document]
def blank_ng_xml
  # The #{' '} interpolations emit a single trailing space at the end of each attribute line.
  Nokogiri::XML(<<~XML
    <mods xmlns="http://www.loc.gov/mods/v3"#{' '}
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"#{' '}
    version="3.6"#{' '}
    xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd" />
  XML
  )
end
|
405
|
+
end
|
406
|
+
end
|
407
|
+
end
|
408
|
+
end
|
409
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
|
4
|
+
module Models
|
5
|
+
module Mapping
|
6
|
+
# Utility methods for generating purl links
|
7
|
+
class Purl
|
8
|
+
class_attribute :base_url, default: 'https://purl.stanford.edu'
|
9
|
+
|
10
|
+
# Builds the purl for a druid by appending the druid (minus any 'druid:'
# prefix) to base_url.
#
# @param [String, nil] druid
# @return [String, nil] nil when druid is nil
def self.for(druid:)
  return if druid.nil?

  bare_druid = druid.delete_prefix('druid:')
  "#{base_url}/#{bare_druid}"
end
|
15
|
+
|
16
|
+
# Reports whether the given text starts with the purl host under either the
# https or the http scheme.
#
# @param [String] node the url text to test (despite the name, a string, not an XML node)
# @return [Boolean]
def self.purl?(node)
  node.start_with?("https://#{host}", "http://#{host}")
end
|
19
|
+
|
20
|
+
# The purl base url without the protocol part.
#
# Derived from base_url on every call rather than memoized: base_url is a
# configurable class attribute, and a cached value would go stale if it were
# reassigned after the first call.
#
# @return [String]
def self.host
  # \A anchors at the start of the string (^ would also match after a newline).
  base_url.sub(%r{\Ahttps?://}, '')
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
|
4
|
+
module Models
|
5
|
+
module Mapping
|
6
|
+
module ToMods
|
7
|
+
# Maps the Access subschema from cocina to MODS XML
|
8
|
+
class Access # rubocop:disable Metrics/ClassLength
|
9
|
+
# Builds a writer for the given access data and emits its MODS.
#
# @param [Nokogiri::XML::Builder] xml
# @param [Cocina::Models::Access] access
# @param [String] purl
def self.write(xml:, access:, purl:)
  writer = new(xml: xml, access: access, purl: purl)
  writer.write
end
|
15
|
+
|
16
|
+
# @param [Nokogiri::XML::Builder] xml builder the MODS is written to
# @param [Cocina::Models::Access, nil] access
# @param [String, nil] purl
def initialize(xml:, access:, purl:)
  @purl = purl
  @access = access
  @xml = xml
end
|
21
|
+
|
22
|
+
# Writes the purl location (when a purl is given) and then, when access data
# is present, the access conditions, one location per url, and the physical,
# digital, shelf-locator and access-contact locations, in that order.
def write
  write_purl unless purl.nil?
  return if access.nil?

  write_access_conditions if access

  Array(access.url).each do |url|
    xml.location { write_url(url) }
  end

  write_physical_locations
  write_digital_locations
  write_shelf_locators
  write_access_contact_locations
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
attr_reader :xml, :access, :purl
|
43
|
+
|
44
|
+
# Writes one location/physicalLocation per physical location that is not a
# shelf locator, using its value or, failing that, its code.
def write_physical_locations
  plain_locations = Array(access.physicalLocation).reject { |candidate| shelf_locator?(candidate) }

  plain_locations.each do |physical_location|
    xml.location do
      xml.physicalLocation physical_location.value || physical_location.code,
                           descriptive_attrs(physical_location)
    end
  end
end
|
54
|
+
|
55
|
+
# Writes one location/physicalLocation per digital location of type
# 'discovery', using its value or, failing that, its code.
def write_digital_locations
  discovery_locations = Array(access.digitalLocation).select { |candidate| candidate.type == 'discovery' }

  discovery_locations.each do |digital_location|
    xml.location do
      xml.physicalLocation digital_location.value || digital_location.code,
                           descriptive_attrs(digital_location)
    end
  end
end
|
65
|
+
|
66
|
+
# Writes each access contact: an email contact becomes a note of type
# 'contact'; any other contact becomes a location/physicalLocation whose type
# defaults to 'repository' unless the contact supplies its own.
def write_access_contact_locations
  Array(access.accessContact).each do |access_contact|
    contact_attrs = descriptive_attrs(access_contact)

    if access_contact.type == 'email'
      # 'contact' is merged last, so it overrides the contact's own type.
      xml.note access_contact.value, contact_attrs.merge({ type: 'contact' })
    else
      xml.location do
        xml.physicalLocation access_contact.value || access_contact.code,
                             { type: 'repository' }.merge(contact_attrs)
      end
    end
  end
end
|
78
|
+
|
79
|
+
# Writes one location/shelfLocator per physical location that is a shelf locator.
def write_shelf_locators
  shelf_locations = Array(access.physicalLocation).select { |candidate| shelf_locator?(candidate) }

  shelf_locations.each do |physical_location|
    xml.location { xml.shelfLocator physical_location.value }
  end
end
|
88
|
+
|
89
|
+
# Writes a url element for the given cocina url. usage is 'primary display'
# for a url whose status is 'primary'; displayLabel and the first note's
# value are included when present.
#
# @param url cocina url value (responds to status, displayLabel, note, value)
def write_url(url)
  url_attrs = {
    usage: ('primary display' if url.status == 'primary'),
    displayLabel: url.displayLabel,
    note: (url.note.first.value if url.note.present?)
  }.compact

  xml.url url.value, url_attrs
end
|
97
|
+
|
98
|
+
# True when any of the access urls has status 'primary'.
#
# @return [Boolean]
def primary_url_is_not_purl?
  Array(access&.url).map(&:status).include?('primary')
end
|
101
|
+
|
102
|
+
# Writes the purl as a location/url. note and displayLabel come from
# purl-specific access notes; usage is 'primary display' unless one of the
# access urls is already marked primary.
def write_purl
  purl_attrs = {
    note: find_note_value(nil),
    usage: (primary_url_is_not_purl? ? nil : 'primary display'),
    displayLabel: find_note_value('display label')
  }.compact

  xml.location { xml.url purl, purl_attrs }
end
|
113
|
+
|
114
|
+
# Value of the first access note that has the given type and applies to the purl.
#
# @param [String, nil] note_type
# @return [String, nil]
def find_note_value(note_type)
  matching_note = Array(access&.note).find do |note|
    note.type == note_type && purl_note?(note)
  end

  matching_note&.value
end
|
119
|
+
|
120
|
+
# Collects the MODS attributes for a cocina descriptive value, leaving out any
# that are nil.
#
# @param cocina cocina descriptive value
# @return [Hash] attribute name => value
def descriptive_attrs(cocina)
  source = cocina.source
  language = cocina.valueLanguage

  attrs = {
    valueURI: cocina.uri,
    authorityURI: source&.uri,
    authority: source&.code,
    script: language&.valueScript&.code,
    lang: language&.code,
    type: cocina.type,
    displayLabel: cocina.displayLabel,
    'xlink:href' => cocina.valueAt
  }
  attrs.compact
end
|
132
|
+
|
133
|
+
# True when the physical location's type is 'shelf locator'.
#
# @return [Boolean]
def shelf_locator?(physical_location)
  location_type = physical_location.type
  location_type == 'shelf locator'
end
|
136
|
+
|
137
|
+
# Writes an accessCondition for every access note that does not apply to the
# purl. The cocina type 'access restriction' is written as
# 'restriction on access'; any other type is passed through.
def write_access_conditions
  condition_notes = Array(access.note).reject { |note| purl_note?(note) }

  condition_notes.each do |note|
    mods_type = note.type == 'access restriction' ? 'restriction on access' : note.type
    attributes = {
      type: mods_type,
      displayLabel: note.displayLabel,
      'xlink:href' => note.valueAt
    }.compact

    xml.accessCondition note.value, attributes
  end
end
|
147
|
+
|
148
|
+
# True when any of the note's appliesTo entries has the value 'purl'.
#
# @return [Boolean]
def purl_note?(note)
  Array(note.appliesTo).map(&:value).include?('purl')
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|