cocina-models 0.75.0 → 0.76.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +40 -12
- data/.rubocop_todo.yml +71 -2
- data/README.md +10 -3
- data/cocina-models.gemspec +2 -0
- data/description_types.yml +165 -38
- data/docs/description_types.md +469 -216
- data/lib/cocina/generator/generator.rb +7 -12
- data/lib/cocina/generator/schema.rb +1 -3
- data/lib/cocina/generator/schema_base.rb +0 -8
- data/lib/cocina/generator/schema_ref.rb +1 -1
- data/lib/cocina/generator/schema_value.rb +14 -4
- data/lib/cocina/models/access.rb +4 -4
- data/lib/cocina/models/admin_policy.rb +1 -1
- data/lib/cocina/models/admin_policy_access_template.rb +7 -7
- data/lib/cocina/models/admin_policy_administrative.rb +1 -1
- data/lib/cocina/models/admin_policy_with_metadata.rb +3 -3
- data/lib/cocina/models/builders/name_title_group_builder.rb +0 -4
- data/lib/cocina/models/builders/title_builder.rb +0 -2
- data/lib/cocina/models/citation_only_access.rb +2 -2
- data/lib/cocina/models/collection_access.rb +4 -4
- data/lib/cocina/models/collection_identification.rb +1 -1
- data/lib/cocina/models/collection_with_metadata.rb +2 -2
- data/lib/cocina/models/contributor.rb +4 -4
- data/lib/cocina/models/controlled_digital_lending_access.rb +2 -2
- data/lib/cocina/models/dark_access.rb +4 -4
- data/lib/cocina/models/description.rb +3 -3
- data/lib/cocina/models/descriptive_basic_value.rb +13 -13
- data/lib/cocina/models/descriptive_parallel_contributor.rb +5 -5
- data/lib/cocina/models/descriptive_parallel_event.rb +3 -3
- data/lib/cocina/models/descriptive_value.rb +13 -13
- data/lib/cocina/models/descriptive_value_language.rb +6 -6
- data/lib/cocina/models/dro.rb +1 -1
- data/lib/cocina/models/dro_access.rb +8 -8
- data/lib/cocina/models/dro_with_metadata.rb +3 -3
- data/lib/cocina/models/embargo.rb +5 -5
- data/lib/cocina/models/event.rb +3 -3
- data/lib/cocina/models/file.rb +4 -4
- data/lib/cocina/models/file_access.rb +4 -4
- data/lib/cocina/models/identification.rb +2 -2
- data/lib/cocina/models/language.rb +12 -12
- data/lib/cocina/models/location_based_access.rb +1 -1
- data/lib/cocina/models/location_based_download_access.rb +1 -1
- data/lib/cocina/models/mapping/error_notifier.rb +36 -0
- data/lib/cocina/models/mapping/from_mods/access.rb +177 -0
- data/lib/cocina/models/mapping/from_mods/admin_metadata.rb +217 -0
- data/lib/cocina/models/mapping/from_mods/alt_rep_group.rb +26 -0
- data/lib/cocina/models/mapping/from_mods/authority.rb +51 -0
- data/lib/cocina/models/mapping/from_mods/contributor.rb +161 -0
- data/lib/cocina/models/mapping/from_mods/description.rb +99 -0
- data/lib/cocina/models/mapping/from_mods/description_builder.rb +61 -0
- data/lib/cocina/models/mapping/from_mods/event.rb +543 -0
- data/lib/cocina/models/mapping/from_mods/form.rb +381 -0
- data/lib/cocina/models/mapping/from_mods/geographic.rb +219 -0
- data/lib/cocina/models/mapping/from_mods/hydrus_default_title_builder.rb +28 -0
- data/lib/cocina/models/mapping/from_mods/identifier.rb +51 -0
- data/lib/cocina/models/mapping/from_mods/identifier_builder.rb +71 -0
- data/lib/cocina/models/mapping/from_mods/identifier_type.rb +292 -0
- data/lib/cocina/models/mapping/from_mods/language.rb +36 -0
- data/lib/cocina/models/mapping/from_mods/language_script.rb +30 -0
- data/lib/cocina/models/mapping/from_mods/language_term.rb +106 -0
- data/lib/cocina/models/mapping/from_mods/name_builder.rb +307 -0
- data/lib/cocina/models/mapping/from_mods/note.rb +162 -0
- data/lib/cocina/models/mapping/from_mods/part_builder.rb +147 -0
- data/lib/cocina/models/mapping/from_mods/primary.rb +27 -0
- data/lib/cocina/models/mapping/from_mods/purl.rb +53 -0
- data/lib/cocina/models/mapping/from_mods/related_resource.rb +105 -0
- data/lib/cocina/models/mapping/from_mods/subject.rb +413 -0
- data/lib/cocina/models/mapping/from_mods/subject_authority_codes.rb +794 -0
- data/lib/cocina/models/mapping/from_mods/title.rb +160 -0
- data/lib/cocina/models/mapping/from_mods/title_builder.rb +106 -0
- data/lib/cocina/models/mapping/from_mods/title_builder_strategy.rb +19 -0
- data/lib/cocina/models/mapping/from_mods/value_uri.rb +25 -0
- data/lib/cocina/models/mapping/normalizers/base.rb +16 -0
- data/lib/cocina/models/mapping/normalizers/mods/geo_extension_normalizer.rb +69 -0
- data/lib/cocina/models/mapping/normalizers/mods/name_normalizer.rb +191 -0
- data/lib/cocina/models/mapping/normalizers/mods/origin_info_normalizer.rb +157 -0
- data/lib/cocina/models/mapping/normalizers/mods/subject_normalizer.rb +296 -0
- data/lib/cocina/models/mapping/normalizers/mods/title_normalizer.rb +91 -0
- data/lib/cocina/models/mapping/normalizers/mods_normalizer.rb +409 -0
- data/lib/cocina/models/mapping/purl.rb +28 -0
- data/lib/cocina/models/mapping/to_mods/access.rb +155 -0
- data/lib/cocina/models/mapping/to_mods/admin_metadata.rb +129 -0
- data/lib/cocina/models/mapping/to_mods/contributor.rb +49 -0
- data/lib/cocina/models/mapping/to_mods/description.rb +63 -0
- data/lib/cocina/models/mapping/to_mods/event.rb +200 -0
- data/lib/cocina/models/mapping/to_mods/form.rb +292 -0
- data/lib/cocina/models/mapping/to_mods/geographic.rb +151 -0
- data/lib/cocina/models/mapping/to_mods/id_generator.rb +25 -0
- data/lib/cocina/models/mapping/to_mods/identifier.rb +57 -0
- data/lib/cocina/models/mapping/to_mods/language.rb +82 -0
- data/lib/cocina/models/mapping/to_mods/mods_writer.rb +38 -0
- data/lib/cocina/models/mapping/to_mods/name_title_group.rb +29 -0
- data/lib/cocina/models/mapping/to_mods/name_writer.rb +228 -0
- data/lib/cocina/models/mapping/to_mods/note.rb +105 -0
- data/lib/cocina/models/mapping/to_mods/part_writer.rb +115 -0
- data/lib/cocina/models/mapping/to_mods/related_resource.rb +108 -0
- data/lib/cocina/models/mapping/to_mods/role_writer.rb +50 -0
- data/lib/cocina/models/mapping/to_mods/subject.rb +486 -0
- data/lib/cocina/models/mapping/to_mods/title.rb +260 -0
- data/lib/cocina/models/object_metadata.rb +2 -2
- data/lib/cocina/models/presentation.rb +2 -2
- data/lib/cocina/models/related_resource.rb +9 -9
- data/lib/cocina/models/release_tag.rb +4 -4
- data/lib/cocina/models/request_admin_policy.rb +1 -1
- data/lib/cocina/models/request_administrative.rb +1 -1
- data/lib/cocina/models/request_collection.rb +2 -2
- data/lib/cocina/models/request_description.rb +3 -3
- data/lib/cocina/models/request_dro.rb +4 -4
- data/lib/cocina/models/request_file.rb +5 -5
- data/lib/cocina/models/request_identification.rb +1 -1
- data/lib/cocina/models/sequence.rb +1 -1
- data/lib/cocina/models/source.rb +4 -4
- data/lib/cocina/models/standard.rb +5 -5
- data/lib/cocina/models/stanford_access.rb +2 -2
- data/lib/cocina/models/title.rb +13 -13
- data/lib/cocina/models/validators/dark_validator.rb +4 -2
- data/lib/cocina/models/validators/open_api_validator.rb +0 -4
- data/lib/cocina/models/version.rb +1 -1
- data/lib/cocina/models/world_access.rb +2 -2
- data/lib/cocina/models.rb +4 -0
- data/lib/cocina/rspec/factories.rb +157 -0
- data/lib/cocina/rspec.rb +2 -0
- data/openapi.yml +4 -4
- metadata +88 -3
- data/docs/_config.yml +0 -1
@@ -0,0 +1,157 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
|
4
|
+
module Models
|
5
|
+
module Mapping
|
6
|
+
module Normalizers
|
7
|
+
module Mods
|
8
|
+
# Normalizes a Fedora MODS document for originInfo elements.
# Must be called after authorityURI attribs are normalized.
#
# The passed document is duplicated in the constructor, so the caller's document is not modified;
# #normalize returns the normalized copy.
class OriginInfoNormalizer
  # originInfo date elements inspected by #single_key_date and #remove_trailing_period_from_date_values
  DATE_FIELDS = %w[dateIssued copyrightDate dateCreated dateCaptured dateValid dateOther dateModified].freeze

  # Lookup used to translate legacy eventType / displayLabel values (keys) into the preferred
  # eventType values; shared with the FromMods::Event mapping.
  # Declared with the other constants because `private` has no effect on constants.
  LEGACY_EVENT_TYPES_2_TYPE = Cocina::Models::Mapping::FromMods::Event::LEGACY_EVENT_TYPES_2_TYPE

  # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
  # @return [Nokogiri::Document] normalized MODS
  def self.normalize(mods_ng_xml:)
    new(mods_ng_xml: mods_ng_xml).normalize
  end

  # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized (a copy is made and re-encoded as UTF-8)
  def initialize(mods_ng_xml:)
    @ng_xml = mods_ng_xml.dup
    @ng_xml.encoding = 'UTF-8'
  end

  # Runs every originInfo normalization step, in an order some steps depend on.
  # @return [Nokogiri::Document] the normalized copy of the MODS document
  def normalize
    remove_empty_child_elements
    remove_empty_origin_info # must be after remove_empty_child_elements
    normalize_legacy_mods_event_type
    place_term_type_normalization
    place_term_authority_normalization # must be after place_term_type_normalization
    normalize_authority_marcountry
    single_key_date
    remove_trailing_period_from_date_values
    ng_xml
  end

  private

  attr_reader :ng_xml

  # Removes children of originInfo that have no content.
  # must be called before remove_empty_origin_info
  def remove_empty_child_elements
    ng_xml.root.xpath('//mods:originInfo/mods:*', mods: ModsNormalizer::MODS_NS).each do |child_node|
      # a child with a descendant carrying either of these 2 attributes could have meaning even without any content
      next if child_node.xpath('.//*[@valueURI]').present?
      next if child_node.xpath('.//*[@xlink:href]', xlink: ModsNormalizer::XLINK_NS).present?

      child_node.remove if child_node.content.blank?
    end
  end

  # Removes originInfo elements that no longer have any MODS child elements.
  # must be called after remove_empty_child_elements
  def remove_empty_origin_info
    ng_xml.root.xpath('//mods:originInfo[not(mods:*) and not(@*)]', mods: ModsNormalizer::MODS_NS).each(&:remove)
    # make sure we remove ones such as <originInfo eventType="publication"/>
    ng_xml.root.xpath('//mods:originInfo[not(mods:*) and not(text()[normalize-space()])]', mods: ModsNormalizer::MODS_NS).each(&:remove)
  end

  # because eventType is a relatively new addition to the MODS schema, records converted from MARC to MODS prior
  # to its introduction used displayLabel as a stopgap measure, with certain values
  # The same values were also sometimes used as eventType values themselves, and will be converted to our preferred vocab.
  def normalize_legacy_mods_event_type
    ng_xml.root.xpath('//mods:originInfo[@*]', mods: ModsNormalizer::MODS_NS).each do |origin_info_node|
      display_label = origin_info_node['displayLabel']

      event_type = origin_info_node['eventType']
      # fall back to a legacy displayLabel only when there is no usable eventType
      event_type = display_label if event_type.blank? && LEGACY_EVENT_TYPES_2_TYPE.key?(display_label)
      event_type = LEGACY_EVENT_TYPES_2_TYPE[event_type] if LEGACY_EVENT_TYPES_2_TYPE.key?(event_type)
      next if event_type.blank?

      origin_info_node['eventType'] = event_type
      # the displayLabel is dropped only when it says the same thing as the resulting eventType
      origin_info_node.delete('displayLabel') if event_type == LEGACY_EVENT_TYPES_2_TYPE[display_label]
    end
  end

  # must be called before place_term_authority_normalization
  # if the cocina model doesn't have a code, then it will have a value;
  # this is output as attribute type=text on the roundtripped placeTerm element
  def place_term_type_normalization
    ng_xml.root.xpath('//mods:originInfo/mods:place/mods:placeTerm', mods: ModsNormalizer::MODS_NS).each do |place_term_node|
      next if place_term_node.content.blank?

      # NOTE(review): this is called on the attribute node, not its string value, so it presumably only
      # detects a missing type attribute (not type="") - confirm before changing.
      place_term_node['type'] = 'text' if place_term_node.attributes['type'].blank?
    end
  end

  # must be called after place_term_type_normalization
  # if the MODS has a single place element with both text and code placeTerm elements (each with content),
  # and exactly one of the two has authority attributes, then those authority attributes are copied to
  # the other one so that both carry them.
  def place_term_authority_normalization
    ng_xml.root.xpath('//mods:originInfo/mods:place[mods:placeTerm/@type]', mods: ModsNormalizer::MODS_NS).each do |place_node|
      text_place_term_node = place_node.xpath("mods:placeTerm[not(@type='code')]", mods: ModsNormalizer::MODS_NS).first
      next unless text_place_term_node
      next if text_place_term_node.text.blank?

      code_place_term_node = place_node.xpath("mods:placeTerm[@type='code']", mods: ModsNormalizer::MODS_NS).first
      next unless code_place_term_node
      next if code_place_term_node.text.blank?

      text_authority_attributes = authority_attributes(text_place_term_node)
      code_authority_attributes = authority_attributes(code_place_term_node)

      # NOTE: deliberately skipping situation where text node has some authority info and code node
      #   has other authority info as we may never encounter this
      if text_authority_attributes.present? && code_authority_attributes.blank?
        text_authority_attributes.each { |key, val| code_place_term_node[key] = val }
      elsif code_authority_attributes.present? && text_authority_attributes.blank?
        code_authority_attributes.each { |key, val| text_place_term_node[key] = val }
      end
    end
  end

  # @param ng_node the node to read from (anything responding to #[] with attribute names)
  # @return [Hash] the valueURI, authority and authorityURI attributes of the node, omitting any that are nil
  def authority_attributes(ng_node)
    {
      valueURI: ng_node['valueURI'],
      authority: ng_node['authority'],
      authorityURI: ng_node['authorityURI']
    }.compact
  end

  # Corrects the misspelled authority value 'marcountry' to 'marccountry' on any MODS element.
  def normalize_authority_marcountry
    ng_xml.root.xpath("//mods:*[@authority='marcountry']", mods: ModsNormalizer::MODS_NS).each do |node|
      node[:authority] = 'marccountry'
    end
  end

  # For each date field: when exactly two originInfo dates have a point attribute and keyDate="yes",
  # removes keyDate from the one with point="end" (if there is one).
  def single_key_date
    DATE_FIELDS.each do |date_field|
      key_date_nodes = ng_xml.root.xpath("//mods:originInfo/mods:#{date_field}[@point and @keyDate='yes']", mods: ModsNormalizer::MODS_NS)
      next unless key_date_nodes.size == 2

      end_node = key_date_nodes.find { |node| node['point'] == 'end' }
      # the xpath above already guarantees keyDate="yes" on every candidate, so no further check is needed
      end_node&.delete('keyDate')
    end
  end

  # Strips a single trailing '.' from the content of every originInfo date element.
  def remove_trailing_period_from_date_values
    DATE_FIELDS.each do |date_field|
      ng_xml.root.xpath("//mods:originInfo/mods:#{date_field}", mods: ModsNormalizer::MODS_NS)
            .each { |date_node| date_node.content = date_node.content.delete_suffix('.') }
    end
  end
end
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
@@ -0,0 +1,296 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
|
4
|
+
module Models
|
5
|
+
module Mapping
|
6
|
+
module Normalizers
|
7
|
+
module Mods
|
8
|
+
# Normalizes a Fedora MODS document for subject elements.
#
# The passed document is duplicated in the constructor, so the caller's document is not modified;
# #normalize returns the normalized copy.
class SubjectNormalizer # rubocop:disable Metrics/ClassLength
  # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
  # @return [Nokogiri::Document] normalized MODS
  def self.normalize(mods_ng_xml:)
    new(mods_ng_xml: mods_ng_xml).normalize
  end

  # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized (a copy is made and re-encoded as UTF-8)
  def initialize(mods_ng_xml:)
    @ng_xml = mods_ng_xml.dup
    @ng_xml.encoding = 'UTF-8'
  end

  # Runs every subject normalization step in a fixed order.
  # @return [Nokogiri::Document] the normalized copy of the MODS document
  def normalize
    normalize_xlink_href
    normalize_empty_geographic
    normalize_marcgac
    normalize_empty_temporal
    normalize_subject
    normalize_subject_children
    normalize_subject_authority
    normalize_subject_authority_lcnaf
    normalize_subject_authority_naf
    normalize_subject_authority_tgm
    normalize_coordinates # Must be before normalize_subject_cartographics
    normalize_subject_cartographics
    normalize_subject_lang_and_script
    ng_xml
  end

  private

  attr_reader :ng_xml

  # Strips trailing hyphens from geographicCode values.
  def normalize_marcgac
    ng_xml.root.xpath('//mods:geographicCode', mods: ModsNormalizer::MODS_NS).each do |node|
      node.content = node.content.sub(/-+$/, '')
    end
  end

  # rubocop:disable Metrics/CyclomaticComplexity
  # rubocop:disable Metrics/AbcSize

  # Reconciles authority / authorityURI / valueURI between each non-cartographic subject and its children.
  def normalize_subject
    ng_xml.root.xpath('//mods:subject[not(mods:cartographics)]', mods: ModsNormalizer::MODS_NS).each do |subject_node|
      children_nodes = subject_node.xpath('mods:*', mods: ModsNormalizer::MODS_NS)

      next if children_nodes.empty?

      if (has_authorityURI?(subject_node) || has_valueURI?(subject_node)) &&
         children_nodes.size == 1
        # If subject has authority and child doesn't, copy to child.
        add_authority(children_nodes, subject_node) if has_authority?(subject_node) && !has_authority?(children_nodes)
        # If subject has authorityURI and child doesn't, move to child.
        add_authorityURI(children_nodes, subject_node) if has_authorityURI?(subject_node) && !has_authorityURI?(children_nodes)
        subject_node.delete('authorityURI')
        # If subject has valueURI and child doesn't, move to child.
        add_valueURI(children_nodes, subject_node) if has_valueURI?(subject_node) && !has_valueURI?(children_nodes)
        subject_node.delete('valueURI')
      end

      # If the subject lacks an authority but all children agree with the first child's, copy it up
      # (a naf authority becomes lcsh on the subject).
      if !has_authority?(subject_node) &&
         has_authority?(children_nodes.first) &&
         has_same_authority?(children_nodes, children_nodes.first)
        add_authority(subject_node, children_nodes.first, naf_to_lcsh: true)
      end

      next unless has_authority?(subject_node) &&
                  has_authorityURI?(subject_node) &&
                  !has_valueURI?(subject_node) &&
                  has_authority?(children_nodes.first) &&
                  has_same_authority?(children_nodes, children_nodes.first)

      delete_authorityURI(subject_node)
    end
  end

  # For non-cartographic subjects that carry neither authorityURI nor valueURI:
  # * drops a child's authority when it matches the subject's, is not 'naf', and the child has no URIs;
  # * afterwards, if the subject has an authority, not every child has one, and every child has an
  #   authorityURI or every child has a valueURI, copies the subject's authority to all children.
  def normalize_subject_children
    ng_xml.root.xpath('//mods:subject[not(mods:cartographics)]', mods: ModsNormalizer::MODS_NS).each do |subject_node|
      # Both steps below require a subject without URIs; nothing in this loop changes the subject's
      # attributes, so the check is made once per subject.
      next if has_authorityURI?(subject_node) || has_valueURI?(subject_node)

      children_nodes = subject_node.xpath('mods:*', mods: ModsNormalizer::MODS_NS)

      children_nodes.each do |child_node|
        next unless has_authority?(child_node) &&
                    has_same_authority?(child_node, subject_node) &&
                    child_node['authority'] != 'naf' &&
                    !(has_authorityURI?(child_node) || has_valueURI?(child_node))

        delete_authority(child_node)
      end

      next unless has_authority?(subject_node) &&
                  !has_authority?(children_nodes) &&
                  (has_authorityURI?(children_nodes) || has_valueURI?(children_nodes))

      add_authority(children_nodes, subject_node)
    end
  end
  # rubocop:enable Metrics/CyclomaticComplexity
  # rubocop:enable Metrics/AbcSize

  # rubocop:disable Naming/PredicateName

  # @param nodes a single node or a NodeSet
  # @return [Boolean] true when every given node has an authority attribute (also true for an empty NodeSet)
  def has_authority?(nodes)
    nodes_to_a(nodes).all? { |node| node[:authority] }
  end

  # @param nodes a single node or a NodeSet
  # @param same_node the node to compare against
  # @return [Boolean] true when every given node has the same authority as same_node;
  #   lcsh and naf are treated as equivalent to each other
  def has_same_authority?(nodes, same_node)
    nodes_to_a(nodes).all? { |node| same_node[:authority] == node[:authority] || (lcsh_or_naf?(same_node) && lcsh_or_naf?(node)) }
  end
  # rubocop:enable Naming/PredicateName

  # @return [Boolean] true when the node's authority is 'lcsh' or 'naf'
  def lcsh_or_naf?(node)
    %w[lcsh naf].include?(node[:authority])
  end

  # Sets the authority of from_node on each of the given nodes.
  # @param nodes a single node or a NodeSet to update
  # @param from_node the node supplying the authority
  # @param [Boolean] naf_to_lcsh when true, an authority of 'naf' is written as 'lcsh'
  def add_authority(nodes, from_node, naf_to_lcsh: false)
    authority = if naf_to_lcsh && from_node[:authority] == 'naf'
                  'lcsh'
                else
                  from_node[:authority]
                end
    nodes_to_a(nodes).each { |node| node[:authority] = authority }
  end

  # Removes the authority attribute from each of the given nodes.
  def delete_authority(nodes)
    nodes_to_a(nodes).each { |node| node.delete('authority') }
  end

  # rubocop:disable Naming/MethodName
  # rubocop:disable Naming/PredicateName

  # @return [Boolean] true when every given node has an authorityURI attribute (also true for an empty NodeSet)
  def has_authorityURI?(nodes)
    nodes_to_a(nodes).all? { |node| node[:authorityURI] }
  end

  # Copies the authorityURI of from_node onto each of the given nodes.
  def add_authorityURI(nodes, from_node)
    nodes_to_a(nodes).each { |node| node[:authorityURI] = from_node[:authorityURI] }
  end

  # Removes the authorityURI attribute from each of the given nodes.
  def delete_authorityURI(nodes)
    nodes_to_a(nodes).each { |node| node.delete('authorityURI') }
  end

  # @return [Boolean] true when every given node has a valueURI attribute (also true for an empty NodeSet)
  def has_valueURI?(nodes)
    nodes_to_a(nodes).all? { |node| node[:valueURI] }
  end

  # Copies the valueURI of from_node onto each of the given nodes.
  def add_valueURI(nodes, from_node)
    nodes_to_a(nodes).each { |node| node[:valueURI] = from_node[:valueURI] }
  end
  # rubocop:enable Naming/MethodName
  # rubocop:enable Naming/PredicateName

  # Lets the helpers above accept either one node or a NodeSet.
  # @return a NodeSet unchanged, or a one-element array wrapping the single node
  def nodes_to_a(nodes)
    nodes.is_a?(Nokogiri::XML::NodeSet) ? nodes : [nodes]
  end

  # For a subject without an authority and with exactly one child (and no geographicCode), copies the
  # child's non-naf authority up to the subject; the child keeps it only if it also has an
  # authorityURI or valueURI.
  def normalize_subject_authority
    ng_xml.root.xpath('//mods:subject[not(@authority) and count(mods:*) = 1 and not(mods:geographicCode)]/mods:*[@authority and @authority != "naf"]',
                      mods: ModsNormalizer::MODS_NS).each do |node|
      node.parent['authority'] = node['authority']
      node.delete('authority') unless node['authorityURI'] || node['valueURI']
    end
  end

  # Rewrites authority 'lcnaf' as 'naf' on any MODS element.
  def normalize_subject_authority_lcnaf
    ng_xml.root.xpath("//mods:*[@authority='lcnaf']", mods: ModsNormalizer::MODS_NS).each do |node|
      node[:authority] = 'naf'
    end
  end

  # Rewrites authority 'tgm' as 'lctgm' on any MODS element.
  def normalize_subject_authority_tgm
    ng_xml.root.xpath("//mods:*[@authority='tgm']", mods: ModsNormalizer::MODS_NS).each do |node|
      node[:authority] = 'lctgm'
    end
  end

  # Removes one leading '(' and one trailing ')' from coordinates values.
  def normalize_coordinates
    ng_xml.root.xpath('//mods:coordinates[text()]', mods: ModsNormalizer::MODS_NS).each do |coordinate_node|
      coordinate_node.content = coordinate_node.content.delete_prefix('(').delete_suffix(')')
    end
  end

  # Collapse multiple subject/cartographics nodes into a single one
  # (done separately for the root and for each relatedItem directly under the root)
  def normalize_subject_cartographics
    normalize_subject_cartographics_for(ng_xml.root)
    ng_xml.root.xpath('mods:relatedItem', mods: ModsNormalizer::MODS_NS).each do |related_item_node|
      normalize_subject_cartographics_for(related_item_node)
    end
  end

  # Gathers cartographics content from the subjects directly under root_node (those without an
  # altRepGroup) into one new subject/cartographics, which is appended to root_node if it received anything.
  # @param root_node the element whose direct subject children are processed
  def normalize_subject_cartographics_for(root_node)
    carto_subject_nodes = root_node.xpath('mods:subject[not(@altRepGroup)][mods:cartographics]', mods: ModsNormalizer::MODS_NS)
    return if carto_subject_nodes.empty?

    # Create a default carto subject.
    default_carto_subject_node = Nokogiri::XML::Node.new('subject', Nokogiri::XML(nil))
    default_carto_node = Nokogiri::XML::Node.new('cartographics', Nokogiri::XML(nil))
    default_carto_subject_node << default_carto_node

    carto_subject_nodes.each do |carto_subject_node|
      carto_subject_node.xpath('mods:cartographics', mods: ModsNormalizer::MODS_NS).each do |carto_node|
        normalize_cartographic_node(carto_node, carto_subject_node, default_carto_node)
      end
      carto_subject_node.remove if carto_subject_node.elements.empty?
    end

    root_node << default_carto_subject_node if default_carto_node.elements.present?
  end

  # Normalizes a single cartographic node
  # @param carto_node the cartographics element being emptied
  # @param carto_subject_node the subject that contains carto_node
  # @param default_carto_node the cartographics element that collects the moved children
  def normalize_cartographic_node(carto_node, carto_subject_node, default_carto_node)
    child_nodes = if carto_subject_node['authority'] || carto_subject_node['authorityURI'] || carto_subject_node['valueURI']
                    # Move scale and coordinates to default carto subject.
                    carto_node.xpath('mods:scale',
                                     mods: ModsNormalizer::MODS_NS) + carto_node.xpath('mods:coordinates', mods: ModsNormalizer::MODS_NS)
                  else
                    # Merge all into default carto_subject.
                    carto_node.elements
                  end

    child_nodes.each do |child_node|
      child_node.remove
      next if child_node.children.blank? # skip empty nodes

      default_carto_node << child_node unless child_node_exists?(child_node, default_carto_node)
    end
    carto_node.remove if carto_node.elements.empty?
  end

  # @return [Boolean] true when parent_node already has an element with the same name and content as child_node
  def child_node_exists?(child_node, parent_node)
    parent_node.elements.any? { |check_node| child_node.name == check_node.name && child_node.content == check_node.content }
  end

  # Rewrites authority 'naf' as 'lcsh' on subject elements (children are left alone).
  def normalize_subject_authority_naf
    ng_xml.root.xpath("//mods:subject[@authority='naf']", mods: ModsNormalizer::MODS_NS).each do |subject_node|
      subject_node[:authority] = 'lcsh'
    end
  end

  # If all children of a subject have the same lang (or the same script), then move it to the subject
  # and delete it from the children. lang and script are handled independently.
  def normalize_subject_lang_and_script
    ng_xml.root.xpath('//mods:subject[mods:*]', mods: ModsNormalizer::MODS_NS).each do |subject_node|
      promote_shared_child_attribute(subject_node, 'lang')
      promote_shared_child_attribute(subject_node, 'script')
    end
  end

  # Moves an attribute from the child elements up to the subject when the first child has it and every
  # child element has the same value for it.
  # @param subject_node a subject with at least one child element
  # @param [String] attribute_name name of the attribute to promote
  def promote_shared_child_attribute(subject_node, attribute_name)
    shared_value = subject_node.elements.first[attribute_name]
    return unless shared_value && subject_node.elements.all? { |child_node| child_node[attribute_name] == shared_value }

    subject_node[attribute_name] = shared_value
    subject_node.elements.each { |child_node| child_node.delete(attribute_name) }
  end

  # Removes temporal elements without text, and their subject if it is left without child elements.
  def normalize_empty_temporal
    ng_xml.root.xpath('//mods:subject/mods:temporal[not(text())]', mods: ModsNormalizer::MODS_NS).each do |temporal_node|
      subject_node = temporal_node.parent
      temporal_node.remove
      subject_node.remove if subject_node.elements.empty?
    end
  end

  # Removes geographic elements that have neither text nor a valueURI, and their subject if it is left
  # without child elements and without attributes.
  def normalize_empty_geographic
    ng_xml.root.xpath('//mods:subject/mods:geographic[not(@valueURI) and not(text())]', mods: ModsNormalizer::MODS_NS).each do |geo_node|
      subject_node = geo_node.parent
      geo_node.remove
      subject_node.remove if subject_node.elements.empty? && subject_node.attributes.empty?
    end
  end

  # Moves an xlink:href found on a child of a subject up to the subject itself.
  def normalize_xlink_href
    ng_xml.root.xpath('//mods:subject/mods:*[@xlink:href]', mods: ModsNormalizer::MODS_NS, xlink: ModsNormalizer::XLINK_NS).each do |child_node|
      subject_node = child_node.parent
      subject_node['xlink:href'] = child_node['xlink:href']
      child_node.delete('href')
    end
  end
end
|
292
|
+
end
|
293
|
+
end
|
294
|
+
end
|
295
|
+
end
|
296
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
|
4
|
+
module Models
|
5
|
+
module Mapping
|
6
|
+
module Normalizers
|
7
|
+
module Mods
|
8
|
+
# Normalizes a Fedora MODS document for title elements.
#
# Works on a UTF-8 copy of the document it is given and hands that copy back.
class TitleNormalizer
  # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
  # @param [String] label
  # @return [Nokogiri::Document] normalized MODS
  def self.normalize(mods_ng_xml:, label:)
    new(mods_ng_xml: mods_ng_xml, label: label).normalize
  end

  # Only adds a title taken from the label when the document has neither a title nor a linked titleInfo.
  # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
  # @param [String] label
  # @return [Nokogiri::Document] normalized MODS
  def self.normalize_missing_title(mods_ng_xml:, label:)
    new(mods_ng_xml: mods_ng_xml, label: label).normalize_missing_title
  end

  # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized (copied, never modified in place)
  # @param [String] label used as title text when the document has no title
  def initialize(mods_ng_xml:, label:)
    @label = label
    @ng_xml = mods_ng_xml.dup.tap { |copy| copy.encoding = 'UTF-8' }
  end

  # Applies all title normalizations in order.
  # @return [Nokogiri::Document] the normalized copy
  def normalize
    normalize_hydrus_title
    clean_empty_titles
    normalize_title_type
    normalize_title_trailing
    normalize_title_as_label
    ng_xml
  end

  # Applies only the "use the label as title" normalization.
  # @return [Nokogiri::Document] the normalized copy
  def normalize_missing_title
    normalize_title_as_label
    ng_xml
  end

  private

  attr_reader :ng_xml, :label

  # Adds a 'Hydrus' title when the label is 'Hydrus' and no top-level title has any text.
  def normalize_hydrus_title
    return unless label == 'Hydrus'

    non_empty_titles = ng_xml.root.xpath('mods:titleInfo/mods:title[string-length() > 0]', mods: ModsNormalizer::MODS_NS)
    add_title('Hydrus') unless non_empty_titles.present?
  end

  # Removes title and subTitle elements without text, then titleInfo elements that have
  # no MODS children and no xlink:href.
  def clean_empty_titles
    root = ng_xml.root
    root.xpath('//mods:title[not(text())]', mods: ModsNormalizer::MODS_NS).each(&:remove)
    root.xpath('//mods:subTitle[not(text())]', mods: ModsNormalizer::MODS_NS).each(&:remove)

    childless_title_infos = root.xpath('//mods:titleInfo[not(mods:*) and not(@xlink:href)]',
                                       mods: ModsNormalizer::MODS_NS, xlink: ModsNormalizer::XLINK_NS)
    childless_title_infos.each(&:remove)
  end

  # Drops the type attribute from every title element that has one.
  def normalize_title_type
    typed_titles = ng_xml.root.xpath('//mods:title[@type]', mods: ModsNormalizer::MODS_NS)
    typed_titles.each { |typed_title| typed_title.delete('type') }
  end

  # Strips one trailing comma from titles, except inside titleInfo of type "abbreviated".
  def normalize_title_trailing
    titles = ng_xml.root.xpath('//mods:titleInfo[not(@type="abbreviated")]/mods:title', mods: ModsNormalizer::MODS_NS)
    titles.each do |node|
      node.content = node.content.delete_suffix(',')
    end
  end

  # Adds the label as a title unless the document already has a top-level title
  # or a top-level titleInfo with an xlink:href.
  def normalize_title_as_label
    root = ng_xml.root
    return if root.xpath('mods:titleInfo/mods:title', mods: ModsNormalizer::MODS_NS).present?

    linked_title_infos = root.xpath('mods:titleInfo[@xlink:href]', mods: ModsNormalizer::MODS_NS,
                                                                   xlink: ModsNormalizer::XLINK_NS)
    return if linked_title_infos.present?

    add_title(label)
  end

  # Appends <titleInfo><title>content</title></titleInfo> to the document root.
  # @param content text for the new title element
  def add_title(content)
    title_node = Nokogiri::XML::Node.new('title', Nokogiri::XML(nil))
    title_node.content = content

    title_info_node = Nokogiri::XML::Node.new('titleInfo', Nokogiri::XML(nil))
    title_info_node << title_node

    ng_xml.root << title_info_node
  end
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|