cocina-models 0.74.1 → 0.77.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (147) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +40 -11
  3. data/.rubocop_todo.yml +71 -2
  4. data/README.md +19 -3
  5. data/cocina-models.gemspec +2 -0
  6. data/description_types.yml +168 -39
  7. data/docs/description_types.md +471 -216
  8. data/lib/cocina/generator/generator.rb +7 -15
  9. data/lib/cocina/generator/schema.rb +1 -3
  10. data/lib/cocina/generator/schema_base.rb +0 -8
  11. data/lib/cocina/generator/schema_ref.rb +1 -1
  12. data/lib/cocina/generator/schema_value.rb +14 -4
  13. data/lib/cocina/models/access.rb +4 -4
  14. data/lib/cocina/models/admin_policy.rb +1 -1
  15. data/lib/cocina/models/admin_policy_access_template.rb +7 -7
  16. data/lib/cocina/models/admin_policy_administrative.rb +1 -1
  17. data/lib/cocina/models/admin_policy_with_metadata.rb +3 -3
  18. data/lib/cocina/models/builders/dro_rights_description_builder.rb +69 -0
  19. data/lib/cocina/models/builders/name_title_group_builder.rb +130 -0
  20. data/lib/cocina/models/builders/rights_description_builder.rb +83 -0
  21. data/lib/cocina/models/builders/title_builder.rb +211 -0
  22. data/lib/cocina/models/citation_only_access.rb +2 -2
  23. data/lib/cocina/models/collection_access.rb +4 -4
  24. data/lib/cocina/models/collection_identification.rb +1 -1
  25. data/lib/cocina/models/collection_with_metadata.rb +2 -2
  26. data/lib/cocina/models/contributor.rb +4 -4
  27. data/lib/cocina/models/controlled_digital_lending_access.rb +2 -2
  28. data/lib/cocina/models/dark_access.rb +4 -4
  29. data/lib/cocina/models/description.rb +3 -3
  30. data/lib/cocina/models/descriptive_basic_value.rb +13 -13
  31. data/lib/cocina/models/descriptive_parallel_contributor.rb +5 -5
  32. data/lib/cocina/models/descriptive_parallel_event.rb +3 -3
  33. data/lib/cocina/models/descriptive_value.rb +13 -13
  34. data/lib/cocina/models/descriptive_value_language.rb +6 -6
  35. data/lib/cocina/models/dro.rb +1 -1
  36. data/lib/cocina/models/dro_access.rb +8 -8
  37. data/lib/cocina/models/dro_with_metadata.rb +3 -3
  38. data/lib/cocina/models/embargo.rb +5 -5
  39. data/lib/cocina/models/event.rb +3 -3
  40. data/lib/cocina/models/file.rb +4 -4
  41. data/lib/cocina/models/file_access.rb +4 -4
  42. data/lib/cocina/models/identification.rb +2 -2
  43. data/lib/cocina/models/language.rb +12 -12
  44. data/lib/cocina/models/location_based_access.rb +1 -1
  45. data/lib/cocina/models/location_based_download_access.rb +1 -1
  46. data/lib/cocina/models/mapping/error_notifier.rb +36 -0
  47. data/lib/cocina/models/mapping/from_mods/access.rb +177 -0
  48. data/lib/cocina/models/mapping/from_mods/admin_metadata.rb +217 -0
  49. data/lib/cocina/models/mapping/from_mods/alt_rep_group.rb +26 -0
  50. data/lib/cocina/models/mapping/from_mods/authority.rb +51 -0
  51. data/lib/cocina/models/mapping/from_mods/contributor.rb +161 -0
  52. data/lib/cocina/models/mapping/from_mods/description.rb +98 -0
  53. data/lib/cocina/models/mapping/from_mods/description_builder.rb +61 -0
  54. data/lib/cocina/models/mapping/from_mods/event.rb +543 -0
  55. data/lib/cocina/models/mapping/from_mods/form.rb +381 -0
  56. data/lib/cocina/models/mapping/from_mods/geographic.rb +219 -0
  57. data/lib/cocina/models/mapping/from_mods/hydrus_default_title_builder.rb +28 -0
  58. data/lib/cocina/models/mapping/from_mods/identifier.rb +51 -0
  59. data/lib/cocina/models/mapping/from_mods/identifier_builder.rb +71 -0
  60. data/lib/cocina/models/mapping/from_mods/identifier_type.rb +292 -0
  61. data/lib/cocina/models/mapping/from_mods/language.rb +36 -0
  62. data/lib/cocina/models/mapping/from_mods/language_script.rb +30 -0
  63. data/lib/cocina/models/mapping/from_mods/language_term.rb +106 -0
  64. data/lib/cocina/models/mapping/from_mods/name_builder.rb +307 -0
  65. data/lib/cocina/models/mapping/from_mods/note.rb +162 -0
  66. data/lib/cocina/models/mapping/from_mods/part_builder.rb +147 -0
  67. data/lib/cocina/models/mapping/from_mods/primary.rb +27 -0
  68. data/lib/cocina/models/mapping/from_mods/purl.rb +53 -0
  69. data/lib/cocina/models/mapping/from_mods/related_resource.rb +105 -0
  70. data/lib/cocina/models/mapping/from_mods/subject.rb +413 -0
  71. data/lib/cocina/models/mapping/from_mods/subject_authority_codes.rb +794 -0
  72. data/lib/cocina/models/mapping/from_mods/title.rb +160 -0
  73. data/lib/cocina/models/mapping/from_mods/title_builder.rb +106 -0
  74. data/lib/cocina/models/mapping/from_mods/title_builder_strategy.rb +19 -0
  75. data/lib/cocina/models/mapping/from_mods/value_uri.rb +25 -0
  76. data/lib/cocina/models/mapping/normalizers/base.rb +16 -0
  77. data/lib/cocina/models/mapping/normalizers/mods/geo_extension_normalizer.rb +69 -0
  78. data/lib/cocina/models/mapping/normalizers/mods/name_normalizer.rb +191 -0
  79. data/lib/cocina/models/mapping/normalizers/mods/origin_info_normalizer.rb +157 -0
  80. data/lib/cocina/models/mapping/normalizers/mods/subject_normalizer.rb +296 -0
  81. data/lib/cocina/models/mapping/normalizers/mods/title_normalizer.rb +91 -0
  82. data/lib/cocina/models/mapping/normalizers/mods_normalizer.rb +409 -0
  83. data/lib/cocina/models/mapping/purl.rb +27 -0
  84. data/lib/cocina/models/mapping/to_mods/access.rb +155 -0
  85. data/lib/cocina/models/mapping/to_mods/admin_metadata.rb +129 -0
  86. data/lib/cocina/models/mapping/to_mods/contributor.rb +49 -0
  87. data/lib/cocina/models/mapping/to_mods/description.rb +63 -0
  88. data/lib/cocina/models/mapping/to_mods/event.rb +200 -0
  89. data/lib/cocina/models/mapping/to_mods/form.rb +292 -0
  90. data/lib/cocina/models/mapping/to_mods/geographic.rb +151 -0
  91. data/lib/cocina/models/mapping/to_mods/id_generator.rb +25 -0
  92. data/lib/cocina/models/mapping/to_mods/identifier.rb +57 -0
  93. data/lib/cocina/models/mapping/to_mods/language.rb +82 -0
  94. data/lib/cocina/models/mapping/to_mods/mods_writer.rb +38 -0
  95. data/lib/cocina/models/mapping/to_mods/name_title_group.rb +29 -0
  96. data/lib/cocina/models/mapping/to_mods/name_writer.rb +228 -0
  97. data/lib/cocina/models/mapping/to_mods/note.rb +105 -0
  98. data/lib/cocina/models/mapping/to_mods/part_writer.rb +115 -0
  99. data/lib/cocina/models/mapping/to_mods/related_resource.rb +108 -0
  100. data/lib/cocina/models/mapping/to_mods/role_writer.rb +50 -0
  101. data/lib/cocina/models/mapping/to_mods/subject.rb +486 -0
  102. data/lib/cocina/models/mapping/to_mods/title.rb +260 -0
  103. data/lib/cocina/models/object_metadata.rb +2 -2
  104. data/lib/cocina/models/presentation.rb +2 -2
  105. data/lib/cocina/models/related_resource.rb +9 -9
  106. data/lib/cocina/models/release_tag.rb +4 -4
  107. data/lib/cocina/models/request_admin_policy.rb +1 -1
  108. data/lib/cocina/models/request_administrative.rb +1 -1
  109. data/lib/cocina/models/request_collection.rb +2 -2
  110. data/lib/cocina/models/request_description.rb +3 -3
  111. data/lib/cocina/models/request_dro.rb +4 -4
  112. data/lib/cocina/models/request_file.rb +5 -5
  113. data/lib/cocina/models/request_identification.rb +1 -1
  114. data/lib/cocina/models/sequence.rb +1 -1
  115. data/lib/cocina/models/source.rb +4 -4
  116. data/lib/cocina/models/standard.rb +5 -5
  117. data/lib/cocina/models/stanford_access.rb +2 -2
  118. data/lib/cocina/models/title.rb +13 -13
  119. data/lib/cocina/models/validators/associated_name_validator.rb +77 -0
  120. data/lib/cocina/models/validators/dark_validator.rb +4 -2
  121. data/lib/cocina/models/validators/open_api_validator.rb +0 -4
  122. data/lib/cocina/models/validators/validator.rb +1 -0
  123. data/lib/cocina/models/version.rb +1 -1
  124. data/lib/cocina/models/world_access.rb +2 -2
  125. data/lib/cocina/models.rb +4 -0
  126. data/lib/cocina/rspec/factories.rb +205 -0
  127. data/lib/cocina/rspec.rb +2 -0
  128. data/openapi.yml +4 -4
  129. metadata +97 -24
  130. data/docs/_config.yml +0 -1
  131. data/docs/maps/Agent.json +0 -18
  132. data/docs/maps/Collection.json +0 -240
  133. data/docs/maps/DRO.json +0 -316
  134. data/docs/maps/Description.json +0 -17
  135. data/docs/maps/File.json +0 -196
  136. data/docs/maps/Fileset.json +0 -143
  137. data/docs/maps/README.md +0 -7
  138. data/docs/maps/ReleaseTag.json +0 -39
  139. data/docs/maps/Sequence.json +0 -46
  140. data/docs/maps/Title.json +0 -18
  141. data/docs/sampleETD/foxml-export.xml +0 -935
  142. data/docs/sampleETD/foxml.xml +0 -3475
  143. data/docs/sampleETD/xn109qc9773_bibframe.ttl +0 -95
  144. data/docs/sampleETD/xn109qc9773_taco.json +0 -158
  145. data/lib/cocina/models/dro_rights_description_builder.rb +0 -67
  146. data/lib/cocina/models/rights_description_builder.rb +0 -81
  147. data/lib/cocina/models/title_builder.rb +0 -208
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cocina
4
+ module Models
5
+ module Mapping
6
+ module Normalizers
7
+ module Mods
8
+ # Normalizes a Fedora MODS document for originInfo elements.
9
+ # Must be called after authorityURI attribs are normalized
10
+ class OriginInfoNormalizer
11
+ DATE_FIELDS = %w[dateIssued copyrightDate dateCreated dateCaptured dateValid dateOther dateModified].freeze
12
+
13
# Class-level shortcut: wraps the given MODS in a normalizer instance and runs it.
# @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
# @return [Nokogiri::Document] normalized MODS
def self.normalize(mods_ng_xml:)
  normalizer = new(mods_ng_xml: mods_ng_xml)
  normalizer.normalize
end
18
+
19
# Stores a duplicate (via #dup) of the supplied document, with its encoding set to UTF-8.
# @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
def initialize(mods_ng_xml:)
  @ng_xml = mods_ng_xml.dup.tap { |doc| doc.encoding = 'UTF-8' }
end
23
+
24
# Runs every originInfo normalization step, in order, and returns the working document.
# Ordering constraints:
#   * remove_empty_origin_info must come after remove_empty_child_elements
#   * place_term_authority_normalization must come after place_term_type_normalization
# @return [Nokogiri::Document] normalized MODS
def normalize
  %i[
    remove_empty_child_elements
    remove_empty_origin_info
    normalize_legacy_mods_event_type
    place_term_type_normalization
    place_term_authority_normalization
    normalize_authority_marcountry
    single_key_date
    remove_trailing_period_from_date_values
  ].each { |step| send(step) }
  ng_xml
end
35
+
36
+ private
37
+
38
+ attr_reader :ng_xml
39
+
40
# Removes originInfo child elements whose content is blank.
# A child is kept when any of its descendant elements carries a valueURI or an
# xlink:href attribute, since such a node can be meaningful without any content.
# Must be called before remove_empty_origin_info.
def remove_empty_child_elements
  candidates = ng_xml.root.xpath('//mods:originInfo/mods:*', mods: ModsNormalizer::MODS_NS)
  candidates.each do |element|
    keep = element.xpath('.//*[@valueURI]').present? ||
           element.xpath('.//*[@xlink:href]', xlink: ModsNormalizer::XLINK_NS).present? ||
           !element.content.blank?
    element.remove unless keep
  end
end
50
+
51
# Drops originInfo elements that have nothing left in them.
# Must be called after remove_empty_child_elements.
def remove_empty_origin_info
  root = ng_xml.root
  # First pass: no mods children and no attributes at all.
  bare = root.xpath('//mods:originInfo[not(mods:*) and not(@*)]', mods: ModsNormalizer::MODS_NS)
  bare.each(&:remove)
  # Second pass: attributes only, e.g. <originInfo eventType="publication"/>.
  attribute_only = root.xpath('//mods:originInfo[not(mods:*) and not(text()[normalize-space()])]',
                              mods: ModsNormalizer::MODS_NS)
  attribute_only.each(&:remove)
end
57
+
58
+ LEGACY_EVENT_TYPES_2_TYPE = Cocina::Models::Mapping::FromMods::Event::LEGACY_EVENT_TYPES_2_TYPE
59
+
60
# eventType is a relatively new addition to the MODS schema; records converted from MARC to MODS
# before it existed used displayLabel as a stopgap, with certain values. Those same values were
# sometimes used as eventType values too. Both forms are converted to our preferred vocabulary
# via LEGACY_EVENT_TYPES_2_TYPE, and a displayLabel that maps to the resulting eventType is dropped.
def normalize_legacy_mods_event_type
  ng_xml.root.xpath('//mods:originInfo[@*]', mods: ModsNormalizer::MODS_NS).each do |origin_info_node|
    label = origin_info_node['displayLabel']
    resolved = origin_info_node['eventType']
    # Fall back to the displayLabel only when it is a known legacy value.
    resolved = label if resolved.blank? && LEGACY_EVENT_TYPES_2_TYPE.key?(label)
    resolved = LEGACY_EVENT_TYPES_2_TYPE[resolved] if LEGACY_EVENT_TYPES_2_TYPE.key?(resolved)
    next if resolved.blank?

    origin_info_node['eventType'] = resolved
    origin_info_node.delete('displayLabel') if resolved == LEGACY_EVENT_TYPES_2_TYPE[label]
  end
end
75
+
76
# Gives every non-empty originInfo/place/placeTerm a type attribute, defaulting to "text".
# If the cocina model doesn't have a code, then it will have a value; this is output as
# attribute type=text on the roundtripped placeTerm element.
# Must be called before place_term_authority_normalization.
def place_term_type_normalization
  ng_xml.root.xpath('//mods:originInfo/mods:place/mods:placeTerm', mods: ModsNormalizer::MODS_NS).each do |place_term_node|
    next if place_term_node.content.blank?

    # Test the attribute *value* (String or nil), not the Attr node: Nokogiri nodes define their
    # own #blank? (a whitespace-text-node check), so an existing but empty type="" attribute was
    # never treated as missing.
    place_term_node['type'] = 'text' if place_term_node['type'].blank?
  end
end
86
+
87
# Shares authority attributes between the text and code placeTerm of a single place element.
# When exactly one of the two placeTerms carries authority attributes (valueURI, authority,
# authorityURI), those attributes are copied onto the other one. Nothing happens when either
# placeTerm is missing or blank, when both have authority attributes, or when neither does.
# Must be called after place_term_type_normalization.
def place_term_authority_normalization
  ng_xml.root.xpath('//mods:originInfo/mods:place[mods:placeTerm/@type]', mods: ModsNormalizer::MODS_NS).each do |place_node|
    text_term = place_node.at_xpath("mods:placeTerm[not(@type='code')]", mods: ModsNormalizer::MODS_NS)
    next if text_term.nil? || text_term.text.blank?

    code_term = place_node.at_xpath("mods:placeTerm[@type='code']", mods: ModsNormalizer::MODS_NS)
    next if code_term.nil? || code_term.text.blank?

    text_attrs = authority_attributes(text_term)
    code_attrs = authority_attributes(code_term)

    # NOTE: deliberately skipping the situation where the text node has some authority info and
    # the code node has other authority info, as we may never encounter this.
    source, target =
      if text_attrs.present? && code_attrs.blank?
        [text_attrs, code_term]
      elsif code_attrs.present? && text_attrs.blank?
        [code_attrs, text_term]
      end
    next unless source

    source.each { |key, val| target[key] = val }
  end
end
121
+
122
# Collects the authority-related attributes present on a node.
# @param ng_node node-like object read with string keys 'valueURI', 'authority', 'authorityURI'
# @return [Hash] symbol-keyed hash holding only the attributes whose value is not nil
def authority_attributes(ng_node)
  %i[valueURI authority authorityURI].each_with_object({}) do |name, found|
    value = ng_node[name.to_s]
    found[name] = value unless value.nil?
  end
end
129
+
130
# Rewrites the authority value 'marcountry' to 'marccountry' on every MODS element that has it.
def normalize_authority_marcountry
  misspelled = ng_xml.root.xpath("//mods:*[@authority='marcountry']", mods: ModsNormalizer::MODS_NS)
  misspelled.each { |element| element[:authority] = 'marccountry' }
end
135
+
136
# Ensures a start/end date pair carries keyDate="yes" on only one of its two dates.
# For each originInfo and each date field, when exactly two dates have both a point attribute
# and keyDate="yes", the keyDate attribute is removed from the one with point="end".
#
# The pair is looked up per originInfo element. Counting matches across the whole document
# would let dates from unrelated originInfo elements trigger the normalization, or suppress it
# when several originInfo elements each contain a pair.
def single_key_date
  ng_xml.root.xpath('//mods:originInfo', mods: ModsNormalizer::MODS_NS).each do |origin_info_node|
    DATE_FIELDS.each do |date_field|
      key_date_nodes = origin_info_node.xpath("mods:#{date_field}[@point and @keyDate='yes']",
                                              mods: ModsNormalizer::MODS_NS)
      next unless key_date_nodes.size == 2

      # The query already guarantees keyDate="yes", so the node only needs to exist.
      end_node = key_date_nodes.find { |node| node['point'] == 'end' }
      end_node&.delete('keyDate')
    end
  end
end
145
+
146
# Strips a single trailing '.' from the content of every originInfo date element
# listed in DATE_FIELDS.
def remove_trailing_period_from_date_values
  DATE_FIELDS.each do |date_field|
    date_nodes = ng_xml.root.xpath("//mods:originInfo/mods:#{date_field}", mods: ModsNormalizer::MODS_NS)
    date_nodes.each do |date_node|
      trimmed = date_node.content.delete_suffix('.')
      date_node.content = trimmed
    end
  end
end
152
+ end
153
+ end
154
+ end
155
+ end
156
+ end
157
+ end
@@ -0,0 +1,296 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cocina
4
+ module Models
5
+ module Mapping
6
+ module Normalizers
7
+ module Mods
8
+ # Normalizes a Fedora MODS document for subject elements.
9
+ class SubjectNormalizer # rubocop:disable Metrics/ClassLength
10
# Class-level shortcut: wraps the given MODS in a normalizer instance and runs it.
# @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
# @return [Nokogiri::Document] normalized MODS
def self.normalize(mods_ng_xml:)
  normalizer = new(mods_ng_xml: mods_ng_xml)
  normalizer.normalize
end
15
+
16
# Stores a duplicate (via #dup) of the supplied document, with its encoding set to UTF-8.
# @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
def initialize(mods_ng_xml:)
  @ng_xml = mods_ng_xml.dup.tap { |doc| doc.encoding = 'UTF-8' }
end
20
+
21
# Runs every subject normalization step, in order, and returns the working document.
# normalize_coordinates must run before normalize_subject_cartographics.
# @return [Nokogiri::Document] normalized MODS
def normalize
  %i[
    normalize_xlink_href
    normalize_empty_geographic
    normalize_marcgac
    normalize_empty_temporal
    normalize_subject
    normalize_subject_children
    normalize_subject_authority
    normalize_subject_authority_lcnaf
    normalize_subject_authority_naf
    normalize_subject_authority_tgm
    normalize_coordinates
    normalize_subject_cartographics
    normalize_subject_lang_and_script
  ].each { |step| send(step) }
  ng_xml
end
37
+
38
+ private
39
+
40
+ attr_reader :ng_xml
41
+
42
# Strips a run of hyphens at a line end from the content of every geographicCode element
# (first match only).
def normalize_marcgac
  code_nodes = ng_xml.root.xpath('//mods:geographicCode', mods: ModsNormalizer::MODS_NS)
  code_nodes.each do |code_node|
    code_node.content = code_node.content.sub(/-+$/, '')
  end
end
47
+
48
+ # rubocop:disable Metrics/CyclomaticComplexity
49
+ # rubocop:disable Metrics/AbcSize
50
# Reconciles authority, authorityURI and valueURI between each non-cartographic subject and its
# child elements:
#   1. A subject with exactly one child and an authorityURI or valueURI has those URIs moved to
#      the child (and its authority copied) wherever the child lacks them.
#   2. When all children share the first child's authority, a subject without an authority
#      receives it (naf becomes lcsh).
#   3. In that same situation, a subject that has authority and authorityURI but no valueURI
#      loses its authorityURI.
def normalize_subject
  ng_xml.root.xpath('//mods:subject[not(mods:cartographics)]', mods: ModsNormalizer::MODS_NS).each do |subject_node|
    children_nodes = subject_node.xpath('mods:*', mods: ModsNormalizer::MODS_NS)
    next if children_nodes.empty?

    if children_nodes.size == 1 && (has_authorityURI?(subject_node) || has_valueURI?(subject_node))
      # Copy authority, then move both URIs, to the only child where it has none of its own.
      add_authority(children_nodes, subject_node) if has_authority?(subject_node) && !has_authority?(children_nodes)
      add_authorityURI(children_nodes, subject_node) if has_authorityURI?(subject_node) && !has_authorityURI?(children_nodes)
      add_valueURI(children_nodes, subject_node) if has_valueURI?(subject_node) && !has_valueURI?(children_nodes)
      %w[authorityURI valueURI].each { |attr_name| subject_node.delete(attr_name) }
    end

    first_child = children_nodes.first
    next unless has_authority?(first_child) && has_same_authority?(children_nodes, first_child)

    add_authority(subject_node, first_child, naf_to_lcsh: true) unless has_authority?(subject_node)

    next unless has_authority?(subject_node) && has_authorityURI?(subject_node) && !has_valueURI?(subject_node)

    delete_authorityURI(subject_node)
  end
end
83
+
84
# For non-cartographic subjects that have neither authorityURI nor valueURI:
#   * removes a child's authority when it matches the subject's, is not 'naf', and the child has
#     no authorityURI or valueURI of its own;
#   * then, if the subject has an authority, no child set has one, and all children have an
#     authorityURI or all have a valueURI, copies the subject's authority onto the children.
def normalize_subject_children
  ng_xml.root.xpath('//mods:subject[not(mods:cartographics)]', mods: ModsNormalizer::MODS_NS).each do |subject_node|
    children_nodes = subject_node.xpath('mods:*', mods: ModsNormalizer::MODS_NS)

    # Every rule below requires a subject without URIs, so bail out once.
    next if has_authorityURI?(subject_node) || has_valueURI?(subject_node)

    children_nodes.each do |child_node|
      redundant = has_authority?(child_node) &&
                  has_same_authority?(child_node, subject_node) &&
                  child_node['authority'] != 'naf' &&
                  !has_authorityURI?(child_node) &&
                  !has_valueURI?(child_node)
      delete_authority(child_node) if redundant
    end

    next unless has_authority?(subject_node) &&
                !has_authority?(children_nodes) &&
                (has_authorityURI?(children_nodes) || has_valueURI?(children_nodes))

    add_authority(children_nodes, subject_node)
  end
end
108
+ # rubocop:enable Metrics/CyclomaticComplexity
109
+ # rubocop:enable Metrics/AbcSize
110
+
111
+ # rubocop:disable Naming/PredicateName
112
# @param nodes a single node or a node set
# @return [Boolean] true when every given node has a truthy authority attribute
#   (also true for an empty set)
def has_authority?(nodes)
  nodes_to_a(nodes).none? { |node| !node[:authority] }
end
115
+
116
# @param nodes a single node or a node set
# @param same_node the node whose authority the others are compared against
# @return [Boolean] true when every given node has the same authority as same_node, treating
#   'lcsh' and 'naf' as interchangeable
def has_same_authority?(nodes, same_node)
  reference_is_lc = lcsh_or_naf?(same_node)
  nodes_to_a(nodes).all? do |node|
    node[:authority] == same_node[:authority] || (reference_is_lc && lcsh_or_naf?(node))
  end
end
119
+ # rubocop:enable Naming/PredicateName
120
+
121
# @return [Boolean] true when the node's authority is 'lcsh' or 'naf'
def lcsh_or_naf?(node)
  authority = node[:authority]
  authority == 'lcsh' || authority == 'naf'
end
124
+
125
# Sets the authority attribute of the given node(s) to that of from_node.
# @param nodes a single node or a node set to update
# @param from_node the node supplying the authority value
# @param [Boolean] naf_to_lcsh when true, a 'naf' authority is written as 'lcsh'
def add_authority(nodes, from_node, naf_to_lcsh: false)
  authority = from_node[:authority]
  authority = 'lcsh' if naf_to_lcsh && authority == 'naf'
  nodes_to_a(nodes).each { |node| node[:authority] = authority }
end
133
+
134
# Removes the authority attribute from the given node(s).
# @param nodes a single node or a node set
def delete_authority(nodes)
  nodes_to_a(nodes).each do |target|
    target.delete('authority')
  end
end
137
+
138
+ # rubocop:disable Naming/MethodName
139
+ # rubocop:disable Naming/PredicateName
140
# @param nodes a single node or a node set
# @return [Boolean] true when every given node has a truthy authorityURI attribute
#   (also true for an empty set)
def has_authorityURI?(nodes)
  nodes_to_a(nodes).none? { |node| !node[:authorityURI] }
end
143
+
144
# Sets the authorityURI attribute of the given node(s) to that of from_node.
# @param nodes a single node or a node set to update
# @param from_node the node supplying the authorityURI value
def add_authorityURI(nodes, from_node)
  uri = from_node[:authorityURI]
  nodes_to_a(nodes).each { |node| node[:authorityURI] = uri }
end
147
+
148
# Removes the authorityURI attribute from the given node(s).
# @param nodes a single node or a node set
def delete_authorityURI(nodes)
  nodes_to_a(nodes).each do |target|
    target.delete('authorityURI')
  end
end
151
+
152
# @param nodes a single node or a node set
# @return [Boolean] true when every given node has a truthy valueURI attribute
#   (also true for an empty set)
def has_valueURI?(nodes)
  nodes_to_a(nodes).none? { |node| !node[:valueURI] }
end
155
+
156
# Sets the valueURI attribute of the given node(s) to that of from_node.
# @param nodes a single node or a node set to update
# @param from_node the node supplying the valueURI value
def add_valueURI(nodes, from_node)
  uri = from_node[:valueURI]
  nodes_to_a(nodes).each { |node| node[:valueURI] = uri }
end
159
+ # rubocop:enable Naming/MethodName
160
+ # rubocop:enable Naming/PredicateName
161
+
162
# Lets the helpers accept either one node or many.
# @return the argument itself when it is a Nokogiri::XML::NodeSet, otherwise a one-element
#   array wrapping it
def nodes_to_a(nodes)
  return nodes if nodes.is_a?(Nokogiri::XML::NodeSet)

  [nodes]
end
165
+
166
# For a subject without an authority that has exactly one child (and no geographicCode child),
# where that child has an authority other than "naf": copies the child's authority up to the
# subject, then removes it from the child unless the child also has an authorityURI or valueURI.
def normalize_subject_authority
  query = '//mods:subject[not(@authority) and count(mods:*) = 1 and not(mods:geographicCode)]/mods:*[@authority and @authority != "naf"]'
  ng_xml.root.xpath(query, mods: ModsNormalizer::MODS_NS).each do |child_node|
    child_node.parent['authority'] = child_node['authority']
    next if child_node['authorityURI'] || child_node['valueURI']

    child_node.delete('authority')
  end
end
173
+
174
# Rewrites the authority value 'lcnaf' to 'naf' on every MODS element that has it.
def normalize_subject_authority_lcnaf
  lcnaf_nodes = ng_xml.root.xpath("//mods:*[@authority='lcnaf']", mods: ModsNormalizer::MODS_NS)
  lcnaf_nodes.each { |element| element[:authority] = 'naf' }
end
179
+
180
# Rewrites the authority value 'tgm' to 'lctgm' on every MODS element that has it.
def normalize_subject_authority_tgm
  tgm_nodes = ng_xml.root.xpath("//mods:*[@authority='tgm']", mods: ModsNormalizer::MODS_NS)
  tgm_nodes.each { |element| element[:authority] = 'lctgm' }
end
185
+
186
# Strips one leading '(' and one trailing ')' from the content of every coordinates element
# that has text. Must run before normalize_subject_cartographics.
def normalize_coordinates
  ng_xml.root.xpath('//mods:coordinates[text()]', mods: ModsNormalizer::MODS_NS).each do |coordinate_node|
    without_open = coordinate_node.content.delete_prefix('(')
    coordinate_node.content = without_open.delete_suffix(')')
  end
end
191
+
192
# Collapses multiple subject/cartographics nodes into a single one, first for the document root
# and then for each relatedItem directly under the root.
def normalize_subject_cartographics
  root = ng_xml.root
  normalize_subject_cartographics_for(root)
  root.xpath('mods:relatedItem', mods: ModsNormalizer::MODS_NS).each do |related_item_node|
    normalize_subject_cartographics_for(related_item_node)
  end
end
199
+
200
# Merges the cartographics of the direct subject children of root_node (those without an
# altRepGroup) into one freshly built subject/cartographics pair, which is appended to root_node
# when it ended up with any content. Subjects left without child elements are removed.
# @param root_node the element whose direct subject children are processed
def normalize_subject_cartographics_for(root_node)
  subjects = root_node.xpath('mods:subject[not(@altRepGroup)][mods:cartographics]', mods: ModsNormalizer::MODS_NS)
  return if subjects.empty?

  # Build the default subject/cartographics pair that collects the merged children.
  merged_subject = Nokogiri::XML::Node.new('subject', Nokogiri::XML(nil))
  merged_carto = Nokogiri::XML::Node.new('cartographics', Nokogiri::XML(nil))
  merged_subject << merged_carto

  subjects.each do |subject|
    cartos = subject.xpath('mods:cartographics', mods: ModsNormalizer::MODS_NS)
    cartos.each { |carto| normalize_cartographic_node(carto, subject, merged_carto) }
    subject.remove if subject.elements.empty?
  end

  return unless merged_carto.elements.present?

  root_node << merged_subject
end
218
+
219
# Normalizes a single cartographic node by moving its children into default_carto_node.
# When the owning subject has an authority, authorityURI or valueURI, only scale and coordinates
# are moved; otherwise every child element is. Moved children that have no child nodes, or that
# duplicate an existing entry (same name and content), are discarded. The cartographic node
# itself is removed once it has no elements left.
def normalize_cartographic_node(carto_node, carto_subject_node, default_carto_node)
  subject_has_authority = %w[authority authorityURI valueURI].any? { |attr_name| carto_subject_node[attr_name] }

  movable =
    if subject_has_authority
      scales = carto_node.xpath('mods:scale', mods: ModsNormalizer::MODS_NS)
      scales + carto_node.xpath('mods:coordinates', mods: ModsNormalizer::MODS_NS)
    else
      carto_node.elements
    end

  movable.each do |child|
    child.remove
    # Skip empty nodes and ones already present in the merged cartographics.
    next if child.children.blank? || child_node_exists?(child, default_carto_node)

    default_carto_node << child
  end
  carto_node.remove if carto_node.elements.empty?
end
238
+
239
# @return [Boolean] true when parent_node already has a child element with the same name and
#   the same content as child_node
def child_node_exists?(child_node, parent_node)
  signature = [child_node.name, child_node.content]
  parent_node.elements.any? { |existing| [existing.name, existing.content] == signature }
end
242
+
243
# Rewrites the authority value 'naf' to 'lcsh' on subject elements (children are not touched).
def normalize_subject_authority_naf
  naf_subjects = ng_xml.root.xpath("//mods:subject[@authority='naf']", mods: ModsNormalizer::MODS_NS)
  naf_subjects.each { |subject_node| subject_node[:authority] = 'lcsh' }
end
248
+
249
# Hoists lang and script from the children of a subject onto the subject itself: when every
# child element has the same value for the attribute, it is set on the subject and deleted
# from each child. lang and script are handled independently, lang first.
def normalize_subject_lang_and_script
  ng_xml.root.xpath('//mods:subject[mods:*]', mods: ModsNormalizer::MODS_NS).each do |subject_node|
    children = subject_node.elements
    first_child = children.first

    %w[lang script].each do |attr_name|
      shared_value = first_child[attr_name]
      next unless shared_value && children.all? { |child_node| child_node[attr_name] == shared_value }

      subject_node[attr_name] = shared_value
      children.each { |child_node| child_node.delete(attr_name) }
    end
  end
end
267
+
268
# Removes temporal elements without text from subjects, and removes the subject too when that
# leaves it without any child elements.
def normalize_empty_temporal
  empty_temporals = ng_xml.root.xpath('//mods:subject/mods:temporal[not(text())]', mods: ModsNormalizer::MODS_NS)
  empty_temporals.each do |temporal_node|
    owner = temporal_node.parent
    temporal_node.remove
    owner.remove if owner.elements.empty?
  end
end
275
+
276
# Removes geographic elements that have neither text nor a valueURI from subjects, and removes
# the subject too when that leaves it with no child elements and no attributes.
def normalize_empty_geographic
  empty_geos = ng_xml.root.xpath('//mods:subject/mods:geographic[not(@valueURI) and not(text())]',
                                 mods: ModsNormalizer::MODS_NS)
  empty_geos.each do |geo_node|
    owner = geo_node.parent
    geo_node.remove
    owner.remove if owner.elements.empty? && owner.attributes.empty?
  end
end
283
+
284
# Moves an xlink:href found on a subject's child element up to the subject itself.
def normalize_xlink_href
  linked_children = ng_xml.root.xpath('//mods:subject/mods:*[@xlink:href]',
                                      mods: ModsNormalizer::MODS_NS, xlink: ModsNormalizer::XLINK_NS)
  linked_children.each do |child_node|
    href = child_node['xlink:href']
    child_node.parent['xlink:href'] = href
    child_node.delete('href')
  end
end
291
+ end
292
+ end
293
+ end
294
+ end
295
+ end
296
+ end
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cocina
4
+ module Models
5
+ module Mapping
6
+ module Normalizers
7
+ module Mods
8
+ # Normalizes a Fedora MODS document for title elements.
9
+ class TitleNormalizer
10
# Class-level shortcut: wraps the given MODS and label in a normalizer instance and runs it.
# @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
# @param [String] label
# @return [Nokogiri::Document] normalized MODS
def self.normalize(mods_ng_xml:, label:)
  normalizer = new(mods_ng_xml: mods_ng_xml, label: label)
  normalizer.normalize
end
16
+
17
# Class-level shortcut that only runs the missing-title step (see #normalize_missing_title).
# @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
# @param [String] label
# @return [Nokogiri::Document] normalized MODS
def self.normalize_missing_title(mods_ng_xml:, label:)
  normalizer = new(mods_ng_xml: mods_ng_xml, label: label)
  normalizer.normalize_missing_title
end
20
+
21
# Stores the label and a duplicate (via #dup) of the supplied document, with its encoding set
# to UTF-8.
# @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
# @param [String] label
def initialize(mods_ng_xml:, label:)
  @ng_xml = mods_ng_xml.dup.tap { |doc| doc.encoding = 'UTF-8' }
  @label = label
end
26
+
27
# Runs every title normalization step, in order, and returns the working document.
# @return [Nokogiri::Document] normalized MODS
def normalize
  %i[
    normalize_hydrus_title
    clean_empty_titles
    normalize_title_type
    normalize_title_trailing
    normalize_title_as_label
  ].each { |step| send(step) }
  ng_xml
end
35
+
36
# Runs only the step that adds the label as a title when the document has none.
# @return [Nokogiri::Document] normalized MODS
def normalize_missing_title
  ng_xml.tap { normalize_title_as_label }
end
40
+
41
+ private
42
+
43
+ attr_reader :ng_xml, :label
44
+
45
# Adds a 'Hydrus' title when the label is exactly 'Hydrus' and the root has no
# titleInfo/title with non-empty content.
def normalize_hydrus_title
  return unless label == 'Hydrus'

  existing_titles = ng_xml.root.xpath('mods:titleInfo/mods:title[string-length() > 0]', mods: ModsNormalizer::MODS_NS)
  return if existing_titles.present?

  add_title('Hydrus')
end
51
+
52
# Removes title and subTitle elements without text, then removes titleInfo elements that are
# left with no mods children and carry no xlink:href.
def clean_empty_titles
  root = ng_xml.root
  ['//mods:title[not(text())]', '//mods:subTitle[not(text())]'].each do |empty_query|
    root.xpath(empty_query, mods: ModsNormalizer::MODS_NS).each(&:remove)
  end
  hollow_title_infos = root.xpath('//mods:titleInfo[not(mods:*) and not(@xlink:href)]',
                                  mods: ModsNormalizer::MODS_NS, xlink: ModsNormalizer::XLINK_NS)
  hollow_title_infos.each(&:remove)
end
58
+
59
# Deletes the type attribute from every title element that has one.
def normalize_title_type
  typed_titles = ng_xml.root.xpath('//mods:title[@type]', mods: ModsNormalizer::MODS_NS)
  typed_titles.each { |title_node| title_node.delete('type') }
end
64
+
65
# Strips a single trailing ',' from each title whose titleInfo is not of type "abbreviated".
def normalize_title_trailing
  titles = ng_xml.root.xpath('//mods:titleInfo[not(@type="abbreviated")]/mods:title', mods: ModsNormalizer::MODS_NS)
  titles.each do |title_node|
    trimmed = title_node.content.delete_suffix(',')
    title_node.content = trimmed
  end
end
70
+
71
# Adds the label as a title unless the root already has a titleInfo/title or a titleInfo
# with an xlink:href.
def normalize_title_as_label
  root = ng_xml.root
  return if root.xpath('mods:titleInfo/mods:title', mods: ModsNormalizer::MODS_NS).present?

  linked_title_infos = root.xpath('mods:titleInfo[@xlink:href]',
                                  mods: ModsNormalizer::MODS_NS, xlink: ModsNormalizer::XLINK_NS)
  return if linked_title_infos.present?

  add_title(label)
end
78
+
79
# Appends a new titleInfo element containing one title with the given content to the root.
# @param content the text for the new title element
def add_title(content)
  wrapper = Nokogiri::XML::Node.new('titleInfo', Nokogiri::XML(nil))
  wrapper << Nokogiri::XML::Node.new('title', Nokogiri::XML(nil)).tap { |title| title.content = content }
  ng_xml.root << wrapper
end
86
+ end
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end