cocina-models 0.75.0 → 0.78.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +40 -12
  3. data/.rubocop_todo.yml +71 -2
  4. data/README.md +41 -5
  5. data/cocina-models.gemspec +2 -0
  6. data/description_types.yml +167 -38
  7. data/docs/description_types.md +471 -216
  8. data/lib/cocina/generator/generator.rb +7 -12
  9. data/lib/cocina/generator/schema.rb +1 -3
  10. data/lib/cocina/generator/schema_base.rb +0 -8
  11. data/lib/cocina/generator/schema_ref.rb +1 -1
  12. data/lib/cocina/generator/schema_value.rb +14 -4
  13. data/lib/cocina/models/access.rb +4 -4
  14. data/lib/cocina/models/admin_policy.rb +1 -1
  15. data/lib/cocina/models/admin_policy_access_template.rb +7 -7
  16. data/lib/cocina/models/admin_policy_administrative.rb +1 -1
  17. data/lib/cocina/models/admin_policy_with_metadata.rb +3 -3
  18. data/lib/cocina/models/builders/name_title_group_builder.rb +0 -4
  19. data/lib/cocina/models/builders/title_builder.rb +0 -2
  20. data/lib/cocina/models/citation_only_access.rb +2 -2
  21. data/lib/cocina/models/collection_access.rb +4 -4
  22. data/lib/cocina/models/collection_identification.rb +1 -1
  23. data/lib/cocina/models/collection_with_metadata.rb +2 -2
  24. data/lib/cocina/models/contributor.rb +4 -4
  25. data/lib/cocina/models/controlled_digital_lending_access.rb +2 -2
  26. data/lib/cocina/models/dark_access.rb +4 -4
  27. data/lib/cocina/models/description.rb +3 -3
  28. data/lib/cocina/models/descriptive_basic_value.rb +13 -13
  29. data/lib/cocina/models/descriptive_parallel_contributor.rb +5 -5
  30. data/lib/cocina/models/descriptive_parallel_event.rb +3 -3
  31. data/lib/cocina/models/descriptive_value.rb +13 -13
  32. data/lib/cocina/models/descriptive_value_language.rb +6 -6
  33. data/lib/cocina/models/dro.rb +1 -1
  34. data/lib/cocina/models/dro_access.rb +8 -8
  35. data/lib/cocina/models/dro_with_metadata.rb +3 -3
  36. data/lib/cocina/models/embargo.rb +5 -5
  37. data/lib/cocina/models/event.rb +3 -3
  38. data/lib/cocina/models/file.rb +4 -4
  39. data/lib/cocina/models/file_access.rb +4 -4
  40. data/lib/cocina/models/identification.rb +2 -2
  41. data/lib/cocina/models/language.rb +12 -12
  42. data/lib/cocina/models/location_based_access.rb +1 -1
  43. data/lib/cocina/models/location_based_download_access.rb +1 -1
  44. data/lib/cocina/models/mapping/error_notifier.rb +36 -0
  45. data/lib/cocina/models/mapping/from_mods/access.rb +177 -0
  46. data/lib/cocina/models/mapping/from_mods/admin_metadata.rb +217 -0
  47. data/lib/cocina/models/mapping/from_mods/alt_rep_group.rb +26 -0
  48. data/lib/cocina/models/mapping/from_mods/authority.rb +51 -0
  49. data/lib/cocina/models/mapping/from_mods/contributor.rb +161 -0
  50. data/lib/cocina/models/mapping/from_mods/description.rb +98 -0
  51. data/lib/cocina/models/mapping/from_mods/description_builder.rb +61 -0
  52. data/lib/cocina/models/mapping/from_mods/event.rb +543 -0
  53. data/lib/cocina/models/mapping/from_mods/form.rb +381 -0
  54. data/lib/cocina/models/mapping/from_mods/geographic.rb +219 -0
  55. data/lib/cocina/models/mapping/from_mods/hydrus_default_title_builder.rb +28 -0
  56. data/lib/cocina/models/mapping/from_mods/identifier.rb +51 -0
  57. data/lib/cocina/models/mapping/from_mods/identifier_builder.rb +71 -0
  58. data/lib/cocina/models/mapping/from_mods/identifier_type.rb +292 -0
  59. data/lib/cocina/models/mapping/from_mods/language.rb +36 -0
  60. data/lib/cocina/models/mapping/from_mods/language_script.rb +30 -0
  61. data/lib/cocina/models/mapping/from_mods/language_term.rb +106 -0
  62. data/lib/cocina/models/mapping/from_mods/name_builder.rb +307 -0
  63. data/lib/cocina/models/mapping/from_mods/note.rb +162 -0
  64. data/lib/cocina/models/mapping/from_mods/part_builder.rb +147 -0
  65. data/lib/cocina/models/mapping/from_mods/primary.rb +27 -0
  66. data/lib/cocina/models/mapping/from_mods/purl.rb +53 -0
  67. data/lib/cocina/models/mapping/from_mods/related_resource.rb +105 -0
  68. data/lib/cocina/models/mapping/from_mods/subject.rb +413 -0
  69. data/lib/cocina/models/mapping/from_mods/subject_authority_codes.rb +794 -0
  70. data/lib/cocina/models/mapping/from_mods/title.rb +160 -0
  71. data/lib/cocina/models/mapping/from_mods/title_builder.rb +106 -0
  72. data/lib/cocina/models/mapping/from_mods/title_builder_strategy.rb +19 -0
  73. data/lib/cocina/models/mapping/from_mods/value_uri.rb +25 -0
  74. data/lib/cocina/models/mapping/normalizers/base.rb +16 -0
  75. data/lib/cocina/models/mapping/normalizers/mods/geo_extension_normalizer.rb +69 -0
  76. data/lib/cocina/models/mapping/normalizers/mods/name_normalizer.rb +191 -0
  77. data/lib/cocina/models/mapping/normalizers/mods/origin_info_normalizer.rb +157 -0
  78. data/lib/cocina/models/mapping/normalizers/mods/subject_normalizer.rb +296 -0
  79. data/lib/cocina/models/mapping/normalizers/mods/title_normalizer.rb +91 -0
  80. data/lib/cocina/models/mapping/normalizers/mods_normalizer.rb +409 -0
  81. data/lib/cocina/models/mapping/purl.rb +27 -0
  82. data/lib/cocina/models/mapping/to_mods/access.rb +155 -0
  83. data/lib/cocina/models/mapping/to_mods/admin_metadata.rb +129 -0
  84. data/lib/cocina/models/mapping/to_mods/contributor.rb +49 -0
  85. data/lib/cocina/models/mapping/to_mods/description.rb +63 -0
  86. data/lib/cocina/models/mapping/to_mods/event.rb +200 -0
  87. data/lib/cocina/models/mapping/to_mods/form.rb +292 -0
  88. data/lib/cocina/models/mapping/to_mods/geographic.rb +151 -0
  89. data/lib/cocina/models/mapping/to_mods/id_generator.rb +25 -0
  90. data/lib/cocina/models/mapping/to_mods/identifier.rb +57 -0
  91. data/lib/cocina/models/mapping/to_mods/language.rb +82 -0
  92. data/lib/cocina/models/mapping/to_mods/mods_writer.rb +38 -0
  93. data/lib/cocina/models/mapping/to_mods/name_title_group.rb +29 -0
  94. data/lib/cocina/models/mapping/to_mods/name_writer.rb +228 -0
  95. data/lib/cocina/models/mapping/to_mods/note.rb +105 -0
  96. data/lib/cocina/models/mapping/to_mods/part_writer.rb +115 -0
  97. data/lib/cocina/models/mapping/to_mods/related_resource.rb +108 -0
  98. data/lib/cocina/models/mapping/to_mods/role_writer.rb +50 -0
  99. data/lib/cocina/models/mapping/to_mods/subject.rb +486 -0
  100. data/lib/cocina/models/mapping/to_mods/title.rb +260 -0
  101. data/lib/cocina/models/object_metadata.rb +2 -2
  102. data/lib/cocina/models/presentation.rb +2 -2
  103. data/lib/cocina/models/related_resource.rb +9 -9
  104. data/lib/cocina/models/release_tag.rb +4 -4
  105. data/lib/cocina/models/request_admin_policy.rb +1 -1
  106. data/lib/cocina/models/request_administrative.rb +1 -1
  107. data/lib/cocina/models/request_collection.rb +2 -2
  108. data/lib/cocina/models/request_description.rb +3 -3
  109. data/lib/cocina/models/request_dro.rb +4 -4
  110. data/lib/cocina/models/request_file.rb +5 -5
  111. data/lib/cocina/models/request_identification.rb +1 -1
  112. data/lib/cocina/models/sequence.rb +1 -1
  113. data/lib/cocina/models/source.rb +4 -4
  114. data/lib/cocina/models/standard.rb +5 -5
  115. data/lib/cocina/models/stanford_access.rb +2 -2
  116. data/lib/cocina/models/title.rb +13 -13
  117. data/lib/cocina/models/validators/dark_validator.rb +4 -2
  118. data/lib/cocina/models/validators/description_values_validator.rb +77 -0
  119. data/lib/cocina/models/validators/open_api_validator.rb +0 -4
  120. data/lib/cocina/models/validators/validator.rb +2 -1
  121. data/lib/cocina/models/version.rb +1 -1
  122. data/lib/cocina/models/world_access.rb +2 -2
  123. data/lib/cocina/models.rb +4 -0
  124. data/lib/cocina/rspec/factories.rb +205 -0
  125. data/lib/cocina/rspec.rb +2 -0
  126. data/openapi.yml +5 -5
  127. metadata +89 -17
  128. data/docs/_config.yml +0 -1
  129. data/docs/maps/Agent.json +0 -18
  130. data/docs/maps/Collection.json +0 -240
  131. data/docs/maps/DRO.json +0 -316
  132. data/docs/maps/Description.json +0 -17
  133. data/docs/maps/File.json +0 -196
  134. data/docs/maps/Fileset.json +0 -143
  135. data/docs/maps/README.md +0 -7
  136. data/docs/maps/ReleaseTag.json +0 -39
  137. data/docs/maps/Sequence.json +0 -46
  138. data/docs/maps/Title.json +0 -18
  139. data/docs/sampleETD/foxml-export.xml +0 -935
  140. data/docs/sampleETD/foxml.xml +0 -3475
  141. data/docs/sampleETD/xn109qc9773_bibframe.ttl +0 -95
  142. data/docs/sampleETD/xn109qc9773_taco.json +0 -158
@@ -0,0 +1,160 @@
1
+ # frozen_string_literal: true
2
+
3
module Cocina
  module Models
    module Mapping
      module FromMods
        # Maps MODS titleInfo elements (of a mods or relatedItem element) to cocina title hashes.
        class Title # rubocop:disable Metrics/ClassLength
          # MODS element / attribute names => cocina type strings.
          # TitleBuilder looks up titleInfo child element names in this table.
          TYPES = {
            'nonSort' => 'nonsorting characters',
            'title' => 'main title',
            'subTitle' => 'subtitle',
            'partNumber' => 'part number',
            'partName' => 'part name',
            'date' => 'life dates',
            'given' => 'forename',
            'family' => 'surname',
            'uniform' => 'title'
          }.freeze

          PERSON_TYPE = 'name'

          NAME_TYPES = ['person', 'forename', 'surname', 'life dates'].freeze

          # @param [Nokogiri::XML::Element] resource_element mods or relatedItem element
          # @param [boolean] require_title notify if true and title is missing.
          # @param [Cocina::Models::Mapping::ErrorNotifier] notifier
          # @return [Hash] a hash that can be mapped to a cocina model
          def self.build(resource_element:, notifier:, require_title: true)
            new(resource_element: resource_element, notifier: notifier).build(require_title: require_title)
          end

          # @param [Nokogiri::XML::Element] resource_element mods or relatedItem element
          # @param [Cocina::Models::Mapping::ErrorNotifier] notifier
          def initialize(resource_element:, notifier:)
            @resource_element = resource_element
            @notifier = notifier
          end

          # Builds the titles: one parallelValue entry per altRepGroup grouping, followed by
          # one entry per remaining titleInfo element.
          # @param [boolean] require_title report 'Missing title' to the notifier when no title was built
          # @return [Array<Hash>] title hashes
          def build(require_title: true)
            title_info_nodes = resource_element.xpath('mods:titleInfo', mods: Description::DESC_METADATA_NS)
            altrepgroup_title_info_nodes, other_title_info_nodes = AltRepGroup.split(nodes: title_info_nodes)

            result = altrepgroup_title_info_nodes.map { |group| parallel(group) } \
                     + simple_or_structured(other_title_info_nodes)
            Primary.adjust(result, 'title', notifier)

            notifier.error('Missing title') if result.empty? && require_title

            result
          end

          private

          attr_reader :resource_element, :notifier

          # @param [Nokogiri::XML::NodeSet] node_set the titleInfo elements in the parallel grouping
          # @return [Hash] hash with :parallelValue and, when every node is uniform, type: 'uniform'
          def parallel(node_set)
            result = { parallelValue: simple_or_structured(node_set, display_types: display_types?(node_set)) }
            type = parallel_type(node_set)
            # 'parallel' is never written out as a type; only 'uniform' survives this check.
            result[:type] = type if type && type != 'parallel'
            result
          end

          # @return [Boolean] false only when every node in the set has type="uniform"
          def display_types?(node_set)
            !node_set.all? { |node| node['type'] == 'uniform' }
          end

          # @return [String, nil] 'uniform', 'parallel' or nil (see inline comments)
          def parallel_type(node_set)
            # If all are uniform, then uniform
            return 'uniform' if node_set.all? { |node| node[:type] == 'uniform' }
            # If no node has a usage attribute and every node has a type, the type is parallel
            return 'parallel' if node_set.none? { |node| node['usage'] || !node['type'] }

            nil
          end

          # Maps each titleInfo node to a title hash; nodes that produce no title attributes are dropped.
          # @param [Nokogiri::XML::NodeSet] node_set titleInfo elements
          # @param [Boolean] display_types passed through to common_attributes
          # @return [Array<Hash>]
          def simple_or_structured(node_set, display_types: true)
            node_set.filter_map do |node|
              next structured_name(node: node, display_types: display_types) if node['primary']

              attrs = TitleBuilder.build(title_info_element: node, notifier: notifier)
              next unless attrs.present?

              attrs
                .merge(common_attributes(node, display_types: display_types))
                .merge(associated_name_note(node))
            end
          end

          # Builds a structuredValue made of the title followed by the parts of the name that shares
          # the titleInfo's nameTitleGroup.
          # @param [Nokogiri::XML::Element] node the titleInfo node
          # @param [Boolean] display_types passed through to common_attributes
          # @return [Hash]
          def structured_name(node:, display_types: true)
            # The group id is bound as an XPath variable rather than interpolated, so a quote
            # character in the attribute value cannot break the expression. to_s keeps the
            # previous behavior (comparison against '') when the attribute is absent.
            name_node = resource_element.xpath('mods:name[@nameTitleGroup=$group]',
                                               { mods: Description::DESC_METADATA_NS },
                                               { group: node['nameTitleGroup'].to_s }).first

            structured_values = if name_node.nil?
                                  notifier.warn('Name not found for title group')
                                  []
                                else
                                  NameBuilder.build(name_elements: [name_node], notifier: notifier)[:name]
                                end
            structured_values.each { |structured_value| structured_value[:type] = 'name' }
            title = TitleBuilder.build(title_info_element: node, notifier: notifier)
            structured_values.unshift({ type: 'title' }.merge(title)) if title
            { structuredValue: structured_values }.merge(common_attributes(node, display_types: display_types))
          end

          # Attributes derived from the titleInfo element's own XML attributes.
          # @param [Nokogiri::XML::Element] title_info the titleInfo node
          # @param [Bool] display_types when false the titleInfo type attribute is not copied; callers
          #   pass false for a parallel grouping in which every titleInfo is type="uniform"
          # @return [Hash] attributes with nil values removed
          def common_attributes(title_info, display_types: true)
            {}.tap do |attrs|
              attrs[:status] = 'primary' if title_info['usage'] == 'primary'
              # Later assignments win: supplied overrides transliterated, which overrides the type attribute.
              attrs[:type] = title_info['type'] if display_types && title_info['type']
              attrs[:type] = 'transliterated' if title_info['transliteration']
              attrs[:type] = 'supplied' if title_info['supplied'] == 'yes'

              source = {
                code: Authority.normalize_code(title_info[:authority], notifier),
                uri: Authority.normalize_uri(title_info[:authorityURI])
              }.compact
              attrs[:source] = source if source.present?
              attrs[:uri] = ValueURI.sniff(title_info[:valueURI], notifier)

              value_language = LanguageScript.build(node: title_info)
              attrs[:valueLanguage] = value_language if value_language
              attrs[:standard] = { value: title_info['transliteration'] } if title_info['transliteration']
              attrs[:displayLabel] = title_info['displayLabel']
            end.compact
          end

          # Builds an 'associated name' note from the sibling name element that shares the
          # titleInfo's nameTitleGroup.
          # @param [Nokogiri::XML::Element] title_info_node the titleInfo node
          # @return [Hash] { note: [...] }, or {} when there is no nameTitleGroup or no matching name
          def associated_name_note(title_info_node)
            name_title_group_num = title_info_node['nameTitleGroup']
            return {} if name_title_group_num.blank?

            # Bound as an XPath variable so that quotes in the attribute value are harmless.
            matching_name_elements = title_info_node.xpath('../mods:name[@nameTitleGroup=$group]',
                                                           { mods: Description::DESC_METADATA_NS },
                                                           { group: name_title_group_num })
            if matching_name_elements.blank?
              notifier.warn("For title '#{title_info_node.text.strip}', no name matching nameTitleGroup #{name_title_group_num}.")
              return {}
            end

            name = NameBuilder.build(name_elements: [matching_name_elements.first], notifier: notifier)
            desired_name_attrs = name[:name].first.slice(:value, :structuredValue)
            { note: [{ type: 'associated name' }.merge(desired_name_attrs).compact] }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
module Cocina
  module Models
    module Mapping
      module FromMods
        # Builds the value portion of a cocina title from a single MODS titleInfo element.
        class TitleBuilder
          # @param [Nokogiri::XML::Element] title_info_element titleInfo element
          # @param [Cocina::Models::Mapping::ErrorNotifier] notifier
          # @return [Hash, nil] a hash that can be mapped to a cocina model, or nil when there is no title content
          def self.build(title_info_element:, notifier:)
            new(title_info_element: title_info_element, notifier: notifier).build
          end

          # @param [Nokogiri::XML::Element] title_info_element titleInfo element
          # @param [Cocina::Models::Mapping::ErrorNotifier] notifier
          def initialize(title_info_element:, notifier:)
            @title_info_element = title_info_element
            @notifier = notifier
          end

          # @return [Hash, nil] { valueAt: } for an xlink:href, { value: } for a lone title element,
          #   { structuredValue:, note: } for a title with parts, nil when there is nothing to map
          def build
            href = title_info_element['xlink:href']
            return { valueAt: href } if href

            # A titleInfo without any children (e.g. only a displayLabel) yields nothing, without a warning.
            return nil if title_info_element.children.empty?

            # Find all the child elements that have text
            children = title_info_element.xpath('./*[child::node()[self::text()]]')
            if children.empty?
              notifier.warn('Empty title node')
              return nil
            end

            notifier.warn('Title with type') if children_with_type?(children)

            # Is this a basic title or a title with parts
            return simple_value(title_info_element) if simple_title?(children)

            structured_value(children)
          end

          private

          attr_reader :title_info_element, :notifier

          # @return [Boolean] true when any title child element carries a type attribute
          def children_with_type?(children)
            children.any? { |child| child.name == 'title' && child[:type].present? }
          end

          # @return [Boolean] true when the only child with text is a title element
          def simple_title?(children)
            children.size == 1 && children.first.name == 'title'
          end

          # @param [Nokogiri::XML::Element] node the titleInfo node
          # @return [Hash] { value: } holding the cleaned title text
          def simple_value(node)
            title_text = node.xpath('./mods:title', mods: Description::DESC_METADATA_NS).text

            { value: clean_title(title_text, node.name) }
          end

          # @param [Nokogiri::XML::NodeSet] child_nodes the children of the titleInfo
          # @return [Hash] { structuredValue: } plus a nonsorting character count note when applicable
          def structured_value(child_nodes)
            values = child_nodes.map do |node|
              { value: clean_title(node.text, node.name), type: Title::TYPES[node.name] }
            end
            { structuredValue: values, note: note(child_nodes) }.compact
          end

          # Removes one trailing comma from title text and trailing spaces from nonSort text;
          # text of any other element is returned unchanged.
          # @param [String] title the text to clean
          # @param [String] tag name of the element the text came from
          # @return [String]
          def clean_title(title, tag)
            case tag
            when 'title', 'titleInfo' then title.delete_suffix(',')
            when 'nonSort' then title.sub(/ +$/, '')
            else title
            end
          end

          # @param [Nokogiri::XML::NodeSet] child_nodes the children of the titleInfo
          # @return [Array<Hash>, nil] single 'nonsorting character count' note, nil without nonSort children
          def note(child_nodes)
            unsortable = child_nodes.select { |node| node.name == 'nonSort' }
            return nil if unsortable.empty?

            count = unsortable.sum do |node|
              text = node.text
              # One extra character is counted unless the text already ends in a hyphen, apostrophe or space.
              text.end_with?('-', "'", ' ') ? text.size : text.size + 1
            end
            [{
              value: count.to_s, # emitted as a String; see https://github.com/sul-dlss/cocina-models/pull/146
              type: 'nonsorting character count'
            }]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Cocina
  module Models
    module Mapping
      module FromMods
        # Picks the title builder class to use for an object, based on its label.
        class TitleBuilderStrategy
          # @param [String] label
          # @return [#build] HydrusDefaultTitleBuilder when the label is exactly 'Hydrus', otherwise Title
          def self.find(label:)
            # Some hydrus items don't have titles, so using label. See https://github.com/sul-dlss/hydrus/issues/421
            return HydrusDefaultTitleBuilder if label == 'Hydrus'

            Title
          end
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Cocina
  module Models
    module Mapping
      module FromMods
        # Sniffs value URIs
        class ValueURI
          # A value URI that does not begin with one of these prefixes triggers a warning.
          SUPPORTED_PREFIXES = [
            'http'
          ].freeze

          # Warns about a non-blank URI with an unsupported prefix; the URI itself is passed through.
          # @param [String, nil] uri the valueURI attribute value
          # @param [#warn] notifier receives 'Value URI has unexpected value' along with the uri
          # @return [String, nil] the uri, or nil when it is blank
          def self.sniff(uri, notifier)
            # String#start_with? is the core method behind ActiveSupport's starts_with? alias.
            if uri.present? && !uri.start_with?(*SUPPORTED_PREFIXES)
              notifier.warn('Value URI has unexpected value', { uri: uri })
            end

            uri.presence
          end
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Cocina
  module Models
    module Mapping
      module Normalizers
        # Shared methods available to normalizer class instances
        module Base
          # Parses xml with Nokogiri's default XML options plus noblanks and stores the
          # resulting document in @ng_xml.
          # @param [String] xml the XML to parse
          # @return [Nokogiri::XML::Document] the newly parsed document
          def regenerate_ng_xml(xml)
            @ng_xml = Nokogiri::XML(xml) do |parse_options|
              parse_options.default_xml.noblanks
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
module Cocina
  module Models
    module Mapping
      module Normalizers
        module Mods
          # Normalizes the geo extension elements of a Fedora MODS document.
          class GeoExtensionNormalizer
            # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
            # @param [String] druid
            # @return [Nokogiri::Document] normalized MODS
            def self.normalize(mods_ng_xml:, druid:)
              new(mods_ng_xml: mods_ng_xml, druid: druid).normalize
            end

            # Works on a copy of the document, so the caller's document is not modified.
            # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
            # @param [String] druid
            def initialize(mods_ng_xml:, druid:)
              @ng_xml = mods_ng_xml.dup.tap { |copy| copy.encoding = 'UTF-8' }
              @druid = druid
            end

            # Applies every geo normalization in turn.
            # @return [Nokogiri::Document] the normalized copy
            def normalize
              normalize_geo_purl
              normalize_dc_image
              normalize_gml_id
              normalize_empty_resource
              ng_xml
            end

            private

            attr_reader :ng_xml, :druid

            # Points rdf:about of every rdf:Description inside a geo extension at the object's purl.
            def normalize_geo_purl
              namespaces = {
                mods: ModsNormalizer::MODS_NS,
                rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
              }
              description_nodes = ng_xml.root.xpath('//mods:extension[@displayLabel="geo"]//rdf:Description', namespaces)
              description_nodes.each do |description_node|
                description_node['rdf:about'] = "http://purl.stanford.edu/#{druid.delete_prefix('druid:')}"
              end
            end

            # Capitalizes a dc:type of "image" inside a geo extension.
            def normalize_dc_image
              namespaces = {
                mods: ModsNormalizer::MODS_NS,
                dc: 'http://purl.org/dc/elements/1.1/'
              }
              image_type_nodes = ng_xml.root.xpath('//mods:extension[@displayLabel="geo"]//dc:type[text() = "image"]', namespaces)
              image_type_nodes.each { |type_node| type_node.content = 'Image' }
            end

            # Drops the id attribute from gml:Point elements whose gml:id is the literal 'ID'.
            def normalize_gml_id
              point_nodes = ng_xml.root.xpath("//gml:Point[@gml:id='ID']", gml: 'http://www.opengis.net/gml/3.2/')
              point_nodes.each { |point_node| point_node.delete('id') }
            end

            # Drops empty rdf:resource attributes from dc:coverage elements.
            def normalize_empty_resource
              namespaces = {
                dc: 'http://purl.org/dc/elements/1.1/',
                rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
              }
              coverage_nodes = ng_xml.root.xpath('//dc:coverage[@rdf:resource = ""]', namespaces)
              coverage_nodes.each { |coverage_node| coverage_node.delete('resource') }
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,191 @@
1
+ # frozen_string_literal: true
2
+
3
module Cocina
  module Models
    module Mapping
      module Normalizers
        module Mods
          # Normalizes the name elements of a Fedora MODS document.
          class NameNormalizer # rubocop:disable Metrics/ClassLength
            # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
            # @return [Nokogiri::Document] normalized MODS
            def self.normalize(mods_ng_xml:)
              new(mods_ng_xml: mods_ng_xml).normalize
            end

            # Works on a copy of the document, so the caller's document is not modified.
            # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
            def initialize(mods_ng_xml:)
              @ng_xml = mods_ng_xml.dup.tap { |copy| copy.encoding = 'UTF-8' }
            end

            # Runs every name normalization; the order of the steps is significant.
            # @return [Nokogiri::Document] the normalized copy
            def normalize
              normalize_parallel_name_role
              normalize_text_role_term
              normalize_role_term
              normalize_role # must be after normalize_role_term
              normalize_name
              normalize_corporate_needing_primary
              normalize_dupes
              normalize_type
              normalize_name_part_type
              ng_xml
            end

            private

            attr_reader :ng_xml

            # For parallel names (same altRepGroup), all should have the same roles: the first role of
            # the first name in the group that has one replaces the first role of each other name.
            def normalize_parallel_name_role
              alt_rep_names = ng_xml.root.xpath('//mods:name[@altRepGroup]', mods: ModsNormalizer::MODS_NS)
              parallel_groups = alt_rep_names
                                .group_by { |name_node| name_node['altRepGroup'] }
                                .values
                                .select { |group| group.size != 1 }

              parallel_groups.each do |group|
                source_name_node = group.find { |name_node| role_node_for(name_node) }
                next unless source_name_node

                group.each do |name_node|
                  next if name_node == source_name_node

                  role_node_for(name_node)&.remove
                  name_node << role_node_for(source_name_node).dup
                end
              end
            end

            # @return [Nokogiri::XML::Node, nil] the first role child of the name node
            def role_node_for(name_node)
              name_node.xpath('mods:role', mods: ModsNormalizer::MODS_NS).first
            end

            # Add the type="text" attribute to roleTerms that don't have a type (seen in MODS 3.3 druid:yy910cj7795)
            def normalize_text_role_term
              untyped_role_terms = ng_xml.root.xpath('//mods:roleTerm[not(@type)]', mods: ModsNormalizer::MODS_NS)
              untyped_role_terms.each { |role_term_node| role_term_node['type'] = 'text' }
            end

            # Removes empty nameParts, then names left without any content, then moves xlink:href to valueURI.
            def normalize_name
              empty_name_parts = ng_xml.root.xpath('//mods:namePart[not(text())]', mods: ModsNormalizer::MODS_NS)
              empty_name_parts.each(&:remove)

              contentless_names = ng_xml.root.xpath(
                '//mods:name[not(mods:namePart) and not(@xlink:href) and not(mods:etal) and not(@valueURI)]',
                mods: ModsNormalizer::MODS_NS, xlink: ModsNormalizer::XLINK_NS
              )
              contentless_names.each(&:remove)

              # Some MODS 3.3 items have xlink:href attributes. See https://argo.stanford.edu/view/druid:yy910cj7795
              # Move them only when there are children.
              names_with_href = ng_xml.xpath('//mods:name[@xlink:href and mods:*]',
                                             mods: ModsNormalizer::MODS_NS, xlink: ModsNormalizer::XLINK_NS)
              names_with_href.each do |name_node|
                href_attribute = name_node.remove_attribute('href')
                name_node['valueURI'] = href_attribute.value
              end
            end

            # assign usage="primary" to a single corporate name with nameTitleGroup if there is no other "primary" usage designation
            def normalize_corporate_needing_primary
              primary_names = ng_xml.root.xpath('//mods:mods/mods:name[@usage="primary"]', mods: ModsNormalizer::MODS_NS)
              return if primary_names.present?

              candidates = ng_xml.root.xpath('//mods:mods/mods:name[@nameTitleGroup][@type="corporate"]',
                                             mods: ModsNormalizer::MODS_NS)
              candidates.first['usage'] = 'primary' if candidates.size == 1
            end

            # De-duplicates names directly under the root, then under each relatedItem child of the root.
            def normalize_dupes
              normalize_dupes_for(ng_xml.root)
              related_items = ng_xml.root.xpath('mods:relatedItem', mods: ModsNormalizer::MODS_NS)
              related_items.each { |related_item_node| normalize_dupes_for(related_item_node) }
            end

            # Collapses name children of base_node that compare equal into a single node carrying all of their roles.
            # @param [Nokogiri::XML::Node] base_node the element whose name children are de-duplicated
            def normalize_dupes_for(base_node)
              name_nodes = base_node.xpath('mods:name', mods: ModsNormalizer::MODS_NS)
              same_name_groups = name_nodes.group_by { |name_node| name_node_comparitor(name_node) }

              same_name_groups.each_value do |same_name_nodes|
                if same_name_nodes.size == 1
                  include_all_uniq_roles(same_name_nodes, base_node)
                  next
                end

                # If there is a name with nameTitleGroup, prefer retaining it.
                with_title_group, without_title_group = same_name_nodes.partition { |name_node| name_node['nameTitleGroup'] }
                ordered_name_nodes = with_title_group + without_title_group

                retained_name_nodes = ordered_name_nodes.uniq { |name_node| name_node_comparitor(name_node) }
                include_all_uniq_roles(retained_name_nodes, base_node)

                ordered_name_nodes.drop(1).each(&:remove)
              end
            end

            # @return [String] whitespace-collapsed serialization of a copy of the node with its usage and
            #   nameTitleGroup attributes and its role children removed; used to decide whether names are equal
            def name_node_comparitor(name_node)
              copy = name_node.dup
              %w[usage nameTitleGroup].each { |attribute_name| copy.delete(attribute_name) }
              copy.xpath('mods:role', mods: ModsNormalizer::MODS_NS).each(&:unlink)
              copy.to_s.strip.gsub(/\s+/, ' ')
            end

            # ensure all roles for each uniq name node are present
            # @return [Array<Nokogiri::XML::Node>] the uniq name nodes with all roles present
            def include_all_uniq_roles(uniq_name_nodes, base_node)
              roles_by_name = name_comparitor_2_role_nodes(base_node) # compute this once
              uniq_name_nodes.each do |uniq_name_node|
                role_nodes = roles_by_name[name_node_comparitor(uniq_name_node)]
                next if role_nodes.blank?

                # Swap the node's own roles for the collected set of uniq roles.
                uniq_name_node.xpath('mods:role', mods: ModsNormalizer::MODS_NS).each(&:unlink)
                role_nodes.each { |role_node| uniq_name_node.add_child(role_node) }
              end
              uniq_name_nodes
            end

            # @return [Hash<String, Array<Nokogiri::XML::Node>>] key is the string comparitor for a name node;
            #   value is an Array of uniq role nodes
            def name_comparitor_2_role_nodes(base_node)
              # the roles are collected before grouping in case of duplicate name nodes
              all_role_nodes = base_node.xpath('mods:name/mods:role', mods: 'http://www.loc.gov/mods/v3')
              all_role_nodes
                .group_by { |role_node| name_node_comparitor(role_node.parent) }
                .each_value { |role_nodes| role_nodes.uniq! { |role_node| name_node_comparitor(role_node) } }
            end

            # Keeps name types that are Contributor::ROLES keys, downcases those that match after
            # downcasing, and removes the type attribute otherwise.
            def normalize_type
              typed_names = ng_xml.root.xpath('//mods:name[@type]', mods: ModsNormalizer::MODS_NS)
              typed_names.each do |name_node|
                known_types = Cocina::Models::Mapping::FromMods::Contributor::ROLES
                raw_type = name_node['type']
                next if known_types.key?(raw_type)

                downcased_type = raw_type.downcase
                if known_types.key?(downcased_type)
                  name_node['type'] = downcased_type
                else
                  name_node.remove_attribute('type')
                end
              end
            end

            # Removes namePart type attributes that are not Contributor::NAME_PART keys.
            def normalize_name_part_type
              typed_name_parts = ng_xml.root.xpath('//mods:namePart[(@type)]', mods: ModsNormalizer::MODS_NS)
              typed_name_parts.each do |name_part_node|
                known_types = Cocina::Models::Mapping::FromMods::Contributor::NAME_PART
                name_part_node.remove_attribute('type') unless known_types.key?(name_part_node['type'])
              end
            end

            # remove the roleTerm when there is no text value and no valueURI or authorityURI attribute
            def normalize_role_term
              removable_role_terms = ng_xml.root.xpath('//mods:roleTerm[not(text()) and not(@valueURI) and not(@authorityURI)]',
                                                       mods: ModsNormalizer::MODS_NS)
              removable_role_terms.each(&:remove)
            end

            # remove the role when there are no child elements and no attributes
            def normalize_role
              empty_roles = ng_xml.root.xpath('//mods:role[not(mods:*) and not(@*)]', mods: ModsNormalizer::MODS_NS)
              empty_roles.each(&:remove)
            end
          end
        end
      end
    end
  end
end