cocina-models 0.75.0 → 0.78.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +40 -12
- data/.rubocop_todo.yml +71 -2
- data/README.md +41 -5
- data/cocina-models.gemspec +2 -0
- data/description_types.yml +167 -38
- data/docs/description_types.md +471 -216
- data/lib/cocina/generator/generator.rb +7 -12
- data/lib/cocina/generator/schema.rb +1 -3
- data/lib/cocina/generator/schema_base.rb +0 -8
- data/lib/cocina/generator/schema_ref.rb +1 -1
- data/lib/cocina/generator/schema_value.rb +14 -4
- data/lib/cocina/models/access.rb +4 -4
- data/lib/cocina/models/admin_policy.rb +1 -1
- data/lib/cocina/models/admin_policy_access_template.rb +7 -7
- data/lib/cocina/models/admin_policy_administrative.rb +1 -1
- data/lib/cocina/models/admin_policy_with_metadata.rb +3 -3
- data/lib/cocina/models/builders/name_title_group_builder.rb +0 -4
- data/lib/cocina/models/builders/title_builder.rb +0 -2
- data/lib/cocina/models/citation_only_access.rb +2 -2
- data/lib/cocina/models/collection_access.rb +4 -4
- data/lib/cocina/models/collection_identification.rb +1 -1
- data/lib/cocina/models/collection_with_metadata.rb +2 -2
- data/lib/cocina/models/contributor.rb +4 -4
- data/lib/cocina/models/controlled_digital_lending_access.rb +2 -2
- data/lib/cocina/models/dark_access.rb +4 -4
- data/lib/cocina/models/description.rb +3 -3
- data/lib/cocina/models/descriptive_basic_value.rb +13 -13
- data/lib/cocina/models/descriptive_parallel_contributor.rb +5 -5
- data/lib/cocina/models/descriptive_parallel_event.rb +3 -3
- data/lib/cocina/models/descriptive_value.rb +13 -13
- data/lib/cocina/models/descriptive_value_language.rb +6 -6
- data/lib/cocina/models/dro.rb +1 -1
- data/lib/cocina/models/dro_access.rb +8 -8
- data/lib/cocina/models/dro_with_metadata.rb +3 -3
- data/lib/cocina/models/embargo.rb +5 -5
- data/lib/cocina/models/event.rb +3 -3
- data/lib/cocina/models/file.rb +4 -4
- data/lib/cocina/models/file_access.rb +4 -4
- data/lib/cocina/models/identification.rb +2 -2
- data/lib/cocina/models/language.rb +12 -12
- data/lib/cocina/models/location_based_access.rb +1 -1
- data/lib/cocina/models/location_based_download_access.rb +1 -1
- data/lib/cocina/models/mapping/error_notifier.rb +36 -0
- data/lib/cocina/models/mapping/from_mods/access.rb +177 -0
- data/lib/cocina/models/mapping/from_mods/admin_metadata.rb +217 -0
- data/lib/cocina/models/mapping/from_mods/alt_rep_group.rb +26 -0
- data/lib/cocina/models/mapping/from_mods/authority.rb +51 -0
- data/lib/cocina/models/mapping/from_mods/contributor.rb +161 -0
- data/lib/cocina/models/mapping/from_mods/description.rb +98 -0
- data/lib/cocina/models/mapping/from_mods/description_builder.rb +61 -0
- data/lib/cocina/models/mapping/from_mods/event.rb +543 -0
- data/lib/cocina/models/mapping/from_mods/form.rb +381 -0
- data/lib/cocina/models/mapping/from_mods/geographic.rb +219 -0
- data/lib/cocina/models/mapping/from_mods/hydrus_default_title_builder.rb +28 -0
- data/lib/cocina/models/mapping/from_mods/identifier.rb +51 -0
- data/lib/cocina/models/mapping/from_mods/identifier_builder.rb +71 -0
- data/lib/cocina/models/mapping/from_mods/identifier_type.rb +292 -0
- data/lib/cocina/models/mapping/from_mods/language.rb +36 -0
- data/lib/cocina/models/mapping/from_mods/language_script.rb +30 -0
- data/lib/cocina/models/mapping/from_mods/language_term.rb +106 -0
- data/lib/cocina/models/mapping/from_mods/name_builder.rb +307 -0
- data/lib/cocina/models/mapping/from_mods/note.rb +162 -0
- data/lib/cocina/models/mapping/from_mods/part_builder.rb +147 -0
- data/lib/cocina/models/mapping/from_mods/primary.rb +27 -0
- data/lib/cocina/models/mapping/from_mods/purl.rb +53 -0
- data/lib/cocina/models/mapping/from_mods/related_resource.rb +105 -0
- data/lib/cocina/models/mapping/from_mods/subject.rb +413 -0
- data/lib/cocina/models/mapping/from_mods/subject_authority_codes.rb +794 -0
- data/lib/cocina/models/mapping/from_mods/title.rb +160 -0
- data/lib/cocina/models/mapping/from_mods/title_builder.rb +106 -0
- data/lib/cocina/models/mapping/from_mods/title_builder_strategy.rb +19 -0
- data/lib/cocina/models/mapping/from_mods/value_uri.rb +25 -0
- data/lib/cocina/models/mapping/normalizers/base.rb +16 -0
- data/lib/cocina/models/mapping/normalizers/mods/geo_extension_normalizer.rb +69 -0
- data/lib/cocina/models/mapping/normalizers/mods/name_normalizer.rb +191 -0
- data/lib/cocina/models/mapping/normalizers/mods/origin_info_normalizer.rb +157 -0
- data/lib/cocina/models/mapping/normalizers/mods/subject_normalizer.rb +296 -0
- data/lib/cocina/models/mapping/normalizers/mods/title_normalizer.rb +91 -0
- data/lib/cocina/models/mapping/normalizers/mods_normalizer.rb +409 -0
- data/lib/cocina/models/mapping/purl.rb +27 -0
- data/lib/cocina/models/mapping/to_mods/access.rb +155 -0
- data/lib/cocina/models/mapping/to_mods/admin_metadata.rb +129 -0
- data/lib/cocina/models/mapping/to_mods/contributor.rb +49 -0
- data/lib/cocina/models/mapping/to_mods/description.rb +63 -0
- data/lib/cocina/models/mapping/to_mods/event.rb +200 -0
- data/lib/cocina/models/mapping/to_mods/form.rb +292 -0
- data/lib/cocina/models/mapping/to_mods/geographic.rb +151 -0
- data/lib/cocina/models/mapping/to_mods/id_generator.rb +25 -0
- data/lib/cocina/models/mapping/to_mods/identifier.rb +57 -0
- data/lib/cocina/models/mapping/to_mods/language.rb +82 -0
- data/lib/cocina/models/mapping/to_mods/mods_writer.rb +38 -0
- data/lib/cocina/models/mapping/to_mods/name_title_group.rb +29 -0
- data/lib/cocina/models/mapping/to_mods/name_writer.rb +228 -0
- data/lib/cocina/models/mapping/to_mods/note.rb +105 -0
- data/lib/cocina/models/mapping/to_mods/part_writer.rb +115 -0
- data/lib/cocina/models/mapping/to_mods/related_resource.rb +108 -0
- data/lib/cocina/models/mapping/to_mods/role_writer.rb +50 -0
- data/lib/cocina/models/mapping/to_mods/subject.rb +486 -0
- data/lib/cocina/models/mapping/to_mods/title.rb +260 -0
- data/lib/cocina/models/object_metadata.rb +2 -2
- data/lib/cocina/models/presentation.rb +2 -2
- data/lib/cocina/models/related_resource.rb +9 -9
- data/lib/cocina/models/release_tag.rb +4 -4
- data/lib/cocina/models/request_admin_policy.rb +1 -1
- data/lib/cocina/models/request_administrative.rb +1 -1
- data/lib/cocina/models/request_collection.rb +2 -2
- data/lib/cocina/models/request_description.rb +3 -3
- data/lib/cocina/models/request_dro.rb +4 -4
- data/lib/cocina/models/request_file.rb +5 -5
- data/lib/cocina/models/request_identification.rb +1 -1
- data/lib/cocina/models/sequence.rb +1 -1
- data/lib/cocina/models/source.rb +4 -4
- data/lib/cocina/models/standard.rb +5 -5
- data/lib/cocina/models/stanford_access.rb +2 -2
- data/lib/cocina/models/title.rb +13 -13
- data/lib/cocina/models/validators/dark_validator.rb +4 -2
- data/lib/cocina/models/validators/description_values_validator.rb +77 -0
- data/lib/cocina/models/validators/open_api_validator.rb +0 -4
- data/lib/cocina/models/validators/validator.rb +2 -1
- data/lib/cocina/models/version.rb +1 -1
- data/lib/cocina/models/world_access.rb +2 -2
- data/lib/cocina/models.rb +4 -0
- data/lib/cocina/rspec/factories.rb +205 -0
- data/lib/cocina/rspec.rb +2 -0
- data/openapi.yml +5 -5
- metadata +89 -17
- data/docs/_config.yml +0 -1
- data/docs/maps/Agent.json +0 -18
- data/docs/maps/Collection.json +0 -240
- data/docs/maps/DRO.json +0 -316
- data/docs/maps/Description.json +0 -17
- data/docs/maps/File.json +0 -196
- data/docs/maps/Fileset.json +0 -143
- data/docs/maps/README.md +0 -7
- data/docs/maps/ReleaseTag.json +0 -39
- data/docs/maps/Sequence.json +0 -46
- data/docs/maps/Title.json +0 -18
- data/docs/sampleETD/foxml-export.xml +0 -935
- data/docs/sampleETD/foxml.xml +0 -3475
- data/docs/sampleETD/xn109qc9773_bibframe.ttl +0 -95
- data/docs/sampleETD/xn109qc9773_taco.json +0 -158
@@ -0,0 +1,160 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
  module Models
    module Mapping
      module FromMods
        # Maps the MODS titleInfo elements of a resource to cocina title hashes.
        class Title # rubocop:disable Metrics/ClassLength
          # MODS element / attribute names mapped to the cocina type used for that part of a title.
          TYPES = {
            'nonSort' => 'nonsorting characters',
            'title' => 'main title',
            'subTitle' => 'subtitle',
            'partNumber' => 'part number',
            'partName' => 'part name',
            'date' => 'life dates',
            'given' => 'forename',
            'family' => 'surname',
            'uniform' => 'title'
          }.freeze

          # Type assigned to the name entries of a structured (name + title) value.
          PERSON_TYPE = 'name'

          NAME_TYPES = ['person', 'forename', 'surname', 'life dates'].freeze

          # @param [Nokogiri::XML::Element] resource_element mods or relatedItem element
          # @param [Cocina::Models::Mapping::ErrorNotifier] notifier
          # @param [boolean] require_title notify if true and title is missing.
          # @return [Array<Hash>] hashes that can be mapped to cocina title models
          def self.build(resource_element:, notifier:, require_title: true)
            new(resource_element: resource_element, notifier: notifier).build(require_title: require_title)
          end

          # @param [Nokogiri::XML::Element] resource_element mods or relatedItem element
          # @param [Cocina::Models::Mapping::ErrorNotifier] notifier
          def initialize(resource_element:, notifier:)
            @resource_element = resource_element
            @notifier = notifier
          end

          # Builds one hash per titleInfo (or per altRepGroup of titleInfos, as a parallelValue).
          #
          # @param [boolean] require_title report an error through the notifier when no title was built
          # @return [Array<Hash>] the title hashes; empty when nothing could be mapped
          def build(require_title: true)
            all_title_info_nodes = resource_element.xpath('mods:titleInfo', mods: Description::DESC_METADATA_NS)
            altrepgroup_title_info_nodes, other_title_info_nodes = AltRepGroup.split(nodes: all_title_info_nodes)

            parallel_titles = altrepgroup_title_info_nodes.map { |grouped_nodes| parallel(grouped_nodes) }
            result = parallel_titles + simple_or_structured(other_title_info_nodes)
            Primary.adjust(result, 'title', notifier)

            notifier.error('Missing title') if result.empty? && require_title

            result
          end

          private

          attr_reader :resource_element, :notifier

          # @param [Nokogiri::XML::NodeSet] node_set the titleInfo elements in the parallel grouping
          # @return [Hash] a parallelValue hash, with a :type unless the type is absent or plain 'parallel'
          def parallel(node_set)
            values = simple_or_structured(node_set, display_types: display_types?(node_set))
            { parallelValue: values }.tap do |result|
              type = parallel_type(node_set)
              result[:type] = type if type && type != 'parallel'
            end
          end

          # @param [Nokogiri::XML::NodeSet] node_set the titleInfo elements in the parallel grouping
          # @return [Boolean] false when every node is type="uniform" (the type is then carried by the
          #   parallelValue itself, see #parallel_type); true otherwise
          def display_types?(node_set)
            !node_set.all? { |node| node['type'] == 'uniform' }
          end

          # @param [Nokogiri::XML::NodeSet] node_set the titleInfo elements in the parallel grouping
          # @return [String, nil] 'uniform', 'parallel' or nil
          def parallel_type(node_set)
            # If all are uniform, then uniform
            return 'uniform' if node_set.all? { |node| node['type'] == 'uniform' }
            # If no node has a usage attribute and every node has a type, the type is parallel
            return 'parallel' unless node_set.any? { |node| node['usage'] || !node['type'] }

            nil
          end

          # @param [Nokogiri::XML::NodeSet] node_set titleInfo elements
          # @param [Boolean] display_types passed through to #common_attributes
          # @return [Array<Hash>] one hash per titleInfo that produced a value; others are dropped
          def simple_or_structured(node_set, display_types: true)
            node_set.filter_map do |node|
              next structured_name(node: node, display_types: display_types) if node['primary']

              attrs = TitleBuilder.build(title_info_element: node, notifier: notifier)
              next unless attrs.present?

              attrs
                .merge(common_attributes(node, display_types: display_types))
                .merge(associated_name_note(node))
            end
          end

          # Builds a structuredValue holding the title followed by the parts of the name that shares
          # the titleInfo's nameTitleGroup.
          #
          # @param [Nokogiri::XML::Element] node the titleInfo node
          # @param [Boolean] display_types passed through to #common_attributes
          # @return [Hash]
          def structured_name(node:, display_types: true)
            name_node = resource_element.xpath("mods:name[@nameTitleGroup='#{node['nameTitleGroup']}']",
                                               mods: Description::DESC_METADATA_NS).first

            structured_values = if name_node.nil?
                                  notifier.warn('Name not found for title group')
                                  []
                                else
                                  NameBuilder.build(name_elements: [name_node], notifier: notifier)[:name]
                                end
            structured_values.each { |structured_value| structured_value[:type] = PERSON_TYPE }

            title = TitleBuilder.build(title_info_element: node, notifier: notifier)
            structured_values.unshift({ type: 'title' }.merge(title)) if title

            { structuredValue: structured_values }.merge(common_attributes(node, display_types: display_types))
          end

          # Attributes derived from the titleInfo element's own XML attributes.
          #
          # @param [Nokogiri::XML::Element] title_info the titleInfo node
          # @param [Bool] display_types this is set to false in the case that it's a parallelValue and all are translations
          # @return [Hash] only the keys that ended up with a non-nil value
          def common_attributes(title_info, display_types: true)
            {}.tap do |attrs|
              attrs[:status] = 'primary' if title_info['usage'] == 'primary'
              # Later assignments deliberately win: supplied > transliterated > the type attribute
              attrs[:type] = title_info['type'] if display_types && title_info['type']
              attrs[:type] = 'transliterated' if title_info['transliteration']
              attrs[:type] = 'supplied' if title_info['supplied'] == 'yes'

              source = {
                code: Authority.normalize_code(title_info['authority'], notifier),
                uri: Authority.normalize_uri(title_info['authorityURI'])
              }.compact
              attrs[:source] = source if source.present?
              attrs[:uri] = ValueURI.sniff(title_info['valueURI'], notifier)

              value_language = LanguageScript.build(node: title_info)
              attrs[:valueLanguage] = value_language if value_language
              attrs[:standard] = { value: title_info['transliteration'] } if title_info['transliteration']
              attrs[:displayLabel] = title_info['displayLabel']
            end.compact
          end

          # @param [Nokogiri::XML::Element] title_info_node the titleInfo node
          # @return [Hash] a :note with the associated name when the titleInfo has a nameTitleGroup
          #   matched by a sibling name element; an empty hash otherwise
          def associated_name_note(title_info_node)
            name_title_group_num = title_info_node['nameTitleGroup']
            return {} if name_title_group_num.blank?

            xpath_expression = "../mods:name[@nameTitleGroup='#{name_title_group_num}']"
            matching_name_elements = title_info_node.xpath(xpath_expression, mods: Description::DESC_METADATA_NS)
            if matching_name_elements.blank?
              notifier.warn("For title '#{title_info_node.text.strip}', no name matching nameTitleGroup #{name_title_group_num}.")
              return {}
            end

            name = NameBuilder.build(name_elements: [matching_name_elements.first], notifier: notifier)
            desired_name_attrs = name[:name].first.slice(:value, :structuredValue)
            { note: [{ type: 'associated name' }.merge(desired_name_attrs).compact] }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
  module Models
    module Mapping
      module FromMods
        # Builds the value portion of a cocina title (value, structuredValue or valueAt)
        # from a single MODS titleInfo element.
        class TitleBuilder
          # @param [Nokogiri::XML::Element] title_info_element titleInfo element
          # @param [Cocina::Models::Mapping::ErrorNotifier] notifier
          # @return [Hash, nil] a hash that can be mapped to a cocina model; nil when the element has no usable content
          def self.build(title_info_element:, notifier:)
            new(title_info_element: title_info_element, notifier: notifier).build
          end

          # @param [Nokogiri::XML::Element] title_info_element titleInfo element
          # @param [Cocina::Models::Mapping::ErrorNotifier] notifier
          def initialize(title_info_element:, notifier:)
            @title_info_element = title_info_element
            @notifier = notifier
          end

          # @return [Hash, nil] { valueAt: } for an xlink:href, { value: } for a lone title child,
          #   { structuredValue:, note: } for a title with parts, or nil when there is nothing to map
          def build
            href = title_info_element['xlink:href']
            return { valueAt: href } if href

            return nil if title_info_element.children.empty?

            # Find all the child nodes that have text
            children = title_info_element.xpath('./*[child::node()[self::text()]]')
            if children.empty?
              # e.g. a titleInfo whose child elements are all empty
              notifier.warn('Empty title node')
              return nil
            end

            notifier.warn('Title with type') if children_with_type?(children)

            # Is this a basic title or a title with parts
            return simple_value(title_info_element) if simple_title?(children)

            structured_value(children)
          end

          private

          attr_reader :title_info_element, :notifier

          # @param [Nokogiri::XML::NodeSet] children the text-bearing children of the titleInfo
          # @return [Boolean] true when any title child carries a type attribute
          def children_with_type?(children)
            children.any? { |child| child.name == 'title' && child[:type].present? }
          end

          # @param [Nokogiri::XML::NodeSet] children the text-bearing children of the titleInfo
          # @return [Boolean] true when the only child is a title element
          def simple_title?(children)
            children.size == 1 && children.first.name == 'title'
          end

          # @param [Nokogiri::XML::Element] node the titleInfo node
          # @return [Hash] the cleaned title text under :value
          def simple_value(node)
            value = node.xpath('./mods:title', mods: Description::DESC_METADATA_NS).text

            { value: clean_title(value, node.name) }
          end

          # @param [Nokogiri::XML::NodeSet] child_nodes the children of the titleInfo
          # @return [Hash] :structuredValue with one typed entry per child, plus :note when
          #   nonSort children are present
          def structured_value(child_nodes)
            values = child_nodes.map do |node|
              { value: clean_title(node.text, node.name), type: Title::TYPES[node.name] }
            end

            { structuredValue: values, note: note(child_nodes) }.compact
          end

          # @param [String] title the raw text
          # @param [String] tag name of the element the text came from
          # @return [String] the text without a single trailing comma (title, titleInfo) or
          #   without trailing spaces (nonSort); unchanged for any other tag
          def clean_title(title, tag)
            case tag
            when 'title', 'titleInfo'
              title.delete_suffix(',')
            when 'nonSort'
              title.sub(/ +$/, '')
            else
              title
            end
          end

          # @param [Nokogiri::XML::NodeSet] child_nodes the children of the titleInfo
          # @return [Array<Hash>, nil] a single 'nonsorting character count' note, or nil when
          #   there is no nonSort child
          def note(child_nodes)
            unsortable = child_nodes.select { |node| node.name == 'nonSort' }
            return nil if unsortable.empty?

            count = unsortable.sum do |node|
              text = node.text
              # One extra character is counted unless the text already ends in a hyphen, apostrophe or space
              ['-', "'", ' '].include?(text[-1]) ? text.size : text.size + 1
            end

            [{
              # Kept as a String. NOTE(review): the original comment called this cast temporary
              # ("until cocina-models 0.40.0 is used", https://github.com/sul-dlss/cocina-models/pull/146);
              # confirm whether an Integer is acceptable before changing it.
              value: count.to_s,
              type: 'nonsorting character count'
            }]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
  module Models
    module Mapping
      module FromMods
        # Decides how to build a title based on whether this is a registered Hydrus object or not.
        class TitleBuilderStrategy
          # @param [String] label the object label; only the exact value 'Hydrus' selects the Hydrus builder
          # @return [#build] a class that can build a title
          def self.find(label:)
            # Some hydrus items don't have titles, so using label. See https://github.com/sul-dlss/hydrus/issues/421
            label == 'Hydrus' ? HydrusDefaultTitleBuilder : Title
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
  module Models
    module Mapping
      module FromMods
        # Sniffs value URIs
        class ValueURI
          # A value URI is expected to start with one of these prefixes.
          SUPPORTED_PREFIXES = [
            'http'
          ].freeze

          # Warns through the notifier when a non-blank URI does not start with a supported prefix.
          # The URI is returned either way; it is never rejected.
          #
          # @param [String, nil] uri the valueURI attribute value
          # @param [#warn] notifier receives the message and a { uri: } context hash
          # @return [String, nil] the uri, or nil when it is blank
          def self.sniff(uri, notifier)
            if uri.present? && !uri.start_with?(*SUPPORTED_PREFIXES)
              notifier.warn('Value URI has unexpected value',
                            { uri: uri })
            end

            uri.presence
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
  module Models
    module Mapping
      module Normalizers
        # Shared methods available to normalizer class instances
        module Base
          # Parses the given XML with Nokogiri's default XML options plus noblanks and
          # stores the resulting document in @ng_xml, replacing whatever was there.
          #
          # @param [String] xml the XML to parse
          # @return [Nokogiri::XML::Document] the newly parsed document
          def regenerate_ng_xml(xml)
            @ng_xml = Nokogiri::XML(xml) { |config| config.default_xml.noblanks }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
  module Models
    module Mapping
      module Normalizers
        module Mods
          # Normalizes a Fedora MODS document for geo extension elements.
          class GeoExtensionNormalizer
            # Namespace URIs bound to the rdf:, dc: and gml: prefixes in the XPath queries below.
            RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
            DC_NS = 'http://purl.org/dc/elements/1.1/'
            GML_NS = 'http://www.opengis.net/gml/3.2/'

            # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
            # @param [String] druid
            # @return [Nokogiri::Document] normalized MODS
            def self.normalize(mods_ng_xml:, druid:)
              new(mods_ng_xml: mods_ng_xml, druid: druid).normalize
            end

            # Works on a UTF-8 copy of the document, so the caller's document is not modified.
            #
            # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
            # @param [String] druid with or without the 'druid:' prefix
            def initialize(mods_ng_xml:, druid:)
              @ng_xml = mods_ng_xml.dup
              @ng_xml.encoding = 'UTF-8'
              @druid = druid
            end

            # @return [Nokogiri::Document] the normalized copy
            def normalize
              normalize_geo_purl
              normalize_dc_image
              normalize_gml_id
              normalize_empty_resource
              ng_xml
            end

            private

            attr_reader :ng_xml, :druid

            # Points rdf:about of every rdf:Description inside a geo extension at the object's purl.
            def normalize_geo_purl
              ng_xml.root.xpath('//mods:extension[@displayLabel="geo"]//rdf:Description',
                                mods: ModsNormalizer::MODS_NS,
                                rdf: RDF_NS).each do |node|
                node['rdf:about'] = "http://purl.stanford.edu/#{druid.delete_prefix('druid:')}"
              end
            end

            # Capitalizes the dc:type value "image" inside a geo extension.
            def normalize_dc_image
              ng_xml.root.xpath('//mods:extension[@displayLabel="geo"]//dc:type[text() = "image"]',
                                mods: ModsNormalizer::MODS_NS,
                                dc: DC_NS).each do |node|
                node.content = 'Image'
              end
            end

            # Drops the id attribute from gml:Point elements whose gml:id is the literal "ID".
            def normalize_gml_id
              ng_xml.root.xpath("//gml:Point[@gml:id='ID']", gml: GML_NS).each do |point_node|
                point_node.delete('id')
              end
            end

            # Drops empty rdf:resource attributes from dc:coverage elements.
            def normalize_empty_resource
              ng_xml.root.xpath('//dc:coverage[@rdf:resource = ""]',
                                dc: DC_NS,
                                rdf: RDF_NS).each do |coverage_node|
                coverage_node.delete('resource')
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,191 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cocina
  module Models
    module Mapping
      module Normalizers
        module Mods
          # Normalizes a Fedora MODS document for name elements.
          class NameNormalizer # rubocop:disable Metrics/ClassLength
            # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
            # @return [Nokogiri::Document] normalized MODS
            def self.normalize(mods_ng_xml:)
              new(mods_ng_xml: mods_ng_xml).normalize
            end

            # Works on a UTF-8 copy of the document, so the caller's document is not modified.
            #
            # @param [Nokogiri::Document] mods_ng_xml MODS to be normalized
            def initialize(mods_ng_xml:)
              @ng_xml = mods_ng_xml.dup
              @ng_xml.encoding = 'UTF-8'
            end

            # Runs every name normalization step in order.
            #
            # @return [Nokogiri::Document] the normalized copy
            def normalize
              normalize_parallel_name_role
              normalize_text_role_term
              normalize_role_term
              normalize_role # must be after normalize_role_term
              normalize_name
              normalize_corporate_needing_primary
              normalize_dupes
              normalize_type
              normalize_name_part_type
              ng_xml
            end

            private

            attr_reader :ng_xml

            # For parallel names (same altRepGroup), all should have the same roles: the first role
            # of the first name that has one replaces the first role of every other name in the group.
            def normalize_parallel_name_role
              name_nodes = ng_xml.root.xpath('//mods:name[@altRepGroup]', mods: ModsNormalizer::MODS_NS)
              grouped_name_nodes = name_nodes.group_by { |name_node| name_node['altRepGroup'] }
                                             .values
                                             .reject { |name_node_group| name_node_group.size == 1 }
              grouped_name_nodes.each do |name_node_group|
                name_node_with_role = name_node_group.find { |name_node| role_node_for(name_node) }
                next unless name_node_with_role

                name_node_group.each do |name_node|
                  next if name_node == name_node_with_role

                  existing_role_node = role_node_for(name_node)
                  existing_role_node&.remove

                  name_node << role_node_for(name_node_with_role).dup
                end
              end
            end

            # @param [Nokogiri::XML::Node] name_node
            # @return [Nokogiri::XML::Node, nil] the first role child of the name, if any
            def role_node_for(name_node)
              name_node.xpath('mods:role', mods: ModsNormalizer::MODS_NS).first
            end

            # Add the type="text" attribute to roleTerms that don't have a type (seen in MODS 3.3 druid:yy910cj7795)
            def normalize_text_role_term
              ng_xml.root.xpath('//mods:roleTerm[not(@type)]', mods: ModsNormalizer::MODS_NS).each do |role_term_node|
                role_term_node['type'] = 'text'
              end
            end

            # Removes empty nameParts, then names left with nothing identifying them, and finally
            # turns xlink:href into valueURI on names that have child elements.
            def normalize_name
              ng_xml.root.xpath('//mods:namePart[not(text())]', mods: ModsNormalizer::MODS_NS).each(&:remove)
              ng_xml.root.xpath('//mods:name[not(mods:namePart) and not(@xlink:href) and not(mods:etal) and not(@valueURI)]',
                                mods: ModsNormalizer::MODS_NS, xlink: ModsNormalizer::XLINK_NS).each(&:remove)

              # Some MODS 3.3 items have xlink:href attributes. See https://argo.stanford.edu/view/druid:yy910cj7795
              # Move them only when there are children.
              ng_xml.xpath('//mods:name[@xlink:href and mods:*]',
                           mods: ModsNormalizer::MODS_NS, xlink: ModsNormalizer::XLINK_NS).each do |node|
                node['valueURI'] = node.remove_attribute('href').value
              end
            end

            # assign usage="primary" to a single corporate name with nameTitleGroup if there is no other "primary" usage designation
            def normalize_corporate_needing_primary
              existing_primary_name = ng_xml.root.xpath('//mods:mods/mods:name[@usage="primary"]', mods: ModsNormalizer::MODS_NS)
              return if existing_primary_name.present?

              name_title_group_names = ng_xml.root.xpath('//mods:mods/mods:name[@nameTitleGroup][@type="corporate"]',
                                                         mods: ModsNormalizer::MODS_NS)
              return unless name_title_group_names.size == 1

              name_title_group_names.first['usage'] = 'primary'
            end

            # De-duplicates names directly under the root and directly under each of its relatedItems.
            def normalize_dupes
              normalize_dupes_for(ng_xml.root)
              ng_xml.root.xpath('mods:relatedItem', mods: ModsNormalizer::MODS_NS).each do |related_item_node|
                normalize_dupes_for(related_item_node)
              end
            end

            # Keeps one name per group of equivalent names (see #name_node_comparitor), gives it the
            # unique roles of the whole group and removes the other names of the group.
            #
            # @param [Nokogiri::XML::Node] base_node the parent of the name elements to de-duplicate
            def normalize_dupes_for(base_node)
              name_nodes = base_node.xpath('mods:name', mods: ModsNormalizer::MODS_NS)
              dupe_name_nodes_groups = name_nodes.group_by { |name_node| name_node_comparitor(name_node) }
              dupe_name_nodes_groups.each_value do |grouped_name_nodes|
                if grouped_name_nodes.size == 1
                  include_all_uniq_roles(grouped_name_nodes, base_node)
                  next
                end

                # If there is a name with nameTitleGroup, prefer retaining it.
                nametitle_names, other_names = grouped_name_nodes.partition { |name_node| name_node['nameTitleGroup'] }
                ordered_name_nodes = nametitle_names + other_names

                uniq_name_nodes = ordered_name_nodes.uniq { |name_node| name_node_comparitor(name_node) }
                include_all_uniq_roles(uniq_name_nodes, base_node)

                ordered_name_nodes[1..].each(&:remove)
              end
            end

            # Serializes a copy of the node without its usage and nameTitleGroup attributes and without
            # its role children, with whitespace collapsed, so equivalent nodes yield equal strings.
            #
            # @param [Nokogiri::XML::Node] name_node
            # @return [String]
            def name_node_comparitor(name_node)
              dup_name_node = name_node.dup
              dup_name_node.delete('usage')
              dup_name_node.delete('nameTitleGroup')
              dup_name_node.xpath('mods:role', mods: ModsNormalizer::MODS_NS).each(&:unlink)
              dup_name_node.to_s.strip.gsub(/\s+/, ' ')
            end

            # ensure all roles for each uniq name node are present
            # @param [Array<Nokogiri::XML::Node>] uniq_name_nodes the name nodes being kept
            # @param [Nokogiri::XML::Node] base_node the parent of the name elements
            # @return [Array<Nokogiri::XML::Node>] the uniq name nodes with all roles present
            def include_all_uniq_roles(uniq_name_nodes, base_node)
              names_to_roles = name_comparitor_2_role_nodes(base_node) # computed once per call, not per name node
              uniq_name_nodes.each do |uniq_name_node|
                role_nodes = names_to_roles[name_node_comparitor(uniq_name_node)]
                next if role_nodes.blank?

                uniq_name_node.xpath('mods:role', mods: ModsNormalizer::MODS_NS).each(&:unlink)
                role_nodes.each { |role_node| uniq_name_node.add_child(role_node) }
              end
              uniq_name_nodes
            end

            # @param [Nokogiri::XML::Node] base_node the parent of the name elements
            # @return [Hash<String, Array<Nokogiri::XML::Node>>] key is the string comparitor for a name node;
            #   value is an Array of uniq role nodes
            def name_comparitor_2_role_nodes(base_node)
              # all roles are collected in one query so that duplicate name nodes contribute theirs too
              all_role_nodes = base_node.xpath('mods:name/mods:role', mods: ModsNormalizer::MODS_NS)
              all_role_nodes
                .group_by { |role_node| name_node_comparitor(role_node.parent) }
                .each_value { |role_nodes| role_nodes.uniq! { |role_node| name_node_comparitor(role_node) } }
            end

            # Keeps name types known to Contributor::ROLES, lowercases those known only in
            # lowercase form and removes the type attribute otherwise.
            def normalize_type
              known_types = Cocina::Models::Mapping::FromMods::Contributor::ROLES
              ng_xml.root.xpath('//mods:name[@type]', mods: ModsNormalizer::MODS_NS).each do |name_node_w_type|
                raw_type = name_node_w_type['type']
                next if known_types.key?(raw_type)

                if known_types.key?(raw_type.downcase)
                  name_node_w_type['type'] = raw_type.downcase
                else
                  name_node_w_type.remove_attribute('type')
                end
              end
            end

            # Removes the type attribute from nameParts whose type is not a key of Contributor::NAME_PART.
            def normalize_name_part_type
              ng_xml.root.xpath('//mods:namePart[(@type)]', mods: ModsNormalizer::MODS_NS).each do |name_part_node|
                raw_type = name_part_node['type']
                next if Cocina::Models::Mapping::FromMods::Contributor::NAME_PART.key?(raw_type)

                name_part_node.remove_attribute('type')
              end
            end

            # remove the roleTerm when there is no text value and no valueURI or authorityURI attribute
            def normalize_role_term
              ng_xml.root.xpath('//mods:roleTerm[not(text()) and not(@valueURI) and not(@authorityURI)]',
                                mods: ModsNormalizer::MODS_NS).each(&:remove)
            end

            # remove the role when there are no child elements and no attributes
            def normalize_role
              ng_xml.root.xpath('//mods:role[not(mods:*) and not(@*)]', mods: ModsNormalizer::MODS_NS).each(&:remove)
            end
          end
        end
      end
    end
  end
end
|