stanford-mods 2.6.4 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
- data/lib/stanford-mods/concerns/name.rb +57 -0
- data/lib/stanford-mods/concerns/origin_info.rb +113 -0
- data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
- data/lib/stanford-mods/concerns/searchworks.rb +125 -0
- data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
- data/lib/stanford-mods/concerns/title.rb +87 -0
- data/lib/stanford-mods/coordinate.rb +24 -3
- data/lib/stanford-mods/date_parsing.rb +32 -289
- data/lib/stanford-mods/imprint.rb +170 -322
- data/lib/stanford-mods/record.rb +20 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +0 -0
- data/lib/stanford-mods.rb +12 -11
- data/spec/fixtures/searchworks_imprint_data.rb +38 -39
- data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
- data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
- data/spec/geo_spatial_spec.rb +1 -6
- data/spec/imprint_spec.rb +263 -207
- data/spec/lib/stanford-mods/coordinate_spec.rb +3 -5
- data/spec/name_spec.rb +26 -230
- data/spec/origin_info_spec.rb +34 -300
- data/spec/searchworks_basic_spec.rb +1 -3
- data/spec/searchworks_pub_dates_spec.rb +0 -215
- data/spec/searchworks_spec.rb +0 -21
- data/spec/searchworks_subject_raw_spec.rb +106 -105
- data/spec/searchworks_subject_spec.rb +19 -55
- data/spec/searchworks_title_spec.rb +5 -5
- data/stanford-mods.gemspec +1 -1
- metadata +19 -15
- data/lib/marc_countries.rb +0 -387
- data/lib/stanford-mods/geo_utils.rb +0 -28
- data/lib/stanford-mods/name.rb +0 -80
- data/lib/stanford-mods/origin_info.rb +0 -489
- data/lib/stanford-mods/searchworks.rb +0 -333
- data/lib/stanford-mods/searchworks_subjects.rb +0 -196
- data/spec/date_parsing_spec.rb +0 -905
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 22cc1cc8aafefb053ea3856e273e17be7089862160e91d87c483393b60c02aca
|
4
|
+
data.tar.gz: dbeea673e2c79744215c278ebf2fa73b78de3de7f79602590948bf4ee117b1df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb3d7b5761a6e4811dd85b6d4972ce238e5b507e957d617b5e4a3935bcd3e9f791176d250418a198d2b68a63babfc58dcb9c39d1b8b4fbbbdd01eecc504fa8a2
|
7
|
+
data.tar.gz: 3fe0f6181376c4cca9618a90e9a03244328f3a93a16ea55f1b5a72c7f842ddd0de8addbfb2da75efa4875bc4c04e4dd73caaad543745faf9461da7928566487e
|
data/.github/workflows/ruby.yml
CHANGED
@@ -1,10 +1,9 @@
|
|
1
|
-
#
|
2
|
-
require 'mods'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
3
|
module Stanford
|
5
4
|
module Mods
|
6
5
|
# NON-SearchWorks specific wranglings of MODS cartographics metadata
|
7
|
-
|
6
|
+
module Geospatial
|
8
7
|
GMLNS = 'http://www.opengis.net/gml/3.2/'.freeze
|
9
8
|
|
10
9
|
# @return [Array{String}] subject cartographic coordinates values
|
@@ -27,8 +26,7 @@ module Stanford
|
|
27
26
|
lowers = v.xpath('gml:lowerCorner', 'gml' => GMLNS).text.split
|
28
27
|
"ENVELOPE(#{lowers[0]}, #{uppers[0]}, #{uppers[1]}, #{lowers[1]})"
|
29
28
|
end
|
30
|
-
rescue RuntimeError
|
31
|
-
logger.warn "failure parsing <extension> element: #{e.message}"
|
29
|
+
rescue RuntimeError
|
32
30
|
[]
|
33
31
|
end
|
34
32
|
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# NON-SearchWorks specific wranglings of MODS <name> metadata as a mixin to the Stanford::Mods::Record object
|
4
|
+
module Stanford
|
5
|
+
module Mods
|
6
|
+
module Name
|
7
|
+
# the first encountered <mods><name> element with marcrelator flavor role of 'Creator' or 'Author'.
|
8
|
+
# if no marcrelator 'Creator' or 'Author', the first name without a role.
|
9
|
+
# if no name without a role, then nil
|
10
|
+
# @return [String] value for author_1xx_search field
|
11
|
+
def sw_main_author
|
12
|
+
result = mods_ng_xml.plain_name.find { |n| n.role.any? { |r| r.authority.include?('marcrelator') && r.value.any? { |v| v.match(/creator/i) || v.match?(/author/i) } } }
|
13
|
+
result ||= mods_ng_xml.plain_name.find { |n| n.role.empty? }
|
14
|
+
|
15
|
+
result&.display_value_w_date
|
16
|
+
end
|
17
|
+
|
18
|
+
# all names, in display form, except the main_author
|
19
|
+
# names will be the display_value_w_date form
|
20
|
+
# see Mods::Record.name in nom_terminology for details on the display_value algorithm
|
21
|
+
# @return [Array<String>] values for author_7xx_search field
|
22
|
+
def sw_addl_authors
|
23
|
+
mods_ng_xml.plain_name.map(&:display_value_w_date) - [sw_main_author]
|
24
|
+
end
|
25
|
+
|
26
|
+
# @return [Array<String>] values for author_person_facet, author_person_display
|
27
|
+
def sw_person_authors
|
28
|
+
mods_ng_xml.personal_name.map(&:display_value_w_date)
|
29
|
+
end
|
30
|
+
|
31
|
+
# return the display_value_w_date for all <mods><name> elements that do not have type='personal'
|
32
|
+
# @return [Array<String>] values for author_other_facet
|
33
|
+
def sw_impersonal_authors
|
34
|
+
mods_ng_xml.plain_name.select { |n| n.type_at != 'personal' }.map(&:display_value_w_date)
|
35
|
+
end
|
36
|
+
|
37
|
+
# @return [Array<String>] values for author_corp_display
|
38
|
+
def sw_corporate_authors
|
39
|
+
mods_ng_xml.corporate_name.map(&:display_value_w_date)
|
40
|
+
end
|
41
|
+
|
42
|
+
# @return [Array<String>] values for author_meeting_display
|
43
|
+
def sw_meeting_authors
|
44
|
+
mods_ng_xml.conference_name.map(&:display_value_w_date)
|
45
|
+
end
|
46
|
+
|
47
|
+
# Returns a sortable version of the main_author:
|
48
|
+
# main_author + sorting title
|
49
|
+
# which is the mods approximation of the value created for a marc record
|
50
|
+
# @return [String] value for author_sort field
|
51
|
+
def sw_sort_author
|
52
|
+
# substitute java Character.MAX_CODE_POINT for nil main_author so missing main authors sort last
|
53
|
+
"#{sw_main_author || "\u{10FFFF} " }#{sort_title}".gsub(/[[:punct:]]*/, '').strip
|
54
|
+
end
|
55
|
+
end # module Name
|
56
|
+
end # Module Mods
|
57
|
+
end # Module Stanford
|
@@ -0,0 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Parsing MODS /originInfo for Publication/Imprint data:
|
4
|
+
# * pub year for date slider facet
|
5
|
+
# * pub year for sorting
|
6
|
+
# * pub year for single display value
|
7
|
+
# * imprint info for display
|
8
|
+
# *
|
9
|
+
# These methods may be used by searchworks.rb file or by downstream apps
|
10
|
+
module Stanford
|
11
|
+
module Mods
|
12
|
+
module OriginInfo
|
13
|
+
# return pub year as an Integer
|
14
|
+
# prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
|
15
|
+
# look for a keyDate and use it if there is one; otherwise pick earliest date
|
16
|
+
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
|
17
|
+
# @return [Integer] publication year as an Integer
|
18
|
+
# @note for sorting: 5 B.C. => -5; 666 B.C. => -666
|
19
|
+
def pub_year_int(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
|
20
|
+
fields.each do |date_key|
|
21
|
+
values = mods_ng_xml.origin_info.send(date_key)
|
22
|
+
values = values.reject(&method(:is_approximate)) if ignore_approximate
|
23
|
+
|
24
|
+
earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
|
25
|
+
return earliest_date.year_int_from_date_str if earliest_date&.year_int_from_date_str
|
26
|
+
end; nil
|
27
|
+
end
|
28
|
+
|
29
|
+
# return a single string intended for lexical sorting for pub date
|
30
|
+
# prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
|
31
|
+
# look for a keyDate and use it if there is one; otherwise pick earliest date
|
32
|
+
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
|
33
|
+
# @return [String] single String containing publication year for lexical sorting
|
34
|
+
# @note for string sorting 5 B.C. = -5 => -995; 6 B.C. => -994, so 6 B.C. sorts before 5 B.C.
|
35
|
+
# @deprecated use pub_year_int
|
36
|
+
def pub_year_sort_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
|
37
|
+
fields.each do |date_key|
|
38
|
+
values = mods_ng_xml.origin_info.send(date_key)
|
39
|
+
values = values.reject(&method(:is_approximate)) if ignore_approximate
|
40
|
+
|
41
|
+
earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
|
42
|
+
return earliest_date.sortable_year_string_from_date_str if earliest_date&.sortable_year_string_from_date_str
|
43
|
+
end; nil
|
44
|
+
end
|
45
|
+
|
46
|
+
# return a single string intended for display of pub year
|
47
|
+
# 0 < year < 1000: add A.D. suffix
|
48
|
+
# year < 0: add B.C. suffix. ('-5' => '5 B.C.', '700 B.C.' => '700 B.C.')
|
49
|
+
# 195u => 195x
|
50
|
+
# 19uu => 19xx
|
51
|
+
# '-5' => '5 B.C.'
|
52
|
+
# '700 B.C.' => '700 B.C.'
|
53
|
+
# '7th century' => '7th century'
|
54
|
+
# date ranges?
|
55
|
+
# prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
|
56
|
+
# look for a keyDate and use it if there is one; otherwise pick earliest date
|
57
|
+
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
|
58
|
+
# should be ignored; false if approximate dates should be included
|
59
|
+
def pub_year_display_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
|
60
|
+
fields.each do |date_key|
|
61
|
+
values = mods_ng_xml.origin_info.send(date_key)
|
62
|
+
values = values.reject(&method(:is_approximate)) if ignore_approximate
|
63
|
+
|
64
|
+
earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
|
65
|
+
return earliest_date.date_str_for_display if earliest_date&.date_str_for_display
|
66
|
+
end; nil
|
67
|
+
end
|
68
|
+
|
69
|
+
# @return [Array<Stanford::Mods::Imprint>] array of imprint objects
|
70
|
+
# @private
|
71
|
+
def imprints
|
72
|
+
origin_info.map { |el| Stanford::Mods::Imprint.new(el) }
|
73
|
+
end
|
74
|
+
|
75
|
+
def place
|
76
|
+
term_values([:origin_info, :place, :placeTerm])
|
77
|
+
end
|
78
|
+
|
79
|
+
# @return [String] single String containing imprint information for display
|
80
|
+
def imprint_display_str
|
81
|
+
imprints.map(&:display_str).reject(&:empty?).join('; ')
|
82
|
+
end
|
83
|
+
|
84
|
+
# whether the date element has a qualifier attribute of 'approximate' or 'questionable'
|
85
|
+
# @param [Nokogiri::XML::Element] node the date element
|
86
|
+
# @return [Boolean]
|
87
|
+
# @private
|
88
|
+
def is_approximate(node)
|
89
|
+
qualifier = node["qualifier"] if node.respond_to?('[]')
|
90
|
+
qualifier == 'approximate' || qualifier == 'questionable'
|
91
|
+
end
|
92
|
+
|
93
|
+
# get earliest parseable year from the passed date elements
|
94
|
+
# @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
|
95
|
+
# @return [Stanford::Mods::DateParsing]
|
96
|
+
def self.best_or_earliest_year(date_el_array)
|
97
|
+
key_dates, other_dates = date_el_array.partition { |node| node['keyDate'] == 'yes' }
|
98
|
+
|
99
|
+
sortable_dates = key_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str)
|
100
|
+
sortable_dates = other_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str) if sortable_dates.empty?
|
101
|
+
results = {}
|
102
|
+
|
103
|
+
# this is a little weird; instead of just the earliest sorting date, if there are multiple
|
104
|
+
# dates with the same sort key, we want to make sure we get the last occurring one?
|
105
|
+
sortable_dates.each do |v|
|
106
|
+
results[v.sortable_year_string_from_date_str] = v
|
107
|
+
end
|
108
|
+
|
109
|
+
results[results.keys.min]
|
110
|
+
end
|
111
|
+
end # module OriginInfo
|
112
|
+
end
|
113
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Stanford
|
4
4
|
module Mods
|
@@ -7,7 +7,7 @@ module Stanford
|
|
7
7
|
# Note: mods_ng_xml_location.physicalLocation should find top level and relatedItem.
|
8
8
|
# Each method here expects to find at most ONE matching element. Subsequent potential matches
|
9
9
|
# are ignored.
|
10
|
-
|
10
|
+
module PhysicalLocation
|
11
11
|
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
12
12
|
# so use _location to get the data from either one of them
|
13
13
|
# @return [String] box number (note: single valued and might be something like 35A)
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# SearchWorks specific wranglings of MODS metadata as a mixin to the Stanford::Mods::Record object
|
4
|
+
module Stanford
|
5
|
+
module Mods
|
6
|
+
module Searchworks
|
7
|
+
# include languages known to SearchWorks; try to error correct when possible (e.g. when ISO-639 disagrees with MARC standard)
|
8
|
+
def sw_language_facet
|
9
|
+
mods_ng_xml.language.flat_map do |n|
|
10
|
+
# get languageTerm codes and add their translations to the result
|
11
|
+
result = n.code_term.flat_map do |ct|
|
12
|
+
if ct.authority =~ /^iso639/
|
13
|
+
vals = ct.text.split(/[,|\ ]/).reject { |x| x.strip.empty? }
|
14
|
+
vals.select { |v| ISO_639.find(v.strip) }.map do |v|
|
15
|
+
iso639_val = ISO_639.find(v.strip).english_name
|
16
|
+
|
17
|
+
if SEARCHWORKS_LANGUAGES.has_value?(iso639_val)
|
18
|
+
iso639_val
|
19
|
+
else
|
20
|
+
SEARCHWORKS_LANGUAGES[v.strip]
|
21
|
+
end
|
22
|
+
end
|
23
|
+
else
|
24
|
+
vals = ct.text.split(/[,|\ ]/).reject { |x| x.strip.empty? }
|
25
|
+
|
26
|
+
vals.map do |v|
|
27
|
+
SEARCHWORKS_LANGUAGES[v.strip]
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# add languageTerm text values
|
33
|
+
result.concat(n.text_term.map { |tt| tt.text.strip }.select { |val| !val.empty? && SEARCHWORKS_LANGUAGES.has_value?(val) })
|
34
|
+
|
35
|
+
# add language values that aren't in languageTerm subelement
|
36
|
+
result << n.text if n.languageTerm.empty? && SEARCHWORKS_LANGUAGES.has_value?(n.text)
|
37
|
+
|
38
|
+
result
|
39
|
+
end.uniq
|
40
|
+
end
|
41
|
+
|
42
|
+
# select one or more format values from the controlled vocabulary per JVine Summer 2014
|
43
|
+
# http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format_main_ssim&rows=0&facet.sort=index
|
44
|
+
# https://github.com/sul-dlss/stanford-mods/issues/66 - For geodata, the
|
45
|
+
# resource type should be only Map and not include Software, multimedia.
|
46
|
+
# @return [Array<String>] values in the SearchWorks controlled vocabulary
|
47
|
+
def format_main
|
48
|
+
types = typeOfResource
|
49
|
+
return [] unless types
|
50
|
+
|
51
|
+
val = []
|
52
|
+
genres = term_values(:genre) || []
|
53
|
+
issuance = term_values([:origin_info, :issuance]) || []
|
54
|
+
frequency = term_values([:origin_info, :frequency]) || []
|
55
|
+
|
56
|
+
val << 'Dataset' if genres.include?('dataset') || genres.include?('Dataset')
|
57
|
+
val << 'Archive/Manuscript' if types.any? { |t| t.manuscript == 'yes' }
|
58
|
+
|
59
|
+
val.concat(types.flat_map do |type|
|
60
|
+
case type.text
|
61
|
+
when 'cartographic'
|
62
|
+
'Map'
|
63
|
+
when 'mixed material'
|
64
|
+
'Archive/Manuscript'
|
65
|
+
when 'moving image'
|
66
|
+
'Video'
|
67
|
+
when 'notated music'
|
68
|
+
'Music score'
|
69
|
+
when 'software, multimedia'
|
70
|
+
'Software/Multimedia' unless types.map(&:text).include?('cartographic') || (genres.include?('dataset') || genres.include?('Dataset'))
|
71
|
+
when 'sound recording-musical'
|
72
|
+
'Music recording'
|
73
|
+
when 'sound recording-nonmusical', 'sound recording'
|
74
|
+
'Sound recording'
|
75
|
+
when 'still image'
|
76
|
+
'Image'
|
77
|
+
when 'text'
|
78
|
+
is_periodical = issuance.include?('continuing') || issuance.include?('serial') || frequency.any? { |x| !x.empty? }
|
79
|
+
is_archived_website = genres.any? { |x| x.casecmp('archived website') == 0 }
|
80
|
+
|
81
|
+
if is_periodical || is_archived_website
|
82
|
+
[
|
83
|
+
('Journal/Periodical' if is_periodical),
|
84
|
+
('Archived website' if is_archived_website)
|
85
|
+
].compact
|
86
|
+
else
|
87
|
+
'Book'
|
88
|
+
end
|
89
|
+
when 'three dimensional object'
|
90
|
+
'Object'
|
91
|
+
end
|
92
|
+
end)
|
93
|
+
|
94
|
+
val.compact.uniq
|
95
|
+
end
|
96
|
+
|
97
|
+
# @return [Array<String>] values for the genre facet in SearchWorks
|
98
|
+
def sw_genre
|
99
|
+
genres = term_values(:genre)
|
100
|
+
return [] unless genres
|
101
|
+
|
102
|
+
val = genres.map(&:to_s)
|
103
|
+
thesis_pub = ['thesis', 'Thesis']
|
104
|
+
val << 'Thesis/Dissertation' if (genres & thesis_pub).any?
|
105
|
+
|
106
|
+
conf_pub = ['conference publication', 'Conference publication', 'Conference Publication']
|
107
|
+
gov_pub = ['government publication', 'Government publication', 'Government Publication']
|
108
|
+
tech_rpt = ['technical report', 'Technical report', 'Technical Report']
|
109
|
+
|
110
|
+
val << 'Conference proceedings' if (genres & conf_pub).any?
|
111
|
+
val << 'Government document' if (genres & gov_pub).any?
|
112
|
+
val << 'Technical report' if (genres & tech_rpt).any?
|
113
|
+
|
114
|
+
val.uniq
|
115
|
+
end
|
116
|
+
|
117
|
+
# Reads the recordInfo/recordIdentifier values and returns the first one with every
# 'a' character removed, so that only the numeric part of the catkey remains.
# @return [String, nil] value with the numeric catkey in it, or nil if none exists
def catkey
  identifiers = term_values([:record_info, :recordIdentifier])

  # other callers in this library guard term_values against a nil result (`|| []`),
  # so coerce to an Array before taking the first value instead of calling nil.first
  Array(identifiers).first&.tr('a', '') # ensure catkey is numeric only
end
|
123
|
+
end # module Searchworks
|
124
|
+
end # Module Mods
|
125
|
+
end # Module Stanford
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# SearchWorks specific wranglings of MODS *subject* metadata as a mixin to the Stanford::Mods::Record object
|
4
|
+
module Stanford
|
5
|
+
module Mods
|
6
|
+
module SearchworksSubjects
|
7
|
+
# Values are the contents of:
|
8
|
+
# mods/subject/topic
|
9
|
+
# @return [Array<String>] values for the topic_search Solr field for this document (an empty Array if none)
|
10
|
+
def topic_search
|
11
|
+
subject_topics
|
12
|
+
end
|
13
|
+
|
14
|
+
# Values are the contents of:
|
15
|
+
# subject/topic
|
16
|
+
# subject/name
|
17
|
+
# subject/title
|
18
|
+
# subject/occupation
|
19
|
+
# with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
20
|
+
# @return [Array<String>] values for the topic_facet Solr field for this document (an empty Array if none)
|
21
|
+
def topic_facet
|
22
|
+
strip_punctuation(subject_topics + subject_names + subject_titles + subject_occupations)
|
23
|
+
end
|
24
|
+
|
25
|
+
# geographic_search values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
26
|
+
# @return [Array<String>] values for the geographic_facet Solr field for this document (an empty Array if none)
|
27
|
+
def geographic_facet
|
28
|
+
strip_punctuation(geographic_search)
|
29
|
+
end
|
30
|
+
|
31
|
+
# subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
32
|
+
# @return [Array<String>] values for the era_facet Solr field for this document (an empty Array if none)
|
33
|
+
def era_facet
|
34
|
+
strip_punctuation(subject_temporal)
|
35
|
+
end
|
36
|
+
|
37
|
+
# Values are the contents of:
|
38
|
+
# subject/geographic
|
39
|
+
# subject/hierarchicalGeographic
|
40
|
+
# subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
|
41
|
+
# @return [Array<String>] values for the geographic_search Solr field for this document (an empty Array if none)
|
42
|
+
def geographic_search
|
43
|
+
result = term_values([:subject, :geographic]) || []
|
44
|
+
|
45
|
+
# hierarchicalGeographic has sub elements
|
46
|
+
hierarchical_vals = mods_ng_xml.subject.hierarchicalGeographic.map do |hg_node|
|
47
|
+
hg_vals = hg_node.element_children.map(&:text).reject(&:empty?)
|
48
|
+
hg_vals.join(' ') unless hg_vals.empty?
|
49
|
+
end
|
50
|
+
|
51
|
+
trans_code_vals = mods_ng_xml.subject.geographicCode.translated_value || []
|
52
|
+
|
53
|
+
(result + hierarchical_vals + trans_code_vals).compact.uniq
|
54
|
+
end
|
55
|
+
|
56
|
+
# Values are the contents of:
|
57
|
+
# subject/name
|
58
|
+
# subject/occupation - no subelements
|
59
|
+
# subject/titleInfo
|
60
|
+
# @return [Array<String>] values for the subject_other_search Solr field for this document (an empty Array if none)
|
61
|
+
def subject_other_search
|
62
|
+
subject_occupations + subject_names + subject_titles
|
63
|
+
end
|
64
|
+
|
65
|
+
# Values are the contents of:
|
66
|
+
# subject/temporal
|
67
|
+
# subject/genre
|
68
|
+
# @return [Array<String>] values for the subject_other_subvy_search Solr field for this document (an empty Array if none)
|
69
|
+
def subject_other_subvy_search
|
70
|
+
vals = Array(subject_temporal)
|
71
|
+
gvals = term_values([:subject, :genre])
|
72
|
+
|
73
|
+
vals + Array(gvals)
|
74
|
+
end
|
75
|
+
|
76
|
+
# Values are the contents of:
|
77
|
+
# all subject subelements except subject/cartographic plus genre top level element
|
78
|
+
# @return [Array<String>] values for the subject_all_search Solr field for this document (an empty Array if none)
|
79
|
+
def subject_all_search
|
80
|
+
topic_search + geographic_search + subject_other_search + subject_other_subvy_search
|
81
|
+
end
|
82
|
+
|
83
|
+
protected #----------------------------------------------------------
|
84
|
+
|
85
|
+
# convenience method for subject/name/namePart values (to avoid parsing the mods for the same thing multiple times)
|
86
|
+
def subject_names
|
87
|
+
mods_ng_xml.subject.name_el
|
88
|
+
.select { |n_el| n_el.namePart }
|
89
|
+
.map { |name_el_w_np| name_el_w_np.namePart.map(&:text).reject(&:empty?) }
|
90
|
+
.reject(&:empty?)
|
91
|
+
.map { |parts| parts.join(', ').strip }
|
92
|
+
end
|
93
|
+
|
94
|
+
# convenience method for subject/occupation values (to avoid parsing the mods for the same thing multiple times)
|
95
|
+
def subject_occupations
|
96
|
+
term_values([:subject, :occupation]) || []
|
97
|
+
end
|
98
|
+
|
99
|
+
# convenience method for subject/temporal values (to avoid parsing the mods for the same thing multiple times)
|
100
|
+
def subject_temporal
|
101
|
+
term_values([:subject, :temporal]) || []
|
102
|
+
end
|
103
|
+
|
104
|
+
# Values are the contents of:
|
105
|
+
# subject/titleInfo/(subelements)
|
106
|
+
# convenience method for subject/titleInfo values (to avoid parsing the mods for the same thing multiple times)
|
107
|
+
def subject_titles
|
108
|
+
mods_ng_xml.subject.titleInfo.map do |ti_el|
|
109
|
+
parts = ti_el.element_children.map(&:text).reject(&:empty?)
|
110
|
+
parts.join(' ').strip unless parts.empty?
|
111
|
+
end.compact
|
112
|
+
end
|
113
|
+
|
114
|
+
# convenience method for subject/topic values (to avoid parsing the mods for the same thing multiple times)
|
115
|
+
def subject_topics
|
116
|
+
term_values([:subject, :topic]) || []
|
117
|
+
end
|
118
|
+
|
119
|
+
private
|
120
|
+
|
121
|
+
def strip_punctuation(arr)
|
122
|
+
arr&.map { |val| val.gsub(/[\\,;]$/, '').strip }
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
module Stanford
|
2
|
+
module Mods
|
3
|
+
module Title
|
4
|
+
# @return [String] value for title_245a_search field
|
5
|
+
def sw_short_title
|
6
|
+
short_titles&.compact&.reject(&:empty?)&.first
|
7
|
+
end
|
8
|
+
|
9
|
+
# Searchworks requires that the MODS has a '//titleInfo/title'
|
10
|
+
# @return [String] value for title_245_search, title_full_display
|
11
|
+
def sw_full_title(title_info = first_title_info_node, sortable: false)
|
12
|
+
return unless title_info&.children&.any?
|
13
|
+
|
14
|
+
title = title_info.title&.text&.strip
|
15
|
+
return if title.nil? || title.empty?
|
16
|
+
|
17
|
+
title = ''
|
18
|
+
previous_element = nil
|
19
|
+
|
20
|
+
title_info.children.select { |value| title_parts.include? value.name }.each do |value|
|
21
|
+
next if value.name == 'nonSort' && sortable
|
22
|
+
|
23
|
+
str = value.text.strip
|
24
|
+
next if str.empty?
|
25
|
+
|
26
|
+
delimiter = if title.empty? || title.end_with?(' ')
|
27
|
+
nil
|
28
|
+
elsif previous_element&.name == 'nonSort' && title.end_with?('-', '\'')
|
29
|
+
nil
|
30
|
+
elsif title.end_with?('.', ',', ':', ';')
|
31
|
+
' '
|
32
|
+
elsif value.name == 'subTitle'
|
33
|
+
' : '
|
34
|
+
elsif value.name == 'partName' && previous_element.name == 'partNumber'
|
35
|
+
', '
|
36
|
+
elsif value.name == 'partNumber' || value.name == 'partName'
|
37
|
+
'. '
|
38
|
+
else
|
39
|
+
' '
|
40
|
+
end
|
41
|
+
|
42
|
+
title += delimiter if delimiter
|
43
|
+
title += str
|
44
|
+
|
45
|
+
previous_element = value
|
46
|
+
end
|
47
|
+
|
48
|
+
title += "." unless title =~ /\s*[[:punct:]]$/
|
49
|
+
|
50
|
+
title.strip
|
51
|
+
end
|
52
|
+
|
53
|
+
def title_parts
|
54
|
+
%w[nonSort title subTitle partName partNumber]
|
55
|
+
end
|
56
|
+
|
57
|
+
# like sw_full_title without trailing \,/;:.
|
58
|
+
# spec from solrmarc-sw sw_index.properties
|
59
|
+
# title_display = custom, removeTrailingPunct(245abdefghijklmnopqrstuvwxyz, [\\\\,/;:], ([A-Za-z]{4}|[0-9]{3}|\\)|\\,))
|
60
|
+
# @return [String] value for title_display (like title_full_display without trailing punctuation)
|
61
|
+
def sw_title_display
|
62
|
+
sw_full_title&.sub(/[\.,;:\/\\]+$/, '')&.strip
|
63
|
+
end
|
64
|
+
|
65
|
+
# this includes all titles except the full title of the primary (first non-alternative) titleInfo
|
66
|
+
# @return [Array<String>] values for title_variant_search
|
67
|
+
def sw_addl_titles
|
68
|
+
(full_titles - Array(first_title_info_node&.full_title)).reject(&:blank?)
|
69
|
+
end
|
70
|
+
|
71
|
+
# Returns a sortable version of the main title
|
72
|
+
# @return [String] value for title_sort field
|
73
|
+
def sw_sort_title
|
74
|
+
val = sw_full_title(sortable: true) || ''
|
75
|
+
val.gsub(/[[:punct:]]*/, '').squeeze(" ").strip
|
76
|
+
end
|
77
|
+
|
78
|
+
private
|
79
|
+
|
80
|
+
# @return [Nokogiri::XML::Node] the first titleInfo node if present, else nil
|
81
|
+
def first_title_info_node
|
82
|
+
non_blank_nodes = mods_ng_xml.title_info.reject { |node| node.text.strip.empty? }
|
83
|
+
non_blank_nodes.find { |node| node.type_at != 'alternative' } || non_blank_nodes.first
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
@@ -4,9 +4,6 @@ module Stanford
|
|
4
4
|
##
|
5
5
|
# Geospatial coordinate parsing
|
6
6
|
class Coordinate
|
7
|
-
require 'stanford-mods/geo_utils'
|
8
|
-
include ::Stanford::Mods::GeoUtils
|
9
|
-
|
10
7
|
attr_reader :value
|
11
8
|
|
12
9
|
def initialize(value)
|
@@ -57,6 +54,30 @@ module Stanford
|
|
57
54
|
def coord
|
58
55
|
cleaner_coordinate(value)
|
59
56
|
end
|
57
|
+
|
58
|
+
# @param [String] val Coordinates value
|
59
|
+
# @return [String] cleaned value (strips parens and period), or the original value
|
60
|
+
def cleaner_coordinate(val)
|
61
|
+
matches = val.match(/^\(?([^)]+)\)?\.?$/)
|
62
|
+
matches ? matches[1] : val
|
63
|
+
end
|
64
|
+
|
65
|
+
# @param [String] point coordinate point in degrees notation
|
66
|
+
# @return [Float] converted value in decimal notation
|
67
|
+
def coord_to_decimal(point)
|
68
|
+
regex = Regexp.union(
|
69
|
+
/(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/,
|
70
|
+
/^\s*(?<dir>[NESW])\s*(?<deg>\d+(?:[.]\d+)?)\s*$/
|
71
|
+
)
|
72
|
+
match = regex.match(point)
|
73
|
+
return Float::INFINITY unless match
|
74
|
+
|
75
|
+
dec = match['deg'].to_f
|
76
|
+
dec += match['min'].to_f / 60
|
77
|
+
dec += match['sec'].to_f / 60 / 60
|
78
|
+
dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
|
79
|
+
dec
|
80
|
+
end
|
60
81
|
end
|
61
82
|
end
|
62
83
|
end
|