stanford-mods 2.6.2 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +24 -0
- data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
- data/lib/stanford-mods/concerns/name.rb +57 -0
- data/lib/stanford-mods/concerns/origin_info.rb +113 -0
- data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
- data/lib/stanford-mods/concerns/searchworks.rb +125 -0
- data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
- data/lib/stanford-mods/concerns/title.rb +87 -0
- data/lib/stanford-mods/coordinate.rb +21 -3
- data/lib/stanford-mods/date_parsing.rb +32 -288
- data/lib/stanford-mods/imprint.rb +149 -325
- data/lib/stanford-mods/record.rb +20 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +2 -0
- data/lib/stanford-mods.rb +13 -11
- data/spec/fixtures/searchworks_imprint_data.rb +38 -39
- data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
- data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
- data/spec/geo_spatial_spec.rb +1 -6
- data/spec/imprint_spec.rb +238 -207
- data/spec/name_spec.rb +28 -232
- data/spec/origin_info_spec.rb +34 -300
- data/spec/searchworks_basic_spec.rb +1 -3
- data/spec/searchworks_pub_dates_spec.rb +0 -215
- data/spec/searchworks_spec.rb +0 -21
- data/spec/searchworks_subject_raw_spec.rb +106 -105
- data/spec/searchworks_subject_spec.rb +19 -55
- data/spec/searchworks_title_spec.rb +5 -5
- data/stanford-mods.gemspec +1 -1
- metadata +24 -20
- data/.travis.yml +0 -17
- data/lib/marc_countries.rb +0 -387
- data/lib/stanford-mods/geo_utils.rb +0 -28
- data/lib/stanford-mods/name.rb +0 -80
- data/lib/stanford-mods/origin_info.rb +0 -489
- data/lib/stanford-mods/searchworks.rb +0 -333
- data/lib/stanford-mods/searchworks_subjects.rb +0 -196
- data/spec/date_parsing_spec.rb +0 -905
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70d3b7093c830baa3c12f4c2c438549eb451fa4b6bb6c57f458382f0e8e53dc2
|
4
|
+
data.tar.gz: 044edaeef524c4a701ebbc0e25f08d0c3fb5068b04cc36ab8769772004a73a85
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '097830e7c3b1136a279dfce41ac426ae860ab020f27bb1550a3f2a5f23b7fcbc1c7f7952f35f06a44aed7100d0d410c39adcbb17a70b270174c85b757d1aca4b'
|
7
|
+
data.tar.gz: 7aa1c33f53fdbd4160d99a14739b4dcbd37950f4d8ab8238ee1886d4b6db4aaa9b5dec4c92f74842db80aa9839ec2d7e27a570a4fb7cde4862afc54b79c9e6a4
|
@@ -0,0 +1,24 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ master ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ master ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
tests:
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
strategy:
|
13
|
+
matrix:
|
14
|
+
ruby: [jruby-9.3.2.0, 2.7, '3.0', '3.1']
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v2
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
ruby-version: ${{ matrix.ruby }}
|
21
|
+
- name: Install dependencies
|
22
|
+
run: bundle install
|
23
|
+
- name: Run tests
|
24
|
+
run: bundle exec rake
|
@@ -1,10 +1,9 @@
|
|
1
|
-
#
|
2
|
-
require 'mods'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
3
|
module Stanford
|
5
4
|
module Mods
|
6
5
|
# NON-SearchWorks specific wranglings of MODS cartographics metadata
|
7
|
-
|
6
|
+
module Geospatial
|
8
7
|
GMLNS = 'http://www.opengis.net/gml/3.2/'.freeze
|
9
8
|
|
10
9
|
# @return [Array{String}] subject cartographic coordinates values
|
@@ -27,8 +26,7 @@ module Stanford
|
|
27
26
|
lowers = v.xpath('gml:lowerCorner', 'gml' => GMLNS).text.split
|
28
27
|
"ENVELOPE(#{lowers[0]}, #{uppers[0]}, #{uppers[1]}, #{lowers[1]})"
|
29
28
|
end
|
30
|
-
rescue RuntimeError
|
31
|
-
logger.warn "failure parsing <extension> element: #{e.message}"
|
29
|
+
rescue RuntimeError
|
32
30
|
[]
|
33
31
|
end
|
34
32
|
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# NON-SearchWorks specific wranglings of MODS <name> metadata as a mixin to the Stanford::Mods::Record object
|
4
|
+
module Stanford
  module Mods
    # Author values derived from MODS <name> elements, written as a mixin for Stanford::Mods::Record.
    # Every method reads the including object's `mods_ng_xml`; `sw_sort_author` also reads `sort_title`.
    module Name
      # The first <mods><name> element carrying a marcrelator role whose value contains
      # 'creator' or 'author' (case-insensitive, matched anywhere in the value).
      # If there is none, the first name without any role.
      # @return [String, nil] value for author_1xx_search field; nil when neither kind of name exists
      def sw_main_author
        names = mods_ng_xml.plain_name
        main = names.find do |name|
          name.role.any? do |role|
            role.authority.include?('marcrelator') && role.value.any? { |v| v.match?(/creator|author/i) }
          end
        end
        main ||= names.find { |name| name.role.empty? }

        main&.display_value_w_date
      end

      # All names, in display_value_w_date form, except the main author.
      # Every entry equal to sw_main_author is removed (Array#-), not just the first one.
      # see Mods::Record.name in nom_terminology for details on the display_value algorithm
      # @return [Array<String>] values for author_7xx_search field
      def sw_addl_authors
        mods_ng_xml.plain_name.map(&:display_value_w_date) - [sw_main_author]
      end

      # @return [Array<String>] values for author_person_facet, author_person_display
      def sw_person_authors
        mods_ng_xml.personal_name.map(&:display_value_w_date)
      end

      # The display_value_w_date of every <mods><name> element whose type is not 'personal'
      # (names without a type are included).
      # @return [Array<String>] values for author_other_facet
      def sw_impersonal_authors
        mods_ng_xml.plain_name.reject { |name| name.type_at == 'personal' }.map(&:display_value_w_date)
      end

      # @return [Array<String>] values for author_corp_display
      def sw_corporate_authors
        mods_ng_xml.corporate_name.map(&:display_value_w_date)
      end

      # @return [Array<String>] values for author_meeting_display
      def sw_meeting_authors
        mods_ng_xml.conference_name.map(&:display_value_w_date)
      end

      # Returns a sortable version of the main author:
      #   main author + sorting title, with all punctuation removed,
      # which is the mods approximation of the value created for a marc record.
      # @return [String] value for author_sort field
      def sw_sort_author
        # substitute java Character.MAX_CODE_POINT for nil main_author so missing main authors sort last
        sort_key = "#{sw_main_author || "\u{10FFFF} "}#{sort_title}"
        sort_key.gsub(/[[:punct:]]+/, '').strip
      end
    end # module Name
  end # module Mods
end # module Stanford
|
@@ -0,0 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Parsing MODS /originInfo for Publication/Imprint data:
|
4
|
+
# * pub year for date slider facet
|
5
|
+
# * pub year for sorting
|
6
|
+
# * pub year for single display value
|
7
|
+
# * imprint info for display
|
8
|
+
# *
|
9
|
+
# These methods may be used by searchworks.rb file or by downstream apps
|
10
|
+
module Stanford
  module Mods
    # Publication date and imprint helpers built on MODS /originInfo.
    # Mixed into a record object that is expected to provide `mods_ng_xml`, `origin_info`
    # and `term_values`.
    module OriginInfo
      # return pub year as an Integer
      # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
      # look for a keyDate and use it if there is one; otherwise pick earliest date
      # @param [Array<Symbol>] fields originInfo date element names, tried in the given order
      # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
      # @return [Integer, nil] publication year as an Integer, or nil if no field yields one
      # @note for sorting:  5 B.C. => -5;  666 B.C. => -666
      def pub_year_int(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
        first_pub_date_value(fields, ignore_approximate, &:year_int_from_date_str)
      end

      # return a single string intended for lexical sorting for pub date
      # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
      # look for a keyDate and use it if there is one; otherwise pick earliest date
      # @param [Array<Symbol>] fields originInfo date element names, tried in the given order
      # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
      # @return [String, nil] single String containing publication year for lexical sorting, or nil if none
      # @note for string sorting  5 B.C. = -5  => -995;  6 B.C. => -994, so 6 B.C. sorts before 5 B.C.
      # @deprecated use pub_year_int
      def pub_year_sort_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
        first_pub_date_value(fields, ignore_approximate, &:sortable_year_string_from_date_str)
      end

      # return a single string intended for display of pub year
      # 0 < year < 1000:  add A.D. suffix
      # year < 0:  add B.C. suffix.  ('-5'  =>  '5 B.C.', '700 B.C.'  => '700 B.C.')
      # 195u =>  195x
      # 19uu => 19xx
      #   '7th century' => '7th century'
      #   date ranges?
      # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
      # look for a keyDate and use it if there is one; otherwise pick earliest date
      # @param [Array<Symbol>] fields originInfo date element names, tried in the given order
      # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
      #   should be ignored; false if approximate dates should be included
      # @return [String, nil] display form as produced by DateParsing#date_str_for_display, or nil if none
      def pub_year_display_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
        first_pub_date_value(fields, ignore_approximate, &:date_str_for_display)
      end

      # NOTE(review): `origin_info` is called on the record itself rather than on mods_ng_xml;
      # presumably the host object forwards it - confirm against Stanford::Mods::Record.
      # @return [Array<Stanford::Mods::Imprint>] array of imprint objects
      # @private
      def imprints
        origin_info.map { |el| Stanford::Mods::Imprint.new(el) }
      end

      # @return the result of term_values for originInfo/place/placeTerm
      def place
        term_values([:origin_info, :place, :placeTerm])
      end

      # @return [String] single String containing imprint information for display;
      #   the non-empty display strings of all imprints joined with '; '
      def imprint_display_str
        imprints.map(&:display_str).reject(&:empty?).join('; ')
      end

      # Tells whether a date element has a qualifier attribute of 'approximate' or 'questionable'.
      # Objects that do not respond to [] are never considered approximate.
      # @param [Nokogiri::XML::Element] node the date element
      # @return [Boolean]
      # @private
      def is_approximate(node)
        return false unless node.respond_to?('[]')

        %w[approximate questionable].include?(node['qualifier'])
      end

      # get earliest parseable year from the passed date elements
      # Elements with keyDate='yes' are preferred; the remaining elements are only
      # considered when no keyDate element produces a sortable year.
      # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
      # @return [Stanford::Mods::DateParsing, nil] nil when nothing is sortable
      def self.best_or_earliest_year(date_el_array)
        key_dates, other_dates = date_el_array.partition { |node| node['keyDate'] == 'yes' }
        sortable = lambda do |nodes|
          nodes.map { |node| DateParsing.new(node) }.select(&:sortable_year_string_from_date_str)
        end

        candidates = sortable.call(key_dates)
        candidates = sortable.call(other_dates) if candidates.empty?

        # this is a little weird; instead of just the earliest sorting date, if there are multiple
        # dates with the same sort key, the last occurring one wins (later hash writes overwrite earlier ones)
        by_sort_key = candidates.each_with_object({}) do |date, acc|
          acc[date.sortable_year_string_from_date_str] = date
        end

        by_sort_key[by_sort_key.keys.min]
      end

      private

      # Shared lookup behind the pub_year_* methods: walks the fields in order, picks the best
      # date of each, and returns the first truthy value the block extracts from it.
      # @param [Array<Symbol>] fields originInfo date element names, tried in the given order
      # @param [Boolean] ignore_approximate whether to drop elements for which is_approximate is true
      # @yieldparam [Stanford::Mods::DateParsing] date the best date found for the current field
      # @return [Object, nil] nil when no field yields a value
      def first_pub_date_value(fields, ignore_approximate)
        fields.each do |date_key|
          values = mods_ng_xml.origin_info.send(date_key)
          values = values.reject { |node| is_approximate(node) } if ignore_approximate

          best = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
          value = yield(best) if best
          return value if value
        end

        nil
      end
    end # module OriginInfo
  end # module Mods
end # module Stanford
|
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Stanford
|
4
4
|
module Mods
|
@@ -7,7 +7,7 @@ module Stanford
|
|
7
7
|
# Note: mods_ng_xml_location.physicalLocation should find top level and relatedItem.
|
8
8
|
# Each method here expects to find at most ONE matching element. Subsequent potential matches
|
9
9
|
# are ignored.
|
10
|
-
|
10
|
+
module PhysicalLocation
|
11
11
|
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
12
12
|
# so use _location to get the data from either one of them
|
13
13
|
# @return [String] box number (note: single valued and might be something like 35A)
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# SearchWorks specific wranglings of MODS metadata as a mixin to the Stanford::Mods::Record object
|
4
|
+
module Stanford
  module Mods
    # SearchWorks specific wranglings of MODS metadata, written as a mixin for Stanford::Mods::Record.
    # The including object is expected to provide `mods_ng_xml`, `term_values` and `typeOfResource`;
    # the constants ISO_639 and SEARCHWORKS_LANGUAGES must be loaded.
    module Searchworks
      # include languages known to SearchWorks; try to error correct when possible (e.g. when ISO-639 disagrees with MARC standard)
      # NOTE(review): a code that is missing from SEARCHWORKS_LANGUAGES contributes whatever
      # SEARCHWORKS_LANGUAGES[code] returns (nil for a plain Hash); the result is not compacted.
      # @return [Array] unique language names for this record
      def sw_language_facet
        mods_ng_xml.language.flat_map do |lang_node|
          # get languageTerm codes and add their translations to the result
          names = lang_node.code_term.flat_map do |code_term|
            codes = code_term.text.split(/[,|\ ]/).map(&:strip).reject(&:empty?)

            # =~ (rather than match?) so that a nil authority simply takes the non-ISO branch
            if code_term.authority =~ /^iso639/
              codes.each_with_object([]) do |code, found|
                entry = ISO_639.find(code)
                next unless entry # codes unknown to ISO-639 are dropped

                english_name = entry.english_name
                # prefer the ISO-639 English name when SearchWorks knows it, else SearchWorks' own name for the code
                found << (SEARCHWORKS_LANGUAGES.has_value?(english_name) ? english_name : SEARCHWORKS_LANGUAGES[code])
              end
            else
              codes.map { |code| SEARCHWORKS_LANGUAGES[code] }
            end
          end

          # add languageTerm text values
          text_values = lang_node.text_term.map { |text_term| text_term.text.strip }
          names.concat(text_values.select { |val| !val.empty? && SEARCHWORKS_LANGUAGES.has_value?(val) })

          # add language values that aren't in languageTerm subelement
          names << lang_node.text if lang_node.languageTerm.empty? && SEARCHWORKS_LANGUAGES.has_value?(lang_node.text)

          names
        end.uniq
      end

      # select one or more format values from the controlled vocabulary per JVine Summer 2014
      #   http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format_main_ssim&rows=0&facet.sort=index
      # https://github.com/sul-dlss/stanford-mods/issues/66 - For geodata, the
      # resource type should be only Map and not include Software, multimedia.
      # @return [Array<String>] value in the SearchWorks controlled vocabulary; empty when typeOfResource is nil
      def format_main
        types = typeOfResource
        return [] unless types

        genres = term_values(:genre) || []
        issuance = term_values([:origin_info, :issuance]) || []
        frequency = term_values([:origin_info, :frequency]) || []
        dataset = genres.include?('dataset') || genres.include?('Dataset')

        formats = []
        formats << 'Dataset' if dataset
        formats << 'Archive/Manuscript' if types.any? { |t| t.manuscript == 'yes' }

        types.each do |type|
          case type.text
          when 'cartographic'
            formats << 'Map'
          when 'mixed material'
            formats << 'Archive/Manuscript'
          when 'moving image'
            formats << 'Video'
          when 'notated music'
            formats << 'Music score'
          when 'software, multimedia'
            # suppressed for geodata and datasets, which already have a more specific format
            formats << 'Software/Multimedia' unless dataset || types.map(&:text).include?('cartographic')
          when 'sound recording-musical'
            formats << 'Music recording'
          when 'sound recording-nonmusical', 'sound recording'
            formats << 'Sound recording'
          when 'still image'
            formats << 'Image'
          when 'text'
            periodical = issuance.include?('continuing') || issuance.include?('serial') || frequency.any? { |x| !x.empty? }
            archived_website = genres.any? { |x| x.casecmp('archived website') == 0 }

            formats << 'Journal/Periodical' if periodical
            formats << 'Archived website' if archived_website
            formats << 'Book' unless periodical || archived_website
          when 'three dimensional object'
            formats << 'Object'
          end
        end

        formats.uniq
      end

      # The record's genre values plus the SearchWorks label for a few well-known genres.
      # @return [Array<String>] values for the genre facet in SearchWorks; empty when there is no genre
      def sw_genre
        genres = term_values(:genre)
        return [] unless genres

        # SearchWorks label => genre spellings that trigger it (checked in this order)
        derived_labels = {
          'Thesis/Dissertation' => ['thesis', 'Thesis'],
          'Conference proceedings' => ['conference publication', 'Conference publication', 'Conference Publication'],
          'Government document' => ['government publication', 'Government publication', 'Government Publication'],
          'Technical report' => ['technical report', 'Technical report', 'Technical Report']
        }

        values = genres.map(&:to_s)
        derived_labels.each do |label, spellings|
          values << label if (genres & spellings).any?
        end

        values.uniq
      end

      # @return [String, nil] the first recordInfo/recordIdentifier with every 'a' removed
      #   (intended to leave only the numeric catkey), or nil if none exists
      def catkey
        # term_values is treated as nilable elsewhere in this module, so guard before calling first
        term_values([:record_info, :recordIdentifier])&.first&.delete('a')
      end
    end # module Searchworks
  end # module Mods
end # module Stanford
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# SearchWorks specific wranglings of MODS *subject* metadata as a mixin to the Stanford::Mods::Record object
|
4
|
+
module Stanford
  module Mods
    # SearchWorks specific wranglings of MODS *subject* metadata, written as a mixin for
    # Stanford::Mods::Record. The including object supplies `mods_ng_xml` and `term_values`.
    module SearchworksSubjects
      # Values are the contents of:
      #   mods/subject/topic
      # @return [Array<String>] values for the topic_search Solr field for this document (empty if none)
      def topic_search
        subject_topics
      end

      # Values are the contents of:
      #   subject/topic
      #   subject/name
      #   subject/title
      #   subject/occupation
      #  each with one trailing comma, semicolon or backslash removed and then stripped of surrounding whitespace
      # @return [Array<String>] values for the topic_facet Solr field for this document (empty if none)
      def topic_facet
        combined = subject_topics + subject_names + subject_titles + subject_occupations
        strip_punctuation(combined)
      end

      # geographic_search values with one trailing comma, semicolon or backslash removed and then stripped
      # @return [Array<String>] values for the geographic_facet Solr field for this document (empty if none)
      def geographic_facet
        strip_punctuation(geographic_search)
      end

      # subject/temporal values with one trailing comma, semicolon or backslash removed and then stripped
      # @return [Array<String>] values for the era_facet Solr field for this document (empty if none)
      def era_facet
        strip_punctuation(subject_temporal)
      end

      # Values are the contents of:
      #   subject/geographic
      #   subject/hierarchicalGeographic (non-empty sub element texts joined with a space)
      #   subject/geographicCode (translated values; duplicates of the values above are dropped)
      # @return [Array<String>] values for the geographic_search Solr field for this document (empty if none)
      def geographic_search
        plain_vals = term_values([:subject, :geographic]) || []

        # hierarchicalGeographic has sub elements
        hierarchical_vals = mods_ng_xml.subject.hierarchicalGeographic.map do |hg_node|
          pieces = hg_node.element_children.map(&:text).reject(&:empty?)
          pieces.empty? ? nil : pieces.join(' ')
        end

        translated_vals = mods_ng_xml.subject.geographicCode.translated_value || []

        all_vals = plain_vals + hierarchical_vals + translated_vals
        all_vals.compact.uniq
      end

      # Values are the contents of:
      #   subject/name
      #   subject/occupation  - no subelements
      #   subject/titleInfo
      # @return [Array<String>] values for the subject_other_search Solr field for this document (empty if none)
      def subject_other_search
        subject_occupations + subject_names + subject_titles
      end

      # Values are the contents of:
      #   subject/temporal
      #   subject/genre
      # @return [Array<String>] values for the subject_other_subvy_search Solr field for this document (empty if none)
      def subject_other_subvy_search
        genre_vals = term_values([:subject, :genre])

        Array(subject_temporal) + Array(genre_vals)
      end

      # Values are the contents of:
      #  topic_search, geographic_search, subject_other_search and subject_other_subvy_search, in that order
      # @return [Array<String>] values for the subject_all_search Solr field for this document (empty if none)
      def subject_all_search
        [topic_search, geographic_search, subject_other_search, subject_other_subvy_search].reduce([], :+)
      end

      protected #----------------------------------------------------------

      # convenience method for subject/name/namePart values (to avoid parsing the mods for the same thing multiple times)
      # The non-empty namePart texts of each name are joined with ', '; names without any are skipped.
      def subject_names
        mods_ng_xml.subject.name_el.each_with_object([]) do |name_node, names|
          next unless name_node.namePart

          parts = name_node.namePart.map(&:text).reject(&:empty?)
          names << parts.join(', ').strip unless parts.empty?
        end
      end

      # convenience method for subject/occupation values (to avoid parsing the mods for the same thing multiple times)
      def subject_occupations
        term_values([:subject, :occupation]) || []
      end

      # convenience method for subject/temporal values (to avoid parsing the mods for the same thing multiple times)
      def subject_temporal
        term_values([:subject, :temporal]) || []
      end

      # Values are the contents of:
      #   subject/titleInfo/(subelements)
      # convenience method for subject/titleInfo values (to avoid parsing the mods for the same thing multiple times)
      # The non-empty sub element texts of each titleInfo are joined with a space; empty titleInfos are skipped.
      def subject_titles
        mods_ng_xml.subject.titleInfo.each_with_object([]) do |ti_el, titles|
          parts = ti_el.element_children.map(&:text).reject(&:empty?)
          titles << parts.join(' ').strip unless parts.empty?
        end
      end

      # convenience method for subject/topic values (to avoid parsing the mods for the same thing multiple times)
      def subject_topics
        term_values([:subject, :topic]) || []
      end

      private

      # Removes a single backslash, comma or semicolon at the end of a line from each value,
      # then strips surrounding whitespace.
      # @param [Array<String>, nil] arr
      # @return [Array<String>, nil] nil when arr is nil
      def strip_punctuation(arr)
        return if arr.nil?

        arr.map { |val| val.gsub(/[\\,;]$/, '').strip }
      end
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# frozen_string_literal: true

module Stanford
  module Mods
    # SearchWorks title values derived from MODS <titleInfo>, written as a mixin for Stanford::Mods::Record.
    # The including object supplies `mods_ng_xml`, `short_titles` and `full_titles`.
    module Title
      # @return [String, nil] value for title_245a_search field: the first non-empty short title
      def sw_short_title
        short_titles&.compact&.find { |short_title| !short_title.empty? }
      end

      # Builds the full title from the nonSort, title, subTitle, partName and partNumber children
      # of a titleInfo node, in document order, inserting ISBD-like punctuation between them.
      # Searchworks requires that the MODS has a '//titleInfo/title'
      # @param title_info the titleInfo node to use (defaults to first_title_info_node)
      # @param [Boolean] sortable when true, nonSort parts are left out
      # @return [String, nil] value for title_245_search, title_full_display;
      #   nil when the node has no children or no non-blank title
      def sw_full_title(title_info = first_title_info_node, sortable: false)
        return unless title_info&.children&.any?

        main_title = title_info.title&.text&.strip
        return if main_title.nil? || main_title.empty?

        assembled = +''
        previous_element = nil

        title_info.children.select { |child| title_parts.include?(child.name) }.each do |child|
          next if child.name == 'nonSort' && sortable

          str = child.text.strip
          next if str.empty?

          delimiter = title_part_delimiter(assembled, child, previous_element)
          assembled << delimiter if delimiter
          assembled << str

          previous_element = child
        end

        # close the title with a period unless it already ends in punctuation
        assembled << '.' unless assembled.match?(/[[:punct:]]$/)

        assembled.strip
      end

      # @return [Array<String>] names of the titleInfo sub elements that make up the full title
      def title_parts
        %w[nonSort title subTitle partName partNumber]
      end

      # like sw_full_title without trailing \,/;:.
      # spec from solrmarc-sw   sw_index.properties
      #    title_display = custom, removeTrailingPunct(245abdefghijklmnopqrstuvwxyz, [\\\\,/;:], ([A-Za-z]{4}|[0-9]{3}|\\)|\\,))
      # @return [String, nil] value for title_display (like title_full_display without trailing punctuation)
      def sw_title_display
        sw_full_title&.sub(%r{[.,;:/\\]+$}, '')&.strip
      end

      # this includes all titles except the full_title of the main titleInfo node
      # (first_title_info_node); blank values are dropped
      # @return [Array<String>] values for title_variant_search
      def sw_addl_titles
        main_titles = Array(first_title_info_node&.full_title)
        # same test as ActiveSupport's String#blank?/nil.blank?, without depending on ActiveSupport
        (full_titles - main_titles).reject { |title| title.nil? || title.match?(/\A[[:space:]]*\z/) }
      end

      # Returns a sortable version of the main title: the full title without nonSort parts
      # and punctuation, with runs of spaces squeezed to one
      # @return [String] value for title_sort field ('' when there is no full title)
      def sw_sort_title
        val = sw_full_title(sortable: true) || ''
        val.gsub(/[[:punct:]]+/, '').squeeze(' ').strip
      end

      private

      # @return [Nokogiri::XML::Node] the first non-blank titleInfo node that is not type 'alternative';
      #   if all are alternative, the first non-blank one; nil if every titleInfo is blank
      def first_title_info_node
        non_blank_nodes = mods_ng_xml.title_info.reject { |node| node.text.strip.empty? }
        non_blank_nodes.find { |node| node.type_at != 'alternative' } || non_blank_nodes.first
      end

      # Chooses what to insert between the title built so far and the next part.
      # @param [String] so_far the title assembled from the preceding parts
      # @param element the part about to be appended
      # @param previous_element the part appended last (nil for the first part)
      # @return [String, nil] the delimiter, or nil when the part is appended directly
      def title_part_delimiter(so_far, element, previous_element)
        if so_far.empty? || so_far.end_with?(' ')
          nil
        elsif previous_element&.name == 'nonSort' && so_far.end_with?('-', '\'')
          # e.g. "L'" or "Al-" attaches directly to the word that follows
          nil
        elsif so_far.end_with?('.', ',', ':', ';')
          ' '
        elsif element.name == 'subTitle'
          ' : '
        elsif element.name == 'partName' && previous_element&.name == 'partNumber'
          ', '
        elsif element.name == 'partNumber' || element.name == 'partName'
          '. '
        else
          ' '
        end
      end
    end
  end
end
|
@@ -4,9 +4,6 @@ module Stanford
|
|
4
4
|
##
|
5
5
|
# Geospatial coordinate parsing
|
6
6
|
class Coordinate
|
7
|
-
require 'stanford-mods/geo_utils'
|
8
|
-
include ::Stanford::Mods::GeoUtils
|
9
|
-
|
10
7
|
attr_reader :value
|
11
8
|
|
12
9
|
def initialize(value)
|
@@ -57,6 +54,27 @@ module Stanford
|
|
57
54
|
def coord
|
58
55
|
cleaner_coordinate(value)
|
59
56
|
end
|
57
|
+
|
58
|
+
# Drops one optional opening parenthesis and one optional closing parenthesis from a
# coordinates value. A final period is dropped only when it follows the closing parenthesis,
# because the captured part greedily takes every character that is not ')'.
# @param [String] val Coordinates value
# @return [String] the cleaned value, or the original value when the pattern does not match
def cleaner_coordinate(val)
  inner = val[/^\(?([^)]+)\)?\.?$/, 1]
  inner || val
end
|
64
|
+
|
65
|
+
# Converts a single coordinate such as "W 123°30ʹ15ʺ" to decimal degrees.
# Minutes and seconds are optional; W and S directions give a negative result.
# @param [String] point coordinate point in degrees notation
# @return [Float] converted value in decimal notation; Float::INFINITY when point cannot be parsed
def coord_to_decimal(point)
  parsed = /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/.match(point)
  return Float::INFINITY if parsed.nil?

  # absent minutes/seconds are nil, and nil.to_f contributes 0.0
  magnitude = parsed[:deg].to_i
  magnitude += parsed[:min].to_f / 60
  magnitude += parsed[:sec].to_f / 60 / 60

  %w[W S].include?(parsed[:dir]) ? -1 * magnitude : magnitude
end
|
60
78
|
end
|
61
79
|
end
|
62
80
|
end
|