stanford-mods 2.6.2 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +24 -0
  3. data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
  4. data/lib/stanford-mods/concerns/name.rb +57 -0
  5. data/lib/stanford-mods/concerns/origin_info.rb +113 -0
  6. data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
  7. data/lib/stanford-mods/concerns/searchworks.rb +125 -0
  8. data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
  9. data/lib/stanford-mods/concerns/title.rb +87 -0
  10. data/lib/stanford-mods/coordinate.rb +21 -3
  11. data/lib/stanford-mods/date_parsing.rb +32 -288
  12. data/lib/stanford-mods/imprint.rb +149 -325
  13. data/lib/stanford-mods/record.rb +20 -0
  14. data/lib/stanford-mods/version.rb +1 -1
  15. data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +2 -0
  16. data/lib/stanford-mods.rb +13 -11
  17. data/spec/fixtures/searchworks_imprint_data.rb +38 -39
  18. data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
  19. data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
  20. data/spec/geo_spatial_spec.rb +1 -6
  21. data/spec/imprint_spec.rb +238 -207
  22. data/spec/name_spec.rb +28 -232
  23. data/spec/origin_info_spec.rb +34 -300
  24. data/spec/searchworks_basic_spec.rb +1 -3
  25. data/spec/searchworks_pub_dates_spec.rb +0 -215
  26. data/spec/searchworks_spec.rb +0 -21
  27. data/spec/searchworks_subject_raw_spec.rb +106 -105
  28. data/spec/searchworks_subject_spec.rb +19 -55
  29. data/spec/searchworks_title_spec.rb +5 -5
  30. data/stanford-mods.gemspec +1 -1
  31. metadata +24 -20
  32. data/.travis.yml +0 -17
  33. data/lib/marc_countries.rb +0 -387
  34. data/lib/stanford-mods/geo_utils.rb +0 -28
  35. data/lib/stanford-mods/name.rb +0 -80
  36. data/lib/stanford-mods/origin_info.rb +0 -489
  37. data/lib/stanford-mods/searchworks.rb +0 -333
  38. data/lib/stanford-mods/searchworks_subjects.rb +0 -196
  39. data/spec/date_parsing_spec.rb +0 -905
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1edb227541109c47923e060caf25a50ffeeaf2d362eb4a9276cc9bc89cfe9f51
4
- data.tar.gz: 7b28b171dd95a6a0c8d2206bede0310cd3c6cd04f3a6280c84b27ce7aa8d4566
3
+ metadata.gz: 70d3b7093c830baa3c12f4c2c438549eb451fa4b6bb6c57f458382f0e8e53dc2
4
+ data.tar.gz: 044edaeef524c4a701ebbc0e25f08d0c3fb5068b04cc36ab8769772004a73a85
5
5
  SHA512:
6
- metadata.gz: 124ece600b2e52bfea0dbfd125ecc2f2b6dfaad5774009fd76a21a14e06cff916287497c76929616b45e7bf461b7ba72ee700a49e362d4e22bdfb5f28af54e0b
7
- data.tar.gz: 1b8d1a934be80ad1f113e0e82eaa7e3084256dd5ef7cfee0c0d1d74b3d4f9b809592aa1fc34972e003d0ecbca18a411aa6c64edf40e2ae4aaddc091ffc6cbc8e
6
+ metadata.gz: '097830e7c3b1136a279dfce41ac426ae860ab020f27bb1550a3f2a5f23b7fcbc1c7f7952f35f06a44aed7100d0d410c39adcbb17a70b270174c85b757d1aca4b'
7
+ data.tar.gz: 7aa1c33f53fdbd4160d99a14739b4dcbd37950f4d8ab8238ee1886d4b6db4aaa9b5dec4c92f74842db80aa9839ec2d7e27a570a4fb7cde4862afc54b79c9e6a4
@@ -0,0 +1,24 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ pull_request:
7
+ branches: [ master ]
8
+
9
+ jobs:
10
+ tests:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ ruby: [jruby-9.3.2.0, 2.7, '3.0', '3.1']
15
+ steps:
16
+ - uses: actions/checkout@v2
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ ruby-version: ${{ matrix.ruby }}
21
+ - name: Install dependencies
22
+ run: bundle install
23
+ - name: Run tests
24
+ run: bundle exec rake
@@ -1,10 +1,9 @@
1
- # encoding: UTF-8
2
- require 'mods'
1
+ # frozen_string_literal: true
3
2
 
4
3
  module Stanford
5
4
  module Mods
6
5
  # NON-SearchWorks specific wranglings of MODS cartographics metadata
7
- class Record < ::Mods::Record
6
+ module Geospatial
8
7
  GMLNS = 'http://www.opengis.net/gml/3.2/'.freeze
9
8
 
10
9
  # @return [Array{String}] subject cartographic coordinates values
@@ -27,8 +26,7 @@ module Stanford
27
26
  lowers = v.xpath('gml:lowerCorner', 'gml' => GMLNS).text.split
28
27
  "ENVELOPE(#{lowers[0]}, #{uppers[0]}, #{uppers[1]}, #{lowers[1]})"
29
28
  end
30
- rescue RuntimeError => e
31
- logger.warn "failure parsing <extension> element: #{e.message}"
29
+ rescue RuntimeError
32
30
  []
33
31
  end
34
32
 
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ # NON-SearchWorks specific wranglings of MODS <name> metadata as a mixin to the Stanford::Mods::Record object
4
+ module Stanford
5
+ module Mods
6
+ module Name
7
+ # the first encountered <mods><name> element with marcrelator flavor role of 'Creator' or 'Author'.
8
+ # if no marcrelator 'Creator' or 'Author', the first name without a role.
9
+ # if no name without a role, then nil
10
+ # @return [String] value for author_1xx_search field
11
+ def sw_main_author
12
+ result = mods_ng_xml.plain_name.find { |n| n.role.any? { |r| r.authority.include?('marcrelator') && r.value.any? { |v| v.match(/creator/i) || v.match?(/author/i) } } }
13
+ result ||= mods_ng_xml.plain_name.find { |n| n.role.empty? }
14
+
15
+ result&.display_value_w_date
16
+ end
17
+
18
+ # all names, in display form, except the main_author
19
+ # names will be the display_value_w_date form
20
+ # see Mods::Record.name in nom_terminology for details on the display_value algorithm
21
+ # @return [Array<String>] values for author_7xx_search field
22
+ def sw_addl_authors
23
+ mods_ng_xml.plain_name.map(&:display_value_w_date) - [sw_main_author]
24
+ end
25
+
26
+ # @return [Array<String>] values for author_person_facet, author_person_display
27
+ def sw_person_authors
28
+ mods_ng_xml.personal_name.map(&:display_value_w_date)
29
+ end
30
+
31
+ # return the display_value_w_date for all <mods><name> elements that do not have type='personal'
32
+ # @return [Array<String>] values for author_other_facet
33
+ def sw_impersonal_authors
34
+ mods_ng_xml.plain_name.select { |n| n.type_at != 'personal' }.map(&:display_value_w_date)
35
+ end
36
+
37
+ # @return [Array<String>] values for author_corp_display
38
+ def sw_corporate_authors
39
+ mods_ng_xml.corporate_name.map(&:display_value_w_date)
40
+ end
41
+
42
+ # @return [Array<String>] values for author_meeting_display
43
+ def sw_meeting_authors
44
+ mods_ng_xml.conference_name.map(&:display_value_w_date)
45
+ end
46
+
47
+ # Returns a sortable version of the main_author:
48
+ # main_author + sorting title
49
+ # which is the mods approximation of the value created for a marc record
50
+ # @return [String] value for author_sort field
51
+ def sw_sort_author
52
+ # substitute java Character.MAX_CODE_POINT for nil main_author so missing main authors sort last
53
+ "#{sw_main_author || "\u{10FFFF} " }#{sort_title}".gsub(/[[:punct:]]*/, '').strip
54
+ end
55
+ end # Module Name
56
+ end # Module Mods
57
+ end # Module Stanford
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parsing MODS /originInfo for Publication/Imprint data:
4
+ # * pub year for date slider facet
5
+ # * pub year for sorting
6
+ # * pub year for single display value
7
+ # * imprint info for display
8
+ # *
9
+ # These methods may be used by searchworks.rb file or by downstream apps
10
+ module Stanford
11
+ module Mods
12
+ module OriginInfo
13
+ # return pub year as an Integer
14
+ # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
15
+ # look for a keyDate and use it if there is one; otherwise pick earliest date
16
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
17
+ # @return [Integer, nil] publication year as an Integer, or nil if none of the fields yields a year
18
+ # @note for sorting: 5 B.C. => -5; 666 B.C. => -666
19
+ def pub_year_int(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
20
+ fields.each do |date_key|
21
+ values = mods_ng_xml.origin_info.send(date_key)
22
+ values = values.reject(&method(:is_approximate)) if ignore_approximate
23
+
24
+ earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
25
+ return earliest_date.year_int_from_date_str if earliest_date&.year_int_from_date_str
26
+ end; nil
27
+ end
28
+
29
+ # return a single string intended for lexical sorting for pub date
30
+ # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
31
+ # look for a keyDate and use it if there is one; otherwise pick earliest date
32
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
33
+ # @return [String] single String containing publication year for lexical sorting
34
+ # @note for string sorting 5 B.C. = -5 => -995; 6 B.C. => -994, so 6 B.C. sorts before 5 B.C.
35
+ # @deprecated use pub_year_int
36
+ def pub_year_sort_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
37
+ fields.each do |date_key|
38
+ values = mods_ng_xml.origin_info.send(date_key)
39
+ values = values.reject(&method(:is_approximate)) if ignore_approximate
40
+
41
+ earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
42
+ return earliest_date.sortable_year_string_from_date_str if earliest_date&.sortable_year_string_from_date_str
43
+ end; nil
44
+ end
45
+
46
+ # return a single string intended for display of pub year
47
+ # 0 < year < 1000: add A.D. suffix
48
+ # year < 0: add B.C. suffix. ('-5' => '5 B.C.', '700 B.C.' => '700 B.C.')
49
+ # 195u => 195x
50
+ # 19uu => 19xx
51
+ # '-5' => '5 B.C.'
52
+ # '700 B.C.' => '700 B.C.'
53
+ # '7th century' => '7th century'
54
+ # date ranges?
55
+ # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
56
+ # look for a keyDate and use it if there is one; otherwise pick earliest date
57
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
58
+ # should be ignored; false if approximate dates should be included
59
+ def pub_year_display_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
60
+ fields.each do |date_key|
61
+ values = mods_ng_xml.origin_info.send(date_key)
62
+ values = values.reject(&method(:is_approximate)) if ignore_approximate
63
+
64
+ earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
65
+ return earliest_date.date_str_for_display if earliest_date&.date_str_for_display
66
+ end; nil
67
+ end
68
+
69
+ # @return [Array<Stanford::Mods::Imprint>] array of imprint objects
70
+ # @private
71
+ def imprints
72
+ origin_info.map { |el| Stanford::Mods::Imprint.new(el) }
73
+ end
74
+
75
+ def place
76
+ term_values([:origin_info, :place, :placeTerm])
77
+ end
78
+
79
+ # @return [String] single String containing imprint information for display
80
+ def imprint_display_str
81
+ imprints.map(&:display_str).reject(&:empty?).join('; ')
82
+ end
83
+
84
+ # true if the element has a qualifier attribute of 'approximate' or 'questionable' (used to reject such date elements when ignore_approximate is set)
85
+ # @param [Nokogiri::XML::Element] node the date element
86
+ # @return [Boolean]
87
+ # @private
88
+ def is_approximate(node)
89
+ qualifier = node["qualifier"] if node.respond_to?('[]')
90
+ qualifier == 'approximate' || qualifier == 'questionable'
91
+ end
92
+
93
+ # get earliest parseable year from the passed date elements
94
+ # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
95
+ # @return [Stanford::Mods::DateParsing, nil] the selected date, or nil if none of the elements has a sortable year
96
+ def self.best_or_earliest_year(date_el_array)
97
+ key_dates, other_dates = date_el_array.partition { |node| node['keyDate'] == 'yes' }
98
+
99
+ sortable_dates = key_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str)
100
+ sortable_dates = other_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str) if sortable_dates.empty?
101
+ results = {}
102
+
103
+ # this is a little weird; instead of just the earliest sorting date, if there are multiple
104
+ # dates with the same sort key, we want to make sure we get the last occurring one?
105
+ sortable_dates.each do |v|
106
+ results[v.sortable_year_string_from_date_str] = v
107
+ end
108
+
109
+ results[results.keys.min]
110
+ end
111
+ end # module OriginInfo
112
+ end
113
+ end
@@ -1,4 +1,4 @@
1
- require 'mods'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Stanford
4
4
  module Mods
@@ -7,7 +7,7 @@ module Stanford
7
7
  # Note: mods_ng_xml_location.physicalLocation should find top level and relatedItem.
8
8
  # Each method here expects to find at most ONE matching element. Subsequent potential matches
9
9
  # are ignored.
10
- class Record < ::Mods::Record
10
+ module PhysicalLocation
11
11
  # data in location/physicalLocation or in relatedItem/location/physicalLocation
12
12
  # so use _location to get the data from either one of them
13
13
  # @return [String] box number (note: single valued and might be something like 35A)
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ # SearchWorks specific wranglings of MODS metadata as a mixin to the Stanford::Mods::Record object
4
+ module Stanford
5
+ module Mods
6
+ module Searchworks
7
+ # include languages known to SearchWorks; try to error-correct when possible (e.g. when ISO-639 disagrees with MARC standard)
8
+ def sw_language_facet
9
+ mods_ng_xml.language.flat_map do |n|
10
+ # get languageTerm codes and add their translations to the result
11
+ result = n.code_term.flat_map do |ct|
12
+ if ct.authority =~ /^iso639/
13
+ vals = ct.text.split(/[,|\ ]/).reject { |x| x.strip.empty? }
14
+ vals.select { |v| ISO_639.find(v.strip) }.map do |v|
15
+ iso639_val = ISO_639.find(v.strip).english_name
16
+
17
+ if SEARCHWORKS_LANGUAGES.has_value?(iso639_val)
18
+ iso639_val
19
+ else
20
+ SEARCHWORKS_LANGUAGES[v.strip]
21
+ end
22
+ end
23
+ else
24
+ vals = ct.text.split(/[,|\ ]/).reject { |x| x.strip.empty? }
25
+
26
+ vals.map do |v|
27
+ SEARCHWORKS_LANGUAGES[v.strip]
28
+ end
29
+ end
30
+ end
31
+
32
+ # add languageTerm text values
33
+ result.concat(n.text_term.map { |tt| tt.text.strip }.select { |val| !val.empty? && SEARCHWORKS_LANGUAGES.has_value?(val) })
34
+
35
+ # add language values that aren't in languageTerm subelement
36
+ result << n.text if n.languageTerm.empty? && SEARCHWORKS_LANGUAGES.has_value?(n.text)
37
+
38
+ result
39
+ end.uniq
40
+ end
41
+
42
+ # select one or more format values from the controlled vocabulary per JVine Summer 2014
43
+ # http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format_main_ssim&rows=0&facet.sort=index
44
+ # https://github.com/sul-dlss/stanford-mods/issues/66 - For geodata, the
45
+ # resource type should be only Map and not include Software, multimedia.
46
+ # @return [Array<String>] values in the SearchWorks controlled vocabulary
47
+ def format_main
48
+ types = typeOfResource
49
+ return [] unless types
50
+
51
+ val = []
52
+ genres = term_values(:genre) || []
53
+ issuance = term_values([:origin_info, :issuance]) || []
54
+ frequency = term_values([:origin_info, :frequency]) || []
55
+
56
+ val << 'Dataset' if genres.include?('dataset') || genres.include?('Dataset')
57
+ val << 'Archive/Manuscript' if types.any? { |t| t.manuscript == 'yes' }
58
+
59
+ val.concat(types.flat_map do |type|
60
+ case type.text
61
+ when 'cartographic'
62
+ 'Map'
63
+ when 'mixed material'
64
+ 'Archive/Manuscript'
65
+ when 'moving image'
66
+ 'Video'
67
+ when 'notated music'
68
+ 'Music score'
69
+ when 'software, multimedia'
70
+ 'Software/Multimedia' unless types.map(&:text).include?('cartographic') || (genres.include?('dataset') || genres.include?('Dataset'))
71
+ when 'sound recording-musical'
72
+ 'Music recording'
73
+ when 'sound recording-nonmusical', 'sound recording'
74
+ 'Sound recording'
75
+ when 'still image'
76
+ 'Image'
77
+ when 'text'
78
+ is_periodical = issuance.include?('continuing') || issuance.include?('serial') || frequency.any? { |x| !x.empty? }
79
+ is_archived_website = genres.any? { |x| x.casecmp('archived website') == 0 }
80
+
81
+ if is_periodical || is_archived_website
82
+ [
83
+ ('Journal/Periodical' if is_periodical),
84
+ ('Archived website' if is_archived_website)
85
+ ].compact
86
+ else
87
+ 'Book'
88
+ end
89
+ when 'three dimensional object'
90
+ 'Object'
91
+ end
92
+ end)
93
+
94
+ val.compact.uniq
95
+ end
96
+
97
+ # @return [Array<String>] values for the genre facet in SearchWorks
98
+ def sw_genre
99
+ genres = term_values(:genre)
100
+ return [] unless genres
101
+
102
+ val = genres.map(&:to_s)
103
+ thesis_pub = ['thesis', 'Thesis']
104
+ val << 'Thesis/Dissertation' if (genres & thesis_pub).any?
105
+
106
+ conf_pub = ['conference publication', 'Conference publication', 'Conference Publication']
107
+ gov_pub = ['government publication', 'Government publication', 'Government Publication']
108
+ tech_rpt = ['technical report', 'Technical report', 'Technical Report']
109
+
110
+ val << 'Conference proceedings' if (genres & conf_pub).any?
111
+ val << 'Government document' if (genres & gov_pub).any?
112
+ val << 'Technical report' if (genres & tech_rpt).any?
113
+
114
+ val.uniq
115
+ end
116
+
117
+ # @return [String] value with the numeric catkey in it, or nil if none exists
118
+ def catkey
119
+ catkey = term_values([:record_info, :recordIdentifier])
120
+
121
+ catkey.first&.tr('a', '') # ensure catkey is numeric only
122
+ end
123
+ end # Module Searchworks
124
+ end # Module Mods
125
+ end # Module Stanford
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ # SearchWorks specific wranglings of MODS *subject* metadata as a mixin to the Stanford::Mods::Record object
4
+ module Stanford
5
+ module Mods
6
+ module SearchworksSubjects
7
+ # Values are the contents of:
8
+ # mods/subject/topic
9
+ # @return [Array<String>] values for the topic_search Solr field for this document (empty Array if none)
10
+ def topic_search
11
+ subject_topics
12
+ end
13
+
14
+ # Values are the contents of:
15
+ # subject/topic
16
+ # subject/name
17
+ # subject/title
18
+ # subject/occupation
19
+ # with trailing comma, semicolon, and backslash (and any preceding spaces) removed
20
+ # @return [Array<String>] values for the topic_facet Solr field for this document (empty Array if none)
21
+ def topic_facet
22
+ strip_punctuation(subject_topics + subject_names + subject_titles + subject_occupations)
23
+ end
24
+
25
+ # geographic_search values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
26
+ # @return [Array<String>] values for the geographic_facet Solr field for this document (empty Array if none)
27
+ def geographic_facet
28
+ strip_punctuation(geographic_search)
29
+ end
30
+
31
+ # subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
32
+ # @return [Array<String>] values for the era_facet Solr field for this document (empty Array if none)
33
+ def era_facet
34
+ strip_punctuation(subject_temporal)
35
+ end
36
+
37
+ # Values are the contents of:
38
+ # subject/geographic
39
+ # subject/hierarchicalGeographic
40
+ # subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
41
+ # @return [Array<String>] values for the geographic_search Solr field for this document (empty Array if none)
42
+ def geographic_search
43
+ result = term_values([:subject, :geographic]) || []
44
+
45
+ # hierarchicalGeographic has sub elements
46
+ hierarchical_vals = mods_ng_xml.subject.hierarchicalGeographic.map do |hg_node|
47
+ hg_vals = hg_node.element_children.map(&:text).reject(&:empty?)
48
+ hg_vals.join(' ') unless hg_vals.empty?
49
+ end
50
+
51
+ trans_code_vals = mods_ng_xml.subject.geographicCode.translated_value || []
52
+
53
+ (result + hierarchical_vals + trans_code_vals).compact.uniq
54
+ end
55
+
56
+ # Values are the contents of:
57
+ # subject/name
58
+ # subject/occupation - no subelements
59
+ # subject/titleInfo
60
+ # @return [Array<String>] values for the subject_other_search Solr field for this document (empty Array if none)
61
+ def subject_other_search
62
+ subject_occupations + subject_names + subject_titles
63
+ end
64
+
65
+ # Values are the contents of:
66
+ # subject/temporal
67
+ # subject/genre
68
+ # @return [Array<String>] values for the subject_other_subvy_search Solr field for this document (empty Array if none)
69
+ def subject_other_subvy_search
70
+ vals = Array(subject_temporal)
71
+ gvals = term_values([:subject, :genre])
72
+
73
+ vals + Array(gvals)
74
+ end
75
+
76
+ # Values are the contents of:
77
+ # all subject subelements except subject/cartographic plus genre top level element
78
+ # @return [Array<String>] values for the subject_all_search Solr field for this document (empty Array if none)
79
+ def subject_all_search
80
+ topic_search + geographic_search + subject_other_search + subject_other_subvy_search
81
+ end
82
+
83
+ protected #----------------------------------------------------------
84
+
85
+ # convenience method for subject/name/namePart values (to avoid parsing the mods for the same thing multiple times)
86
+ def subject_names
87
+ mods_ng_xml.subject.name_el
88
+ .select { |n_el| n_el.namePart }
89
+ .map { |name_el_w_np| name_el_w_np.namePart.map(&:text).reject(&:empty?) }
90
+ .reject(&:empty?)
91
+ .map { |parts| parts.join(', ').strip }
92
+ end
93
+
94
+ # convenience method for subject/occupation values (to avoid parsing the mods for the same thing multiple times)
95
+ def subject_occupations
96
+ term_values([:subject, :occupation]) || []
97
+ end
98
+
99
+ # convenience method for subject/temporal values (to avoid parsing the mods for the same thing multiple times)
100
+ def subject_temporal
101
+ term_values([:subject, :temporal]) || []
102
+ end
103
+
104
+ # Values are the contents of:
105
+ # subject/titleInfo/(subelements)
106
+ # convenience method for subject/titleInfo values (to avoid parsing the mods for the same thing multiple times)
107
+ def subject_titles
108
+ mods_ng_xml.subject.titleInfo.map do |ti_el|
109
+ parts = ti_el.element_children.map(&:text).reject(&:empty?)
110
+ parts.join(' ').strip unless parts.empty?
111
+ end.compact
112
+ end
113
+
114
+ # convenience method for subject/topic values (to avoid parsing the mods for the same thing multiple times)
115
+ def subject_topics
116
+ term_values([:subject, :topic]) || []
117
+ end
118
+
119
+ private
120
+
121
+ def strip_punctuation(arr)
122
+ arr&.map { |val| val.gsub(/[\\,;]$/, '').strip }
123
+ end
124
+ end
125
+ end
126
+ end
@@ -0,0 +1,87 @@
1
+ module Stanford
2
+ module Mods
3
+ module Title
4
+ # @return [String] value for title_245a_search field
5
+ def sw_short_title
6
+ short_titles&.compact&.reject(&:empty?)&.first
7
+ end
8
+
9
+ # Searchworks requires that the MODS has a '//titleInfo/title'
10
+ # @return [String] value for title_245_search, title_full_display
11
+ def sw_full_title(title_info = first_title_info_node, sortable: false)
12
+ return unless title_info&.children&.any?
13
+
14
+ title = title_info.title&.text&.strip
15
+ return if title.nil? || title.empty?
16
+
17
+ title = ''
18
+ previous_element = nil
19
+
20
+ title_info.children.select { |value| title_parts.include? value.name }.each do |value|
21
+ next if value.name == 'nonSort' && sortable
22
+
23
+ str = value.text.strip
24
+ next if str.empty?
25
+
26
+ delimiter = if title.empty? || title.end_with?(' ')
27
+ nil
28
+ elsif previous_element&.name == 'nonSort' && title.end_with?('-', '\'')
29
+ nil
30
+ elsif title.end_with?('.', ',', ':', ';')
31
+ ' '
32
+ elsif value.name == 'subTitle'
33
+ ' : '
34
+ elsif value.name == 'partName' && previous_element.name == 'partNumber'
35
+ ', '
36
+ elsif value.name == 'partNumber' || value.name == 'partName'
37
+ '. '
38
+ else
39
+ ' '
40
+ end
41
+
42
+ title += delimiter if delimiter
43
+ title += str
44
+
45
+ previous_element = value
46
+ end
47
+
48
+ title += "." unless title =~ /\s*[[:punct:]]$/
49
+
50
+ title.strip
51
+ end
52
+
53
+ def title_parts
54
+ %w[nonSort title subTitle partName partNumber]
55
+ end
56
+
57
+ # like sw_full_title without trailing \,/;:.
58
+ # spec from solrmarc-sw sw_index.properties
59
+ # title_display = custom, removeTrailingPunct(245abdefghijklmnopqrstuvwxyz, [\\\\,/;:], ([A-Za-z]{4}|[0-9]{3}|\\)|\\,))
60
+ # @return [String] value for title_display (like title_full_display without trailing punctuation)
61
+ def sw_title_display
62
+ sw_full_title&.sub(/[\.,;:\/\\]+$/, '')&.strip
63
+ end
64
+
65
+ # this includes all titles except the full title of the first (main) titleInfo node
66
+ # @return [Array<String>] values for title_variant_search
67
+ def sw_addl_titles
68
+ (full_titles - Array(first_title_info_node&.full_title)).reject(&:blank?)
69
+ end
70
+
71
+ # Returns a sortable version of the main title
72
+ # @return [String] value for title_sort field
73
+ def sw_sort_title
74
+ val = sw_full_title(sortable: true) || ''
75
+ val.gsub(/[[:punct:]]*/, '').squeeze(" ").strip
76
+ end
77
+
78
+ private
79
+
80
+ # @return [Nokogiri::XML::Node] the first non-blank titleInfo node that is not type 'alternative' (falling back to the first non-blank node), or nil if there is none
81
+ def first_title_info_node
82
+ non_blank_nodes = mods_ng_xml.title_info.reject { |node| node.text.strip.empty? }
83
+ non_blank_nodes.find { |node| node.type_at != 'alternative' } || non_blank_nodes.first
84
+ end
85
+ end
86
+ end
87
+ end
@@ -4,9 +4,6 @@ module Stanford
4
4
  ##
5
5
  # Geospatial coordinate parsing
6
6
  class Coordinate
7
- require 'stanford-mods/geo_utils'
8
- include ::Stanford::Mods::GeoUtils
9
-
10
7
  attr_reader :value
11
8
 
12
9
  def initialize(value)
@@ -57,6 +54,27 @@ module Stanford
57
54
  def coord
58
55
  cleaner_coordinate(value)
59
56
  end
57
+
58
+ # @param [String] val Coordinates value
59
+ # @return [String] cleaned value (strips parens and period), or the original value
60
+ def cleaner_coordinate(val)
61
+ matches = val.match(/^\(?([^)]+)\)?\.?$/)
62
+ matches ? matches[1] : val
63
+ end
64
+
65
+ # @param [String] point coordinate point in degrees notation
66
+ # @return [Float] converted value in decimal notation
67
+ def coord_to_decimal(point)
68
+ regex = /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/
69
+ match = regex.match(point)
70
+ return Float::INFINITY unless match
71
+
72
+ dec = match['deg'].to_i
73
+ dec += match['min'].to_f / 60
74
+ dec += match['sec'].to_f / 60 / 60
75
+ dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
76
+ dec
77
+ end
60
78
  end
61
79
  end
62
80
  end