stanford-mods 2.6.4 → 3.0.0.alpha1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +1 -1
  3. data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
  4. data/lib/stanford-mods/concerns/name.rb +57 -0
  5. data/lib/stanford-mods/concerns/origin_info.rb +109 -0
  6. data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
  7. data/lib/stanford-mods/concerns/searchworks.rb +125 -0
  8. data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
  9. data/lib/stanford-mods/concerns/title.rb +79 -0
  10. data/lib/stanford-mods/coordinate.rb +21 -3
  11. data/lib/stanford-mods/date_parsing.rb +32 -289
  12. data/lib/stanford-mods/imprint.rb +148 -325
  13. data/lib/stanford-mods/record.rb +20 -0
  14. data/lib/stanford-mods/version.rb +1 -1
  15. data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +0 -0
  16. data/lib/stanford-mods.rb +12 -11
  17. data/spec/fixtures/searchworks_imprint_data.rb +38 -39
  18. data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
  19. data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
  20. data/spec/geo_spatial_spec.rb +1 -6
  21. data/spec/imprint_spec.rb +238 -207
  22. data/spec/name_spec.rb +26 -230
  23. data/spec/origin_info_spec.rb +34 -300
  24. data/spec/searchworks_basic_spec.rb +1 -3
  25. data/spec/searchworks_pub_dates_spec.rb +0 -215
  26. data/spec/searchworks_spec.rb +0 -21
  27. data/spec/searchworks_subject_raw_spec.rb +106 -105
  28. data/spec/searchworks_subject_spec.rb +19 -55
  29. data/spec/searchworks_title_spec.rb +1 -1
  30. data/stanford-mods.gemspec +1 -1
  31. metadata +21 -17
  32. data/lib/marc_countries.rb +0 -387
  33. data/lib/stanford-mods/geo_utils.rb +0 -28
  34. data/lib/stanford-mods/name.rb +0 -80
  35. data/lib/stanford-mods/origin_info.rb +0 -489
  36. data/lib/stanford-mods/searchworks.rb +0 -333
  37. data/lib/stanford-mods/searchworks_subjects.rb +0 -196
  38. data/spec/date_parsing_spec.rb +0 -905
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 922ecb4ab53df951ef735de705037f218347938cac621886b54dc71463c215a6
4
- data.tar.gz: 5426c132435bdc34a74df664b4bcbc6797682482689e0843ebdc7527e8df0715
3
+ metadata.gz: bccde0bc740f4135ee9de0b7255f3ac347a16c858ca463801111f37f8ad84ecc
4
+ data.tar.gz: a6e3fc9534efe4e37b05bf047007a5b9e3cd41f662e568b1b2333a1cc1213dce
5
5
  SHA512:
6
- metadata.gz: eb12a388c219e4b9f99746d543ecc38cc1a67bc68302dc4255c81a088ffaad2fada7f6387b9525441d199c7ec536f54ba109f38ad92bf2d1d6d771077cdbc6d5
7
- data.tar.gz: 2b904ae85c26eef0717a8e1250a7385889b00fe55c2e8ee59c83798f55f46e4e0ee4593f054dc3ab1a02e18095813ba3b16632f77340ddc212a54f36a871931a
6
+ metadata.gz: 7ab80915b8f299e35822e9f2b6e0047cf1b32c62bec9ac0218daeb0d7bd3fb4753d1668075b754e5a9a955098fb785c94a9c188654295894c2867fc7d04f7574
7
+ data.tar.gz: fb1e1835b67fb30c7d19c2046c08a9a60bddda35b6b4067bb3518c8d8d9385a09d80846fc46e958f67d9a0eb89dcb2aef36ede3da03fc723f0158d1db3b81ee9
@@ -11,7 +11,7 @@ jobs:
11
11
  runs-on: ubuntu-latest
12
12
  strategy:
13
13
  matrix:
14
- ruby: [jruby-9.2.14.0, 2.7, 3.0]
14
+ ruby: [jruby-9.3.2.0, 2.7, '3.0', '3.1']
15
15
  steps:
16
16
  - uses: actions/checkout@v2
17
17
  - name: Set up Ruby
@@ -1,10 +1,9 @@
1
- # encoding: UTF-8
2
- require 'mods'
1
+ # frozen_string_literal: true
3
2
 
4
3
  module Stanford
5
4
  module Mods
6
5
  # NON-SearchWorks specific wranglings of MODS cartographics metadata
7
- class Record < ::Mods::Record
6
+ module Geospatial
8
7
  GMLNS = 'http://www.opengis.net/gml/3.2/'.freeze
9
8
 
10
9
  # @return [Array{String}] subject cartographic coordinates values
@@ -27,8 +26,7 @@ module Stanford
27
26
  lowers = v.xpath('gml:lowerCorner', 'gml' => GMLNS).text.split
28
27
  "ENVELOPE(#{lowers[0]}, #{uppers[0]}, #{uppers[1]}, #{lowers[1]})"
29
28
  end
30
- rescue RuntimeError => e
31
- logger.warn "failure parsing <extension> element: #{e.message}"
29
+ rescue RuntimeError
32
30
  []
33
31
  end
34
32
 
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ # NON-SearchWorks specific wranglings of MODS <name> metadata as a mixin to the Stanford::Mods::Record object
4
+ module Stanford
5
+ module Mods
6
+ module Name
7
+ # the first encountered <mods><name> element with marcrelator flavor role of 'Creator' or 'Author'.
8
+ # if no marcrelator 'Creator' or 'Author', the first name without a role.
9
+ # if no name without a role, then nil
10
+ # @return [String] value for author_1xx_search field
11
+ def sw_main_author
12
+ result = mods_ng_xml.plain_name.find { |n| n.role.any? { |r| r.authority.include?('marcrelator') && r.value.any? { |v| v.match(/creator/i) || v.match?(/author/i) } } }
13
+ result ||= mods_ng_xml.plain_name.find { |n| n.role.empty? }
14
+
15
+ result&.display_value_w_date
16
+ end
17
+
18
+ # all names, in display form, except the main_author
19
+ # names will be the display_value_w_date form
20
+ # see Mods::Record.name in nom_terminology for details on the display_value algorithm
21
+ # @return [Array<String>] values for author_7xx_search field
22
+ def sw_addl_authors
23
+ mods_ng_xml.plain_name.map(&:display_value_w_date) - [sw_main_author]
24
+ end
25
+
26
+ # @return [Array<String>] values for author_person_facet, author_person_display
27
+ def sw_person_authors
28
+ mods_ng_xml.personal_names.map(&:display_value_w_date)
29
+ end
30
+
31
+ # return the display_value_w_date for all <mods><name> elements that do not have type='personal'
32
+ # @return [Array<String>] values for author_other_facet
33
+ def sw_impersonal_authors
34
+ mods_ng_xml.plain_name.select { |n| n.type_at != 'personal' }.map(&:display_value_w_date)
35
+ end
36
+
37
+ # @return [Array<String>] values for author_corp_display
38
+ def sw_corporate_authors
39
+ mods_ng_xml.corporate_name.map(&:display_value_w_date)
40
+ end
41
+
42
+ # @return [Array<String>] values for author_meeting_display
43
+ def sw_meeting_authors
44
+ mods_ng_xml.conference_name.map(&:display_value_w_date)
45
+ end
46
+
47
+ # Returns a sortable version of the main_author:
48
+ # main_author + sorting title
49
+ # which is the mods approximation of the value created for a marc record
50
+ # @return [String] value for author_sort field
51
+ def sw_sort_author
52
+ # substitute java Character.MAX_CODE_POINT for nil main_author so missing main authors sort last
53
+ "#{sw_main_author || "\u{10FFFF} " }#{sort_title}".gsub(/[[:punct:]]*/, '').strip
54
+ end
55
+ end # module Name
56
+ end # Module Mods
57
+ end # Module Stanford
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parsing MODS /originInfo for Publication/Imprint data:
4
+ # * pub year for date slider facet
5
+ # * pub year for sorting
6
+ # * pub year for single display value
7
+ # * imprint info for display
8
+ # *
9
+ # These methods may be used by searchworks.rb file or by downstream apps
10
+ module Stanford
11
+ module Mods
12
+ module OriginInfo
13
+ # return pub year as an Integer
14
+ # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
15
+ # look for a keyDate and use it if there is one; otherwise pick earliest date
16
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
17
+ # @return [Integer] publication year as an Integer
18
+ # @note for sorting: 5 B.C. => -5; 666 B.C. => -666
19
+ def pub_year_int(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
20
+ fields.each do |date_key|
21
+ values = mods_ng_xml.origin_info.send(date_key)
22
+ values = values.reject(&method(:is_approximate)) if ignore_approximate
23
+
24
+ earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
25
+ return earliest_date.year_int_from_date_str if earliest_date&.year_int_from_date_str
26
+ end; nil
27
+ end
28
+
29
+ # return a single string intended for lexical sorting for pub date
30
+ # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
31
+ # look for a keyDate and use it if there is one; otherwise pick earliest date
32
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
33
+ # @return [String] single String containing publication year for lexical sorting
34
+ # @note for string sorting 5 B.C. = -5 => -995; 6 B.C. => -994, so 6 B.C. sorts before 5 B.C.
35
+ # @deprecated use pub_year_int
36
+ def pub_year_sort_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
37
+ fields.each do |date_key|
38
+ values = mods_ng_xml.origin_info.send(date_key)
39
+ values = values.reject(&method(:is_approximate)) if ignore_approximate
40
+
41
+ earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
42
+ return earliest_date.sortable_year_string_from_date_str if earliest_date&.sortable_year_string_from_date_str
43
+ end; nil
44
+ end
45
+
46
+ # return a single string intended for display of pub year
47
+ # 0 < year < 1000: add A.D. suffix
48
+ # year < 0: add B.C. suffix. ('-5' => '5 B.C.', '700 B.C.' => '700 B.C.')
49
+ # 195u => 195x
50
+ # 19uu => 19xx
51
+ # '-5' => '5 B.C.'
52
+ # '700 B.C.' => '700 B.C.'
53
+ # '7th century' => '7th century'
54
+ # date ranges?
55
+ # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
56
+ # look for a keyDate and use it if there is one; otherwise pick earliest date
57
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
58
+ # should be ignored; false if approximate dates should be included
59
+ def pub_year_display_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
60
+ fields.each do |date_key|
61
+ values = mods_ng_xml.origin_info.send(date_key)
62
+ values = values.reject(&method(:is_approximate)) if ignore_approximate
63
+
64
+ earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
65
+ return earliest_date.date_str_for_display if earliest_date&.date_str_for_display
66
+ end; nil
67
+ end
68
+
69
+ # @return [Array<Stanford::Mods::Imprint>] array of imprint objects
70
+ # @private
71
+ def imprints
72
+ origin_info.map { |el| Stanford::Mods::Imprint.new(el) }
73
+ end
74
+
75
+ # @return [String] single String containing imprint information for display
76
+ def imprint_display_str
77
+ imprints.map(&:display_str).reject(&:empty?).join('; ')
78
+ end
79
+
80
+ # true if the date element has a qualifier attribute of 'approximate' or 'questionable'
81
+ # @param [Nokogiri::XML::Element] node the date element
82
+ # @return [Boolean]
83
+ # @private
84
+ def is_approximate(node)
85
+ qualifier = node["qualifier"] if node.respond_to?('[]')
86
+ qualifier == 'approximate' || qualifier == 'questionable'
87
+ end
88
+
89
+ # get earliest parseable year from the passed date elements
90
+ # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
91
+ # @return [Stanford::Mods::DateParsing]
92
+ def self.best_or_earliest_year(date_el_array)
93
+ key_dates, other_dates = date_el_array.partition { |node| node['keyDate'] == 'yes' }
94
+
95
+ sortable_dates = key_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str)
96
+ sortable_dates = other_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str) if sortable_dates.empty?
97
+ results = {}
98
+
99
+ # this is a little weird; instead of just the earliest sorting date, if there are multiple
100
+ # dates with the same sort key, we want to make sure we get the last occurring one?
101
+ sortable_dates.each do |v|
102
+ results[v.sortable_year_string_from_date_str] = v
103
+ end
104
+
105
+ results[results.keys.min]
106
+ end
107
+ end # module OriginInfo
108
+ end
109
+ end
@@ -1,4 +1,4 @@
1
- require 'mods'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Stanford
4
4
  module Mods
@@ -7,7 +7,7 @@ module Stanford
7
7
  # Note: mods_ng_xml_location.physicalLocation should find top level and relatedItem.
8
8
  # Each method here expects to find at most ONE matching element. Subsequent potential matches
9
9
  # are ignored.
10
- class Record < ::Mods::Record
10
+ module PhysicalLocation
11
11
  # data in location/physicalLocation or in relatedItem/location/physicalLocation
12
12
  # so use _location to get the data from either one of them
13
13
  # @return [String] box number (note: single valued and might be something like 35A)
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ # SearchWorks specific wranglings of MODS metadata as a mixin to the Stanford::Mods::Record object
4
+ module Stanford
5
+ module Mods
6
+ module Searchworks
7
+ # include languages known to SearchWorks; try to error correct when possible (e.g. when ISO-639 disagrees with MARC standard)
8
+ def sw_language_facet
9
+ mods_ng_xml.language.flat_map do |n|
10
+ # get languageTerm codes and add their translations to the result
11
+ result = n.code_term.flat_map do |ct|
12
+ if ct.authority =~ /^iso639/
13
+ vals = ct.text.split(/[,|\ ]/).reject { |x| x.strip.empty? }
14
+ vals.select { |v| ISO_639.find(v.strip) }.map do |v|
15
+ iso639_val = ISO_639.find(v.strip).english_name
16
+
17
+ if SEARCHWORKS_LANGUAGES.has_value?(iso639_val)
18
+ iso639_val
19
+ else
20
+ SEARCHWORKS_LANGUAGES[v.strip]
21
+ end
22
+ end
23
+ else
24
+ vals = ct.text.split(/[,|\ ]/).reject { |x| x.strip.empty? }
25
+
26
+ vals.map do |v|
27
+ SEARCHWORKS_LANGUAGES[v.strip]
28
+ end
29
+ end
30
+ end
31
+
32
+ # add languageTerm text values
33
+ result.concat(n.text_term.map { |tt| tt.text.strip }.select { |val| !val.empty? && SEARCHWORKS_LANGUAGES.has_value?(val) })
34
+
35
+ # add language values that aren't in languageTerm subelement
36
+ result << n.text if n.languageTerm.empty? && SEARCHWORKS_LANGUAGES.has_value?(n.text)
37
+
38
+ result
39
+ end.uniq
40
+ end
41
+
42
+ # select one or more format values from the controlled vocabulary per JVine Summer 2014
43
+ # http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format_main_ssim&rows=0&facet.sort=index
44
+ # https://github.com/sul-dlss/stanford-mods/issues/66 - For geodata, the
45
+ # resource type should be only Map and not include Software, multimedia.
46
+ # @return [Array<String>] value in the SearchWorks controlled vocabulary
47
+ def format_main
48
+ types = typeOfResource
49
+ return [] unless types
50
+
51
+ val = []
52
+ genres = term_values(:genre) || []
53
+ issuance = term_values([:origin_info, :issuance]) || []
54
+ frequency = term_values([:origin_info, :frequency]) || []
55
+
56
+ val << 'Dataset' if genres.include?('dataset') || genres.include?('Dataset')
57
+ val << 'Archive/Manuscript' if types.any? { |t| t.manuscript == 'yes' }
58
+
59
+ val.concat(types.flat_map do |type|
60
+ case type.text
61
+ when 'cartographic'
62
+ 'Map'
63
+ when 'mixed material'
64
+ 'Archive/Manuscript'
65
+ when 'moving image'
66
+ 'Video'
67
+ when 'notated music'
68
+ 'Music score'
69
+ when 'software, multimedia'
70
+ 'Software/Multimedia' unless types.map(&:text).include?('cartographic') || (genres.include?('dataset') || genres.include?('Dataset'))
71
+ when 'sound recording-musical'
72
+ 'Music recording'
73
+ when 'sound recording-nonmusical', 'sound recording'
74
+ 'Sound recording'
75
+ when 'still image'
76
+ 'Image'
77
+ when 'text'
78
+ is_periodical = issuance.include?('continuing') || issuance.include?('serial') || frequency.any? { |x| !x.empty? }
79
+ is_archived_website = genres.any? { |x| x.casecmp('archived website') == 0 }
80
+
81
+ if is_periodical || is_archived_website
82
+ [
83
+ ('Journal/Periodical' if is_periodical),
84
+ ('Archived website' if is_archived_website)
85
+ ].compact
86
+ else
87
+ 'Book'
88
+ end
89
+ when 'three dimensional object'
90
+ 'Object'
91
+ end
92
+ end)
93
+
94
+ val.compact.uniq
95
+ end
96
+
97
+ # @return [Array<String>] values for the genre facet in SearchWorks
98
+ def sw_genre
99
+ genres = term_values(:genre)
100
+ return [] unless genres
101
+
102
+ val = genres.map(&:to_s)
103
+ thesis_pub = ['thesis', 'Thesis']
104
+ val << 'Thesis/Dissertation' if (genres & thesis_pub).any?
105
+
106
+ conf_pub = ['conference publication', 'Conference publication', 'Conference Publication']
107
+ gov_pub = ['government publication', 'Government publication', 'Government Publication']
108
+ tech_rpt = ['technical report', 'Technical report', 'Technical Report']
109
+
110
+ val << 'Conference proceedings' if (genres & conf_pub).any?
111
+ val << 'Government document' if (genres & gov_pub).any?
112
+ val << 'Technical report' if (genres & tech_rpt).any?
113
+
114
+ val.uniq
115
+ end
116
+
117
+ # @return [String] value with the numeric catkey in it, or nil if none exists
118
+ def catkey
119
+ catkey = term_values([:record_info, :recordIdentifier])
120
+
121
+ catkey.first&.tr('a', '') # ensure catkey is numeric only
122
+ end
123
+ end # module Searchworks
124
+ end # Module Mods
125
+ end # Module Stanford
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ # SearchWorks specific wranglings of MODS *subject* metadata as a mixin to the Stanford::Mods::Record object
4
+ module Stanford
5
+ module Mods
6
+ module SearchworksSubjects
7
+ # Values are the contents of:
8
+ # mods/subject/topic
9
+ # @return [Array<String>] values for the topic_search Solr field for this document (empty Array if none)
10
+ def topic_search
11
+ subject_topics
12
+ end
13
+
14
+ # Values are the contents of:
15
+ # subject/topic
16
+ # subject/name
17
+ # subject/title
18
+ # subject/occupation
19
+ # with trailing comma, semicolon, and backslash (and any preceding spaces) removed
20
+ # @return [Array<String>] values for the topic_facet Solr field for this document (empty Array if none)
21
+ def topic_facet
22
+ strip_punctuation(subject_topics + subject_names + subject_titles + subject_occupations)
23
+ end
24
+
25
+ # geographic_search values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
26
+ # @return [Array<String>] values for the geographic_facet Solr field for this document (empty Array if none)
27
+ def geographic_facet
28
+ strip_punctuation(geographic_search)
29
+ end
30
+
31
+ # subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
32
+ # @return [Array<String>] values for the era_facet Solr field for this document (empty Array if none)
33
+ def era_facet
34
+ strip_punctuation(subject_temporal)
35
+ end
36
+
37
+ # Values are the contents of:
38
+ # subject/geographic
39
+ # subject/hierarchicalGeographic
40
+ # subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
41
+ # @return [Array<String>] values for the geographic_search Solr field for this document (empty Array if none)
42
+ def geographic_search
43
+ result = term_values([:subject, :geographic]) || []
44
+
45
+ # hierarchicalGeographic has sub elements
46
+ hierarchical_vals = mods_ng_xml.subject.hierarchicalGeographic.map do |hg_node|
47
+ hg_vals = hg_node.element_children.map(&:text).reject(&:empty?)
48
+ hg_vals.join(' ') unless hg_vals.empty?
49
+ end
50
+
51
+ trans_code_vals = mods_ng_xml.subject.geographicCode.translated_value || []
52
+
53
+ (result + hierarchical_vals + trans_code_vals).compact.uniq
54
+ end
55
+
56
+ # Values are the contents of:
57
+ # subject/name
58
+ # subject/occupation - no subelements
59
+ # subject/titleInfo
60
+ # @return [Array<String>] values for the subject_other_search Solr field for this document (empty Array if none)
61
+ def subject_other_search
62
+ subject_occupations + subject_names + subject_titles
63
+ end
64
+
65
+ # Values are the contents of:
66
+ # subject/temporal
67
+ # subject/genre
68
+ # @return [Array<String>] values for the subject_other_subvy_search Solr field for this document (empty Array if none)
69
+ def subject_other_subvy_search
70
+ vals = Array(subject_temporal)
71
+ gvals = term_values([:subject, :genre])
72
+
73
+ vals + Array(gvals)
74
+ end
75
+
76
+ # Values are the contents of:
77
+ # all subject subelements except subject/cartographic plus genre top level element
78
+ # @return [Array<String>] values for the subject_all_search Solr field for this document (empty Array if none)
79
+ def subject_all_search
80
+ topic_search + geographic_search + subject_other_search + subject_other_subvy_search
81
+ end
82
+
83
+ protected #----------------------------------------------------------
84
+
85
+ # convenience method for subject/name/namePart values (to avoid parsing the mods for the same thing multiple times)
86
+ def subject_names
87
+ mods_ng_xml.subject.name_el
88
+ .select { |n_el| n_el.namePart }
89
+ .map { |name_el_w_np| name_el_w_np.namePart.map(&:text).reject(&:empty?) }
90
+ .reject(&:empty?)
91
+ .map { |parts| parts.join(', ').strip }
92
+ end
93
+
94
+ # convenience method for subject/occupation values (to avoid parsing the mods for the same thing multiple times)
95
+ def subject_occupations
96
+ term_values([:subject, :occupation]) || []
97
+ end
98
+
99
+ # convenience method for subject/temporal values (to avoid parsing the mods for the same thing multiple times)
100
+ def subject_temporal
101
+ term_values([:subject, :temporal]) || []
102
+ end
103
+
104
+ # Values are the contents of:
105
+ # subject/titleInfo/(subelements)
106
+ # convenience method for subject/titleInfo values (to avoid parsing the mods for the same thing multiple times)
107
+ def subject_titles
108
+ mods_ng_xml.subject.titleInfo.map do |ti_el|
109
+ parts = ti_el.element_children.map(&:text).reject(&:empty?)
110
+ parts.join(' ').strip unless parts.empty?
111
+ end.compact
112
+ end
113
+
114
+ # convenience method for subject/topic values (to avoid parsing the mods for the same thing multiple times)
115
+ def subject_topics
116
+ term_values([:subject, :topic]) || []
117
+ end
118
+
119
+ private
120
+
121
+ def strip_punctuation(arr)
122
+ arr&.map { |val| val.gsub(/[\\,;]$/, '').strip }
123
+ end
124
+ end
125
+ end
126
+ end
@@ -0,0 +1,79 @@
1
+ module Stanford
2
+ module Mods
3
+ module Title
4
+ # @return [String] value for title_245a_search field
5
+ def sw_short_title
6
+ short_titles&.compact&.reject(&:empty?)&.first
7
+ end
8
+
9
+ # Searchworks requires that the MODS has a '//titleInfo/title'
10
+ # @return [String] value for title_245_search, title_full_display
11
+ def sw_full_title(title_info = first_title_info_node, sortable: false)
12
+ return unless title_info
13
+
14
+ title = title_info.title&.text&.strip
15
+
16
+ return if title.nil? || title.empty?
17
+
18
+ nonSort_title = title_info.nonSort&.text&.strip
19
+
20
+ preSubTitle = [(nonSort_title unless sortable), title].compact.join(' ')
21
+
22
+ preSubTitle.sub!(/:$/, '')
23
+
24
+ subTitle = title_info.subTitle.text.strip
25
+ preParts = subTitle.empty? ? preSubTitle : preSubTitle + " : " + subTitle
26
+ preParts.sub!(/\.$/, '') if preParts # remove trailing period
27
+
28
+ partName = title_info.partName.text.strip unless title_info.partName.text.strip.empty?
29
+ partNumber = title_info.partNumber.text.strip unless title_info.partNumber.text.strip.empty?
30
+ partNumber.sub!(/,$/, '') if partNumber # remove trailing comma
31
+ if partNumber && partName
32
+ parts = partNumber + ", " + partName
33
+ elsif partNumber
34
+ parts = partNumber
35
+ elsif partName
36
+ parts = partName
37
+ end
38
+ parts.sub!(/\.$/, '') if parts
39
+
40
+ result = parts ? preParts + ". " + parts : preParts
41
+ return nil unless result
42
+
43
+ result += "." unless result =~ /[[:punct:]]$/
44
+ result.strip!
45
+ result = nil if result.empty?
46
+ result
47
+ end
48
+
49
+ # like sw_full_title without trailing \,/;:.
50
+ # spec from solrmarc-sw sw_index.properties
51
+ # title_display = custom, removeTrailingPunct(245abdefghijklmnopqrstuvwxyz, [\\\\,/;:], ([A-Za-z]{4}|[0-9]{3}|\\)|\\,))
52
+ # @return [String] value for title_display (like title_full_display without trailing punctuation)
53
+ def sw_title_display
54
+ sw_full_title&.sub(/[\.,;:\/\\]+$/, '')&.strip
55
+ end
56
+
57
+ # this includes all titles except the full title of the main titleInfo node (see first_title_info_node)
58
+ # @return [Array<String>] values for title_variant_search
59
+ def sw_addl_titles
60
+ (full_titles - first_title_info_node.full_title).reject(&:blank?)
61
+ end
62
+
63
+ # Returns a sortable version of the main title
64
+ # @return [String] value for title_sort field
65
+ def sw_sort_title
66
+ val = sw_full_title(sortable: true) || ''
67
+ val.gsub(/[[:punct:]]*/, '').squeeze(" ").strip
68
+ end
69
+
70
+ private
71
+
72
+ # @return [Nokogiri::XML::Node] the first non-blank titleInfo node that is not type 'alternative' (else the first non-blank titleInfo node), or nil if there is none
73
+ def first_title_info_node
74
+ non_blank_nodes = mods_ng_xml.title_info.reject { |node| node.text.strip.empty? }
75
+ non_blank_nodes.find { |node| node.type_at != 'alternative' } || non_blank_nodes.first
76
+ end
77
+ end
78
+ end
79
+ end
@@ -4,9 +4,6 @@ module Stanford
4
4
  ##
5
5
  # Geospatial coordinate parsing
6
6
  class Coordinate
7
- require 'stanford-mods/geo_utils'
8
- include ::Stanford::Mods::GeoUtils
9
-
10
7
  attr_reader :value
11
8
 
12
9
  def initialize(value)
@@ -57,6 +54,27 @@ module Stanford
57
54
  def coord
58
55
  cleaner_coordinate(value)
59
56
  end
57
+
58
+ # @param [String] val Coordinates value
59
+ # @return [String] cleaned value (strips parens and period), or the original value
60
+ def cleaner_coordinate(val)
61
+ matches = val.match(/^\(?([^)]+)\)?\.?$/)
62
+ matches ? matches[1] : val
63
+ end
64
+
65
+ # @param [String] point coordinate point in degrees notation
66
+ # @return [Float] converted value in decimal notation
67
+ def coord_to_decimal(point)
68
+ regex = /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/
69
+ match = regex.match(point)
70
+ return Float::INFINITY unless match
71
+
72
+ dec = match['deg'].to_i
73
+ dec += match['min'].to_f / 60
74
+ dec += match['sec'].to_f / 60 / 60
75
+ dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
76
+ dec
77
+ end
60
78
  end
61
79
  end
62
80
  end