stanford-mods 2.6.4 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +1 -1
  3. data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
  4. data/lib/stanford-mods/concerns/name.rb +57 -0
  5. data/lib/stanford-mods/concerns/origin_info.rb +113 -0
  6. data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
  7. data/lib/stanford-mods/concerns/searchworks.rb +125 -0
  8. data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
  9. data/lib/stanford-mods/concerns/title.rb +87 -0
  10. data/lib/stanford-mods/coordinate.rb +24 -3
  11. data/lib/stanford-mods/date_parsing.rb +32 -289
  12. data/lib/stanford-mods/imprint.rb +170 -322
  13. data/lib/stanford-mods/record.rb +20 -0
  14. data/lib/stanford-mods/version.rb +1 -1
  15. data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +0 -0
  16. data/lib/stanford-mods.rb +12 -11
  17. data/spec/fixtures/searchworks_imprint_data.rb +38 -39
  18. data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
  19. data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
  20. data/spec/geo_spatial_spec.rb +1 -6
  21. data/spec/imprint_spec.rb +263 -207
  22. data/spec/lib/stanford-mods/coordinate_spec.rb +3 -5
  23. data/spec/name_spec.rb +26 -230
  24. data/spec/origin_info_spec.rb +34 -300
  25. data/spec/searchworks_basic_spec.rb +1 -3
  26. data/spec/searchworks_pub_dates_spec.rb +0 -215
  27. data/spec/searchworks_spec.rb +0 -21
  28. data/spec/searchworks_subject_raw_spec.rb +106 -105
  29. data/spec/searchworks_subject_spec.rb +19 -55
  30. data/spec/searchworks_title_spec.rb +5 -5
  31. data/stanford-mods.gemspec +1 -1
  32. metadata +19 -15
  33. data/lib/marc_countries.rb +0 -387
  34. data/lib/stanford-mods/geo_utils.rb +0 -28
  35. data/lib/stanford-mods/name.rb +0 -80
  36. data/lib/stanford-mods/origin_info.rb +0 -489
  37. data/lib/stanford-mods/searchworks.rb +0 -333
  38. data/lib/stanford-mods/searchworks_subjects.rb +0 -196
  39. data/spec/date_parsing_spec.rb +0 -905
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 922ecb4ab53df951ef735de705037f218347938cac621886b54dc71463c215a6
4
- data.tar.gz: 5426c132435bdc34a74df664b4bcbc6797682482689e0843ebdc7527e8df0715
3
+ metadata.gz: 22cc1cc8aafefb053ea3856e273e17be7089862160e91d87c483393b60c02aca
4
+ data.tar.gz: dbeea673e2c79744215c278ebf2fa73b78de3de7f79602590948bf4ee117b1df
5
5
  SHA512:
6
- metadata.gz: eb12a388c219e4b9f99746d543ecc38cc1a67bc68302dc4255c81a088ffaad2fada7f6387b9525441d199c7ec536f54ba109f38ad92bf2d1d6d771077cdbc6d5
7
- data.tar.gz: 2b904ae85c26eef0717a8e1250a7385889b00fe55c2e8ee59c83798f55f46e4e0ee4593f054dc3ab1a02e18095813ba3b16632f77340ddc212a54f36a871931a
6
+ metadata.gz: fb3d7b5761a6e4811dd85b6d4972ce238e5b507e957d617b5e4a3935bcd3e9f791176d250418a198d2b68a63babfc58dcb9c39d1b8b4fbbbdd01eecc504fa8a2
7
+ data.tar.gz: 3fe0f6181376c4cca9618a90e9a03244328f3a93a16ea55f1b5a72c7f842ddd0de8addbfb2da75efa4875bc4c04e4dd73caaad543745faf9461da7928566487e
@@ -11,7 +11,7 @@ jobs:
11
11
  runs-on: ubuntu-latest
12
12
  strategy:
13
13
  matrix:
14
- ruby: [jruby-9.2.14.0, 2.7, 3.0]
14
+ ruby: [jruby-9.3.2.0, 2.7, '3.0', '3.1']
15
15
  steps:
16
16
  - uses: actions/checkout@v2
17
17
  - name: Set up Ruby
@@ -1,10 +1,9 @@
1
- # encoding: UTF-8
2
- require 'mods'
1
+ # frozen_string_literal: true
3
2
 
4
3
  module Stanford
5
4
  module Mods
6
5
  # NON-SearchWorks specific wranglings of MODS cartographics metadata
7
- class Record < ::Mods::Record
6
+ module Geospatial
8
7
  GMLNS = 'http://www.opengis.net/gml/3.2/'.freeze
9
8
 
10
9
  # @return [Array{String}] subject cartographic coordinates values
@@ -27,8 +26,7 @@ module Stanford
27
26
  lowers = v.xpath('gml:lowerCorner', 'gml' => GMLNS).text.split
28
27
  "ENVELOPE(#{lowers[0]}, #{uppers[0]}, #{uppers[1]}, #{lowers[1]})"
29
28
  end
30
- rescue RuntimeError => e
31
- logger.warn "failure parsing <extension> element: #{e.message}"
29
+ rescue RuntimeError
32
30
  []
33
31
  end
34
32
 
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ # NON-SearchWorks specific wranglings of MODS <name> metadata as a mixin to the Stanford::Mods::Record object
4
+ module Stanford
5
+ module Mods
6
+ module Name
7
+ # the first encountered <mods><name> element with marcrelator flavor role of 'Creator' or 'Author'.
8
+ # if no marcrelator 'Creator' or 'Author', the first name without a role.
9
+ # if no name without a role, then nil
10
+ # @return [String] value for author_1xx_search field
11
+ def sw_main_author
12
+ result = mods_ng_xml.plain_name.find { |n| n.role.any? { |r| r.authority.include?('marcrelator') && r.value.any? { |v| v.match(/creator/i) || v.match?(/author/i) } } }
13
+ result ||= mods_ng_xml.plain_name.find { |n| n.role.empty? }
14
+
15
+ result&.display_value_w_date
16
+ end
17
+
18
+ # all names, in display form, except the main_author
19
+ # names will be the display_value_w_date form
20
+ # see Mods::Record.name in nom_terminology for details on the display_value algorithm
21
+ # @return [Array<String>] values for author_7xx_search field
22
+ def sw_addl_authors
23
+ mods_ng_xml.plain_name.map(&:display_value_w_date) - [sw_main_author]
24
+ end
25
+
26
+ # @return [Array<String>] values for author_person_facet, author_person_display
27
+ def sw_person_authors
28
+ mods_ng_xml.personal_name.map(&:display_value_w_date)
29
+ end
30
+
31
+ # return the display_value_w_date for all <mods><name> elements that do not have type='personal'
32
+ # @return [Array<String>] values for author_other_facet
33
+ def sw_impersonal_authors
34
+ mods_ng_xml.plain_name.select { |n| n.type_at != 'personal' }.map(&:display_value_w_date)
35
+ end
36
+
37
+ # @return [Array<String>] values for author_corp_display
38
+ def sw_corporate_authors
39
+ mods_ng_xml.corporate_name.map(&:display_value_w_date)
40
+ end
41
+
42
+ # @return [Array<String>] values for author_meeting_display
43
+ def sw_meeting_authors
44
+ mods_ng_xml.conference_name.map(&:display_value_w_date)
45
+ end
46
+
47
+ # Returns a sortable version of the main_author:
48
+ # main_author + sorting title
49
+ # which is the mods approximation of the value created for a marc record
50
+ # @return [String] value for author_sort field
51
+ def sw_sort_author
52
+ # substitute java Character.MAX_CODE_POINT for nil main_author so missing main authors sort last
53
+ "#{sw_main_author || "\u{10FFFF} " }#{sort_title}".gsub(/[[:punct:]]*/, '').strip
54
+ end
55
+ end # module Name
56
+ end # Module Mods
57
+ end # Module Stanford
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Parsing MODS /originInfo for Publication/Imprint data:
4
+ # * pub year for date slider facet
5
+ # * pub year for sorting
6
+ # * pub year for single display value
7
+ # * imprint info for display
8
+ # *
9
+ # These methods may be used by searchworks.rb file or by downstream apps
10
+ module Stanford
11
+ module Mods
12
+ module OriginInfo
13
+ # return pub year as an Integer
14
+ # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
15
+ # look for a keyDate and use it if there is one; otherwise pick earliest date
16
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
17
+ # @return [Integer] publication year as an Integer
18
+ # @note for sorting: 5 B.C. => -5; 666 B.C. => -666
19
+ def pub_year_int(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
20
+ fields.each do |date_key|
21
+ values = mods_ng_xml.origin_info.send(date_key)
22
+ values = values.reject(&method(:is_approximate)) if ignore_approximate
23
+
24
+ earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
25
+ return earliest_date.year_int_from_date_str if earliest_date&.year_int_from_date_str
26
+ end; nil
27
+ end
28
+
29
+ # return a single string intended for lexical sorting for pub date
30
+ # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
31
+ # look for a keyDate and use it if there is one; otherwise pick earliest date
32
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
33
+ # @return [String] single String containing publication year for lexical sorting
34
+ # @note for string sorting 5 B.C. = -5 => -995; 6 B.C. => -994, so 6 B.C. sorts before 5 B.C.
35
+ # @deprecated use pub_year_int
36
+ def pub_year_sort_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
37
+ fields.each do |date_key|
38
+ values = mods_ng_xml.origin_info.send(date_key)
39
+ values = values.reject(&method(:is_approximate)) if ignore_approximate
40
+
41
+ earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
42
+ return earliest_date.sortable_year_string_from_date_str if earliest_date&.sortable_year_string_from_date_str
43
+ end; nil
44
+ end
45
+
46
+ # return a single string intended for display of pub year
47
+ # 0 < year < 1000: add A.D. suffix
48
+ # year < 0: add B.C. suffix. ('-5' => '5 B.C.', '700 B.C.' => '700 B.C.')
49
+ # 195u => 195x
50
+ # 19uu => 19xx
51
+ # '-5' => '5 B.C.'
52
+ # '700 B.C.' => '700 B.C.'
53
+ # '7th century' => '7th century'
54
+ # date ranges?
55
+ # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
56
+ # look for a keyDate and use it if there is one; otherwise pick earliest date
57
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
58
+ # should be ignored; false if approximate dates should be included
59
+ def pub_year_display_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
60
+ fields.each do |date_key|
61
+ values = mods_ng_xml.origin_info.send(date_key)
62
+ values = values.reject(&method(:is_approximate)) if ignore_approximate
63
+
64
+ earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
65
+ return earliest_date.date_str_for_display if earliest_date&.date_str_for_display
66
+ end; nil
67
+ end
68
+
69
+ # @return [Array<Stanford::Mods::Imprint>] array of imprint objects
70
+ # @private
71
+ def imprints
72
+ origin_info.map { |el| Stanford::Mods::Imprint.new(el) }
73
+ end
74
+
75
+ def place
76
+ term_values([:origin_info, :place, :placeTerm])
77
+ end
78
+
79
+ # @return [String] single String containing imprint information for display
80
+ def imprint_display_str
81
+ imprints.map(&:display_str).reject(&:empty?).join('; ')
82
+ end
83
+
84
+ # check whether a date element has a qualifier attribute of 'approximate' or 'questionable'
85
+ # @param [Nokogiri::XML::Element] node the date element
86
+ # @return [Boolean]
87
+ # @private
88
+ def is_approximate(node)
89
+ qualifier = node["qualifier"] if node.respond_to?('[]')
90
+ qualifier == 'approximate' || qualifier == 'questionable'
91
+ end
92
+
93
+ # get earliest parseable year from the passed date elements
94
+ # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
95
+ # @return [Stanford::Mods::DateParsing]
96
+ def self.best_or_earliest_year(date_el_array)
97
+ key_dates, other_dates = date_el_array.partition { |node| node['keyDate'] == 'yes' }
98
+
99
+ sortable_dates = key_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str)
100
+ sortable_dates = other_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str) if sortable_dates.empty?
101
+ results = {}
102
+
103
+ # this is a little weird; instead of just the earliest sorting date, if there are multiple
104
+ # dates with the same sort key, we want to make sure we get the last occurring one?
105
+ sortable_dates.each do |v|
106
+ results[v.sortable_year_string_from_date_str] = v
107
+ end
108
+
109
+ results[results.keys.min]
110
+ end
111
+ end # module OriginInfo
112
+ end
113
+ end
@@ -1,4 +1,4 @@
1
- require 'mods'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Stanford
4
4
  module Mods
@@ -7,7 +7,7 @@ module Stanford
7
7
  # Note: mods_ng_xml_location.physicalLocation should find top level and relatedItem.
8
8
  # Each method here expects to find at most ONE matching element. Subsequent potential matches
9
9
  # are ignored.
10
- class Record < ::Mods::Record
10
+ module PhysicalLocation
11
11
  # data in location/physicalLocation or in relatedItem/location/physicalLocation
12
12
  # so use _location to get the data from either one of them
13
13
  # @return [String] box number (note: single valued and might be something like 35A)
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ # SearchWorks specific wranglings of MODS metadata as a mixin to the Stanford::Mods::Record object
4
+ module Stanford
5
+ module Mods
6
+ module Searchworks
7
+ # include languages known to SearchWorks; try to error correct when possible (e.g. when ISO-639 disagrees with MARC standard)
8
+ def sw_language_facet
9
+ mods_ng_xml.language.flat_map do |n|
10
+ # get languageTerm codes and add their translations to the result
11
+ result = n.code_term.flat_map do |ct|
12
+ if ct.authority =~ /^iso639/
13
+ vals = ct.text.split(/[,|\ ]/).reject { |x| x.strip.empty? }
14
+ vals.select { |v| ISO_639.find(v.strip) }.map do |v|
15
+ iso639_val = ISO_639.find(v.strip).english_name
16
+
17
+ if SEARCHWORKS_LANGUAGES.has_value?(iso639_val)
18
+ iso639_val
19
+ else
20
+ SEARCHWORKS_LANGUAGES[v.strip]
21
+ end
22
+ end
23
+ else
24
+ vals = ct.text.split(/[,|\ ]/).reject { |x| x.strip.empty? }
25
+
26
+ vals.map do |v|
27
+ SEARCHWORKS_LANGUAGES[v.strip]
28
+ end
29
+ end
30
+ end
31
+
32
+ # add languageTerm text values
33
+ result.concat(n.text_term.map { |tt| tt.text.strip }.select { |val| !val.empty? && SEARCHWORKS_LANGUAGES.has_value?(val) })
34
+
35
+ # add language values that aren't in languageTerm subelement
36
+ result << n.text if n.languageTerm.empty? && SEARCHWORKS_LANGUAGES.has_value?(n.text)
37
+
38
+ result
39
+ end.uniq
40
+ end
41
+
42
+ # select one or more format values from the controlled vocabulary per JVine Summer 2014
43
+ # http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format_main_ssim&rows=0&facet.sort=index
44
+ # https://github.com/sul-dlss/stanford-mods/issues/66 - For geodata, the
45
+ # resource type should be only Map and not include Software, multimedia.
46
+ # @return <Array[String]> value in the SearchWorks controlled vocabulary
47
+ def format_main
48
+ types = typeOfResource
49
+ return [] unless types
50
+
51
+ val = []
52
+ genres = term_values(:genre) || []
53
+ issuance = term_values([:origin_info, :issuance]) || []
54
+ frequency = term_values([:origin_info, :frequency]) || []
55
+
56
+ val << 'Dataset' if genres.include?('dataset') || genres.include?('Dataset')
57
+ val << 'Archive/Manuscript' if types.any? { |t| t.manuscript == 'yes' }
58
+
59
+ val.concat(types.flat_map do |type|
60
+ case type.text
61
+ when 'cartographic'
62
+ 'Map'
63
+ when 'mixed material'
64
+ 'Archive/Manuscript'
65
+ when 'moving image'
66
+ 'Video'
67
+ when 'notated music'
68
+ 'Music score'
69
+ when 'software, multimedia'
70
+ 'Software/Multimedia' unless types.map(&:text).include?('cartographic') || (genres.include?('dataset') || genres.include?('Dataset'))
71
+ when 'sound recording-musical'
72
+ 'Music recording'
73
+ when 'sound recording-nonmusical', 'sound recording'
74
+ 'Sound recording'
75
+ when 'still image'
76
+ 'Image'
77
+ when 'text'
78
+ is_periodical = issuance.include?('continuing') || issuance.include?('serial') || frequency.any? { |x| !x.empty? }
79
+ is_archived_website = genres.any? { |x| x.casecmp('archived website') == 0 }
80
+
81
+ if is_periodical || is_archived_website
82
+ [
83
+ ('Journal/Periodical' if is_periodical),
84
+ ('Archived website' if is_archived_website)
85
+ ].compact
86
+ else
87
+ 'Book'
88
+ end
89
+ when 'three dimensional object'
90
+ 'Object'
91
+ end
92
+ end)
93
+
94
+ val.compact.uniq
95
+ end
96
+
97
+ # @return <Array[String]> values for the genre facet in SearchWorks
98
+ def sw_genre
99
+ genres = term_values(:genre)
100
+ return [] unless genres
101
+
102
+ val = genres.map(&:to_s)
103
+ thesis_pub = ['thesis', 'Thesis']
104
+ val << 'Thesis/Dissertation' if (genres & thesis_pub).any?
105
+
106
+ conf_pub = ['conference publication', 'Conference publication', 'Conference Publication']
107
+ gov_pub = ['government publication', 'Government publication', 'Government Publication']
108
+ tech_rpt = ['technical report', 'Technical report', 'Technical Report']
109
+
110
+ val << 'Conference proceedings' if (genres & conf_pub).any?
111
+ val << 'Government document' if (genres & gov_pub).any?
112
+ val << 'Technical report' if (genres & tech_rpt).any?
113
+
114
+ val.uniq
115
+ end
116
+
117
+ # @return [String] value with the numeric catkey in it, or nil if none exists
118
+ def catkey
119
+ catkey = term_values([:record_info, :recordIdentifier])
120
+
121
+ catkey.first&.tr('a', '') # ensure catkey is numeric only
122
+ end
123
+ end # module Searchworks
124
+ end # Module Mods
125
+ end # Module Stanford
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ # SearchWorks specific wranglings of MODS *subject* metadata as a mixin to the Stanford::Mods::Record object
4
+ module Stanford
5
+ module Mods
6
+ module SearchworksSubjects
7
+ # Values are the contents of:
8
+ # mods/subject/topic
9
+ # @return [Array<String>] values for the topic_search Solr field for this document (empty Array if none)
10
+ def topic_search
11
+ subject_topics
12
+ end
13
+
14
+ # Values are the contents of:
15
+ # subject/topic
16
+ # subject/name
17
+ # subject/title
18
+ # subject/occupation
19
+ # with trailing comma, semicolon, and backslash (and any preceding spaces) removed
20
+ # @return [Array<String>] values for the topic_facet Solr field for this document (empty Array if none)
21
+ def topic_facet
22
+ strip_punctuation(subject_topics + subject_names + subject_titles + subject_occupations)
23
+ end
24
+
25
+ # geographic_search values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
26
+ # @return [Array<String>] values for the geographic_facet Solr field for this document (empty Array if none)
27
+ def geographic_facet
28
+ strip_punctuation(geographic_search)
29
+ end
30
+
31
+ # subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
32
+ # @return [Array<String>] values for the era_facet Solr field for this document (empty Array if none)
33
+ def era_facet
34
+ strip_punctuation(subject_temporal)
35
+ end
36
+
37
+ # Values are the contents of:
38
+ # subject/geographic
39
+ # subject/hierarchicalGeographic
40
+ # subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
41
+ # @return [Array<String>] values for the geographic_search Solr field for this document (empty Array if none)
42
+ def geographic_search
43
+ result = term_values([:subject, :geographic]) || []
44
+
45
+ # hierarchicalGeographic has sub elements
46
+ hierarchical_vals = mods_ng_xml.subject.hierarchicalGeographic.map do |hg_node|
47
+ hg_vals = hg_node.element_children.map(&:text).reject(&:empty?)
48
+ hg_vals.join(' ') unless hg_vals.empty?
49
+ end
50
+
51
+ trans_code_vals = mods_ng_xml.subject.geographicCode.translated_value || []
52
+
53
+ (result + hierarchical_vals + trans_code_vals).compact.uniq
54
+ end
55
+
56
+ # Values are the contents of:
57
+ # subject/name
58
+ # subject/occupation - no subelements
59
+ # subject/titleInfo
60
+ # @return [Array<String>] values for the subject_other_search Solr field for this document (empty Array if none)
61
+ def subject_other_search
62
+ subject_occupations + subject_names + subject_titles
63
+ end
64
+
65
+ # Values are the contents of:
66
+ # subject/temporal
67
+ # subject/genre
68
+ # @return [Array<String>] values for the subject_other_subvy_search Solr field for this document (empty Array if none)
69
+ def subject_other_subvy_search
70
+ vals = Array(subject_temporal)
71
+ gvals = term_values([:subject, :genre])
72
+
73
+ vals + Array(gvals)
74
+ end
75
+
76
+ # Values are the contents of:
77
+ # all subject subelements except subject/cartographic plus genre top level element
78
+ # @return [Array<String>] values for the subject_all_search Solr field for this document (empty Array if none)
79
+ def subject_all_search
80
+ topic_search + geographic_search + subject_other_search + subject_other_subvy_search
81
+ end
82
+
83
+ protected #----------------------------------------------------------
84
+
85
+ # convenience method for subject/name/namePart values (to avoid parsing the mods for the same thing multiple times)
86
+ def subject_names
87
+ mods_ng_xml.subject.name_el
88
+ .select { |n_el| n_el.namePart }
89
+ .map { |name_el_w_np| name_el_w_np.namePart.map(&:text).reject(&:empty?) }
90
+ .reject(&:empty?)
91
+ .map { |parts| parts.join(', ').strip }
92
+ end
93
+
94
+ # convenience method for subject/occupation values (to avoid parsing the mods for the same thing multiple times)
95
+ def subject_occupations
96
+ term_values([:subject, :occupation]) || []
97
+ end
98
+
99
+ # convenience method for subject/temporal values (to avoid parsing the mods for the same thing multiple times)
100
+ def subject_temporal
101
+ term_values([:subject, :temporal]) || []
102
+ end
103
+
104
+ # Values are the contents of:
105
+ # subject/titleInfo/(subelements)
106
+ # convenience method for subject/titleInfo values (to avoid parsing the mods for the same thing multiple times)
107
+ def subject_titles
108
+ mods_ng_xml.subject.titleInfo.map do |ti_el|
109
+ parts = ti_el.element_children.map(&:text).reject(&:empty?)
110
+ parts.join(' ').strip unless parts.empty?
111
+ end.compact
112
+ end
113
+
114
+ # convenience method for subject/topic values (to avoid parsing the mods for the same thing multiple times)
115
+ def subject_topics
116
+ term_values([:subject, :topic]) || []
117
+ end
118
+
119
+ private
120
+
121
+ def strip_punctuation(arr)
122
+ arr&.map { |val| val.gsub(/[\\,;]$/, '').strip }
123
+ end
124
+ end
125
+ end
126
+ end
@@ -0,0 +1,87 @@
1
+ module Stanford
2
+ module Mods
3
+ module Title
4
+ # @return [String] value for title_245a_search field
5
+ def sw_short_title
6
+ short_titles&.compact&.reject(&:empty?)&.first
7
+ end
8
+
9
+ # Searchworks requires that the MODS has a '//titleInfo/title'
10
+ # @return [String] value for title_245_search, title_full_display
11
+ def sw_full_title(title_info = first_title_info_node, sortable: false)
12
+ return unless title_info&.children&.any?
13
+
14
+ title = title_info.title&.text&.strip
15
+ return if title.nil? || title.empty?
16
+
17
+ title = ''
18
+ previous_element = nil
19
+
20
+ title_info.children.select { |value| title_parts.include? value.name }.each do |value|
21
+ next if value.name == 'nonSort' && sortable
22
+
23
+ str = value.text.strip
24
+ next if str.empty?
25
+
26
+ delimiter = if title.empty? || title.end_with?(' ')
27
+ nil
28
+ elsif previous_element&.name == 'nonSort' && title.end_with?('-', '\'')
29
+ nil
30
+ elsif title.end_with?('.', ',', ':', ';')
31
+ ' '
32
+ elsif value.name == 'subTitle'
33
+ ' : '
34
+ elsif value.name == 'partName' && previous_element.name == 'partNumber'
35
+ ', '
36
+ elsif value.name == 'partNumber' || value.name == 'partName'
37
+ '. '
38
+ else
39
+ ' '
40
+ end
41
+
42
+ title += delimiter if delimiter
43
+ title += str
44
+
45
+ previous_element = value
46
+ end
47
+
48
+ title += "." unless title =~ /\s*[[:punct:]]$/
49
+
50
+ title.strip
51
+ end
52
+
53
+ def title_parts
54
+ %w[nonSort title subTitle partName partNumber]
55
+ end
56
+
57
+ # like sw_full_title without trailing \,/;:.
58
+ # spec from solrmarc-sw sw_index.properties
59
+ # title_display = custom, removeTrailingPunct(245abdefghijklmnopqrstuvwxyz, [\\\\,/;:], ([A-Za-z]{4}|[0-9]{3}|\\)|\\,))
60
+ # @return [String] value for title_display (like title_full_display without trailing punctuation)
61
+ def sw_title_display
62
+ sw_full_title&.sub(/[\.,;:\/\\]+$/, '')&.strip
63
+ end
64
+
65
+ # this includes all titles except the full title of the titleInfo node returned by first_title_info_node
66
+ # @return [Array<String>] values for title_variant_search
67
+ def sw_addl_titles
68
+ (full_titles - Array(first_title_info_node&.full_title)).reject(&:blank?)
69
+ end
70
+
71
+ # Returns a sortable version of the main title
72
+ # @return [String] value for title_sort field
73
+ def sw_sort_title
74
+ val = sw_full_title(sortable: true) || ''
75
+ val.gsub(/[[:punct:]]*/, '').squeeze(" ").strip
76
+ end
77
+
78
+ private
79
+
80
+ # @return [Nokogiri::XML::Node] the first titleInfo node if present, else nil
81
+ def first_title_info_node
82
+ non_blank_nodes = mods_ng_xml.title_info.reject { |node| node.text.strip.empty? }
83
+ non_blank_nodes.find { |node| node.type_at != 'alternative' } || non_blank_nodes.first
84
+ end
85
+ end
86
+ end
87
+ end
@@ -4,9 +4,6 @@ module Stanford
4
4
  ##
5
5
  # Geospatial coordinate parsing
6
6
  class Coordinate
7
- require 'stanford-mods/geo_utils'
8
- include ::Stanford::Mods::GeoUtils
9
-
10
7
  attr_reader :value
11
8
 
12
9
  def initialize(value)
@@ -57,6 +54,30 @@ module Stanford
57
54
  def coord
58
55
  cleaner_coordinate(value)
59
56
  end
57
+
58
+ # @param [String] val Coordinates value
59
+ # @return [String] cleaned value (strips parens and period), or the original value
60
+ def cleaner_coordinate(val)
61
+ matches = val.match(/^\(?([^)]+)\)?\.?$/)
62
+ matches ? matches[1] : val
63
+ end
64
+
65
+ # @param [String] point coordinate point in degrees notation
66
+ # @return [Float] converted value in decimal notation
67
+ def coord_to_decimal(point)
68
+ regex = Regexp.union(
69
+ /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/,
70
+ /^\s*(?<dir>[NESW])\s*(?<deg>\d+(?:[.]\d+)?)\s*$/
71
+ )
72
+ match = regex.match(point)
73
+ return Float::INFINITY unless match
74
+
75
+ dec = match['deg'].to_f
76
+ dec += match['min'].to_f / 60
77
+ dec += match['sec'].to_f / 60 / 60
78
+ dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
79
+ dec
80
+ end
60
81
  end
61
82
  end
62
83
  end