RubyGems - stanford-mods - Versions diffs - 2.6.4 → 3.0.0.alpha1 - Mend

stanford-mods 2.6.4 → 3.0.0.alpha1

Files changed (38) hide show

checksums.yaml +4 -4
data/.github/workflows/ruby.yml +1 -1
data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
data/lib/stanford-mods/concerns/name.rb +57 -0
data/lib/stanford-mods/concerns/origin_info.rb +109 -0
data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
data/lib/stanford-mods/concerns/searchworks.rb +125 -0
data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
data/lib/stanford-mods/concerns/title.rb +79 -0
data/lib/stanford-mods/coordinate.rb +21 -3
data/lib/stanford-mods/date_parsing.rb +32 -289
data/lib/stanford-mods/imprint.rb +148 -325
data/lib/stanford-mods/record.rb +20 -0
data/lib/stanford-mods/version.rb +1 -1
data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +0 -0
data/lib/stanford-mods.rb +12 -11
data/spec/fixtures/searchworks_imprint_data.rb +38 -39
data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
data/spec/geo_spatial_spec.rb +1 -6
data/spec/imprint_spec.rb +238 -207
data/spec/name_spec.rb +26 -230
data/spec/origin_info_spec.rb +34 -300
data/spec/searchworks_basic_spec.rb +1 -3
data/spec/searchworks_pub_dates_spec.rb +0 -215
data/spec/searchworks_spec.rb +0 -21
data/spec/searchworks_subject_raw_spec.rb +106 -105
data/spec/searchworks_subject_spec.rb +19 -55
data/spec/searchworks_title_spec.rb +1 -1
data/stanford-mods.gemspec +1 -1
metadata +21 -17
data/lib/marc_countries.rb +0 -387
data/lib/stanford-mods/geo_utils.rb +0 -28
data/lib/stanford-mods/name.rb +0 -80
data/lib/stanford-mods/origin_info.rb +0 -489
data/lib/stanford-mods/searchworks.rb +0 -333
data/lib/stanford-mods/searchworks_subjects.rb +0 -196
data/spec/date_parsing_spec.rb +0 -905

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 922ecb4ab53df951ef735de705037f218347938cac621886b54dc71463c215a6
-  data.tar.gz: 5426c132435bdc34a74df664b4bcbc6797682482689e0843ebdc7527e8df0715
+  metadata.gz: bccde0bc740f4135ee9de0b7255f3ac347a16c858ca463801111f37f8ad84ecc
+  data.tar.gz: a6e3fc9534efe4e37b05bf047007a5b9e3cd41f662e568b1b2333a1cc1213dce
 SHA512:
-  metadata.gz: eb12a388c219e4b9f99746d543ecc38cc1a67bc68302dc4255c81a088ffaad2fada7f6387b9525441d199c7ec536f54ba109f38ad92bf2d1d6d771077cdbc6d5
-  data.tar.gz: 2b904ae85c26eef0717a8e1250a7385889b00fe55c2e8ee59c83798f55f46e4e0ee4593f054dc3ab1a02e18095813ba3b16632f77340ddc212a54f36a871931a
+  metadata.gz: 7ab80915b8f299e35822e9f2b6e0047cf1b32c62bec9ac0218daeb0d7bd3fb4753d1668075b754e5a9a955098fb785c94a9c188654295894c2867fc7d04f7574
+  data.tar.gz: fb1e1835b67fb30c7d19c2046c08a9a60bddda35b6b4067bb3518c8d8d9385a09d80846fc46e958f67d9a0eb89dcb2aef36ede3da03fc723f0158d1db3b81ee9

data/.github/workflows/ruby.yml CHANGED Viewed

@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        ruby: [jruby-9.2.14.0, 2.7, 3.0]
+        ruby: [jruby-9.3.2.0, 2.7, '3.0', '3.1']
     steps:
     - uses: actions/checkout@v2
     - name: Set up Ruby

data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} RENAMED Viewed

@@ -1,10 +1,9 @@
-# encoding: UTF-8
-require 'mods'
+# frozen_string_literal: true
 module Stanford
   module Mods
     # NON-SearchWorks specific wranglings of MODS cartographics metadata
-    class Record < ::Mods::Record
+    module Geospatial
       GMLNS = 'http://www.opengis.net/gml/3.2/'.freeze
       # @return [Array{String}] subject cartographic coordinates values
@@ -27,8 +26,7 @@ module Stanford
                      lowers = v.xpath('gml:lowerCorner', 'gml' => GMLNS).text.split
                      "ENVELOPE(#{lowers[0]}, #{uppers[0]}, #{uppers[1]}, #{lowers[1]})"
                    end
-      rescue RuntimeError => e
-        logger.warn "failure parsing <extension> element: #{e.message}"
+      rescue RuntimeError
         []
       end

data/lib/stanford-mods/concerns/name.rb ADDED Viewed

@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+# NON-SearchWorks specific wranglings of MODS <name> metadata as a mixin to the Stanford::Mods::Record object
+module Stanford
+  module Mods
+    module Name
+      # the first encountered <mods><name> element with marcrelator flavor role of 'Creator' or 'Author'.
+      # if no marcrelator 'Creator' or 'Author', the first name without a role.
+      # if no name without a role, then nil
+      # @return [String] value for author_1xx_search field
+      def sw_main_author
+        result = mods_ng_xml.plain_name.find { |n| n.role.any? { |r| r.authority.include?('marcrelator') && r.value.any? { |v| v.match(/creator/i) || v.match?(/author/i) } } }
+        result ||= mods_ng_xml.plain_name.find { |n| n.role.empty? }
+        result&.display_value_w_date
+      end
+      # all names, in display form, except the main_author
+      #  names will be the display_value_w_date form
+      #  see Mods::Record.name  in nom_terminology for details on the display_value algorithm
+      # @return [Array<String>] values for author_7xx_search field
+      def sw_addl_authors
+        mods_ng_xml.plain_name.map(&:display_value_w_date) - [sw_main_author]
+      end
+      # @return [Array<String>] values for author_person_facet, author_person_display
+      def sw_person_authors
+        mods_ng_xml.personal_names.map(&:display_value_w_date)
+      end
+      # return the display_value_w_date for all <mods><name> elements that do not have type='personal'
+      # @return [Array<String>] values for author_other_facet
+      def sw_impersonal_authors
+        mods_ng_xml.plain_name.select { |n| n.type_at != 'personal' }.map(&:display_value_w_date)
+      end
+      # @return [Array<String>] values for author_corp_display
+      def sw_corporate_authors
+        mods_ng_xml.corporate_name.map(&:display_value_w_date)
+      end
+      # @return [Array<String>] values for author_meeting_display
+      def sw_meeting_authors
+        mods_ng_xml.conference_name.map(&:display_value_w_date)
+      end
+      # Returns a sortable version of the main_author:
+      #  main_author + sorting title
+      # which is the mods approximation of the value created for a marc record
+      # @return [String] value for author_sort field
+      def sw_sort_author
+        #  substitute java Character.MAX_CODE_POINT for nil main_author so missing main authors sort last
+        "#{sw_main_author || "\u{10FFFF} " }#{sort_title}".gsub(/[[:punct:]]*/, '').strip
+      end
+    end # module Name
+  end # Module Mods
+end # Module Stanford

data/lib/stanford-mods/concerns/origin_info.rb ADDED Viewed

@@ -0,0 +1,109 @@
+# frozen_string_literal: true
+# Parsing MODS /originInfo for Publication/Imprint data:
+#  * pub year for date slider facet
+#  * pub year for sorting
+#  * pub year for single display value
+#  * imprint info for display
+#  *
+# These methods may be used by searchworks.rb file or by downstream apps
+module Stanford
+  module Mods
+    module OriginInfo
+      # return pub year as an Integer
+      # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
+      #  look for a keyDate and use it if there is one;  otherwise pick earliest date
+      # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
+      # @return [Integer] publication year as an Integer
+      # @note for sorting:  5 B.C. => -5;  666 B.C. => -666
+      def pub_year_int(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
+        fields.each do |date_key|
+          values = mods_ng_xml.origin_info.send(date_key)
+          values = values.reject(&method(:is_approximate)) if ignore_approximate
+          earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
+          return earliest_date.year_int_from_date_str if earliest_date&.year_int_from_date_str
+        end; nil
+      end
+      # return a single string intended for lexical sorting for pub date
+      # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
+      #  look for a keyDate and use it if there is one;  otherwise pick earliest date
+      # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
+      # @return [String] single String containing publication year for lexical sorting
+      # @note for string sorting  5 B.C. = -5  => -995;  6 B.C. => -994, so 6 B.C. sorts before 5 B.C.
+      # @deprecated use pub_year_int
+      def pub_year_sort_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
+        fields.each do |date_key|
+          values = mods_ng_xml.origin_info.send(date_key)
+          values = values.reject(&method(:is_approximate)) if ignore_approximate
+          earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
+          return earliest_date.sortable_year_string_from_date_str if earliest_date&.sortable_year_string_from_date_str
+        end; nil
+      end
+      # return a single string intended for display of pub year
+      # 0 < year < 1000:  add A.D. suffix
+      # year < 0:  add B.C. suffix.  ('-5'  =>  '5 B.C.', '700 B.C.'  => '700 B.C.')
+      # 195u =>  195x
+      # 19uu => 19xx
+      #   '-5'  =>  '5 B.C.'
+      #   '700 B.C.'  => '700 B.C.'
+      #   '7th century' => '7th century'
+      # date ranges?
+      # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
+      #  look for a keyDate and use it if there is one;  otherwise pick earliest date
+      # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
+      #   should be ignored; false if approximate dates should be included
+      def pub_year_display_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
+        fields.each do |date_key|
+          values = mods_ng_xml.origin_info.send(date_key)
+          values = values.reject(&method(:is_approximate)) if ignore_approximate
+          earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
+          return earliest_date.date_str_for_display if earliest_date&.date_str_for_display
+        end; nil
+      end
+      # @return [Array<Stanford::Mods::Imprint>] array of imprint objects
+      # @private
+      def imprints
+        origin_info.map { |el| Stanford::Mods::Imprint.new(el) }
+      end
+      # @return [String] single String containing imprint information for display
+      def imprint_display_str
+        imprints.map(&:display_str).reject(&:empty?).join('; ')
+      end
+      # true if the date element has a qualifier attribute of 'approximate' or 'questionable'
+      # @param [Nokogiri::XML::Element] node the date element
+      # @return [Boolean]
+      # @private
+      def is_approximate(node)
+        qualifier = node["qualifier"] if node.respond_to?('[]')
+        qualifier == 'approximate' || qualifier == 'questionable'
+      end
+      # get earliest parseable year from the passed date elements
+      # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
+      # @return [Stanford::Mods::DateParsing, nil] the chosen date, or nil if no element yields a sortable year
+      def self.best_or_earliest_year(date_el_array)
+        key_dates, other_dates = date_el_array.partition { |node| node['keyDate'] == 'yes' }
+        sortable_dates = key_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str)
+        sortable_dates = other_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str) if sortable_dates.empty?
+        results = {}
+        # this is a little weird; instead of just the earliest sorting date, if there are multiple
+        # dates with the same sort key, we want to make sure we get the last occurring one?
+        sortable_dates.each do |v|
+          results[v.sortable_year_string_from_date_str] = v
+        end
+        results[results.keys.min]
+      end
+    end # module OriginInfo
+  end
+end

data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} RENAMED Viewed

@@ -1,4 +1,4 @@
-require 'mods'
+# frozen_string_literal: true
 module Stanford
   module Mods
@@ -7,7 +7,7 @@ module Stanford
     # Note: mods_ng_xml_location.physicalLocation should find top level and relatedItem.
     # Each method here expects to find at most ONE matching element.  Subsequent potential matches
     # are ignored.
-    class Record < ::Mods::Record
+    module PhysicalLocation
       # data in location/physicalLocation or in relatedItem/location/physicalLocation
       # so use _location to get the data from either one of them
       # @return [String] box number (note: single valued and might be something like 35A)

data/lib/stanford-mods/concerns/searchworks.rb ADDED Viewed

@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+# SearchWorks specific wranglings of MODS metadata as a mixin to the Stanford::Mods::Record object
+module Stanford
+  module Mods
+    module Searchworks
+      # include languages known to SearchWorks; try to error correct when possible (e.g. when ISO-639 disagrees with MARC standard)
+      def sw_language_facet
+        mods_ng_xml.language.flat_map do |n|
+          # get languageTerm codes and add their translations to the result
+          result = n.code_term.flat_map do |ct|
+            if ct.authority =~ /^iso639/
+              vals = ct.text.split(/[,|\ ]/).reject { |x| x.strip.empty? }
+              vals.select { |v| ISO_639.find(v.strip) }.map do |v|
+                iso639_val = ISO_639.find(v.strip).english_name
+                if SEARCHWORKS_LANGUAGES.has_value?(iso639_val)
+                  iso639_val
+                else
+                  SEARCHWORKS_LANGUAGES[v.strip]
+                end
+              end
+            else
+              vals = ct.text.split(/[,|\ ]/).reject { |x| x.strip.empty? }
+              vals.map do |v|
+                SEARCHWORKS_LANGUAGES[v.strip]
+              end
+            end
+          end
+          # add languageTerm text values
+          result.concat(n.text_term.map { |tt| tt.text.strip }.select { |val| !val.empty? && SEARCHWORKS_LANGUAGES.has_value?(val) })
+          # add language values that aren't in languageTerm subelement
+          result << n.text if n.languageTerm.empty? && SEARCHWORKS_LANGUAGES.has_value?(n.text)
+          result
+        end.uniq
+      end
+      # select one or more format values from the controlled vocabulary per JVine Summer 2014
+      #   http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format_main_ssim&rows=0&facet.sort=index
+      # https://github.com/sul-dlss/stanford-mods/issues/66 - For geodata, the
+      # resource type should be only Map and not include Software, multimedia.
+      # @return [Array<String>] values in the SearchWorks controlled vocabulary
+      def format_main
+        types = typeOfResource
+        return [] unless types
+        val = []
+        genres = term_values(:genre) || []
+        issuance = term_values([:origin_info, :issuance]) || []
+        frequency = term_values([:origin_info, :frequency]) || []
+        val << 'Dataset' if genres.include?('dataset') || genres.include?('Dataset')
+        val << 'Archive/Manuscript' if types.any? { |t| t.manuscript == 'yes' }
+        val.concat(types.flat_map do |type|
+          case type.text
+            when 'cartographic'
+              'Map'
+            when 'mixed material'
+              'Archive/Manuscript'
+            when 'moving image'
+              'Video'
+            when 'notated music'
+              'Music score'
+            when 'software, multimedia'
+              'Software/Multimedia' unless types.map(&:text).include?('cartographic') || (genres.include?('dataset') || genres.include?('Dataset'))
+            when 'sound recording-musical'
+              'Music recording'
+            when 'sound recording-nonmusical', 'sound recording'
+              'Sound recording'
+            when 'still image'
+              'Image'
+            when 'text'
+              is_periodical = issuance.include?('continuing') || issuance.include?('serial') || frequency.any? { |x| !x.empty? }
+              is_archived_website = genres.any? { |x| x.casecmp('archived website') == 0 }
+              if is_periodical || is_archived_website
+                [
+                  ('Journal/Periodical' if is_periodical),
+                  ('Archived website' if is_archived_website)
+                ].compact
+              else
+                'Book'
+              end
+            when 'three dimensional object'
+              'Object'
+          end
+        end)
+        val.compact.uniq
+      end
+      # @return [Array<String>] values for the genre facet in SearchWorks
+      def sw_genre
+        genres = term_values(:genre)
+        return [] unless genres
+        val = genres.map(&:to_s)
+        thesis_pub = ['thesis', 'Thesis']
+        val << 'Thesis/Dissertation' if (genres & thesis_pub).any?
+        conf_pub = ['conference publication', 'Conference publication', 'Conference Publication']
+        gov_pub  = ['government publication', 'Government publication', 'Government Publication']
+        tech_rpt = ['technical report', 'Technical report', 'Technical Report']
+        val << 'Conference proceedings' if (genres & conf_pub).any?
+        val << 'Government document' if (genres & gov_pub).any?
+        val << 'Technical report' if (genres & tech_rpt).any?
+        val.uniq
+      end
+      # @return [String] value with the numeric catkey in it, or nil if none exists
+      def catkey
+        catkey = term_values([:record_info, :recordIdentifier])
+        catkey.first&.tr('a', '') # ensure catkey is numeric only
+      end
+    end # module Searchworks
+  end # Module Mods
+end # Module Stanford

data/lib/stanford-mods/concerns/searchworks_subjects.rb ADDED Viewed

@@ -0,0 +1,126 @@
+# frozen_string_literal: true
+# SearchWorks specific wranglings of MODS  *subject* metadata as a mixin to the Stanford::Mods::Record object
+module Stanford
+  module Mods
+    module SearchworksSubjects
+      # Values are the contents of:
+      #   mods/subject/topic
+      # @return [Array<String>] values for the topic_search Solr field for this document (empty if none)
+      def topic_search
+        subject_topics
+      end
+      # Values are the contents of:
+      #   subject/topic
+      #   subject/name
+      #   subject/title
+      #   subject/occupation
+      #  with trailing comma, semicolon, and backslash (and any preceding spaces) removed
+      # @return [Array<String>] values for the topic_facet Solr field for this document (empty if none)
+      def topic_facet
+        strip_punctuation(subject_topics + subject_names + subject_titles + subject_occupations)
+      end
+      # geographic_search values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
+      # @return [Array<String>] values for the geographic_facet Solr field for this document (empty if none)
+      def geographic_facet
+        strip_punctuation(geographic_search)
+      end
+      # subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
+      # @return [Array<String>] values for the era_facet Solr field for this document (empty if none)
+      def era_facet
+        strip_punctuation(subject_temporal)
+      end
+      # Values are the contents of:
+      #   subject/geographic
+      #   subject/hierarchicalGeographic
+      #   subject/geographicCode  (only include the translated value if it isn't already present from other mods geo fields)
+      # @return [Array<String>] values for the geographic_search Solr field for this document (empty if none)
+      def geographic_search
+        result = term_values([:subject, :geographic]) || []
+        # hierarchicalGeographic has sub elements
+        hierarchical_vals = mods_ng_xml.subject.hierarchicalGeographic.map do |hg_node|
+          hg_vals = hg_node.element_children.map(&:text).reject(&:empty?)
+          hg_vals.join(' ') unless hg_vals.empty?
+        end
+        trans_code_vals = mods_ng_xml.subject.geographicCode.translated_value || []
+        (result + hierarchical_vals + trans_code_vals).compact.uniq
+      end
+      # Values are the contents of:
+      #   subject/name
+      #   subject/occupation  - no subelements
+      #   subject/titleInfo
+      # @return [Array<String>] values for the subject_other_search Solr field for this document (empty if none)
+      def subject_other_search
+        subject_occupations + subject_names + subject_titles
+      end
+      # Values are the contents of:
+      #   subject/temporal
+      #   subject/genre
+      # @return [Array<String>] values for the subject_other_subvy_search Solr field for this document (empty if none)
+      def subject_other_subvy_search
+        vals = Array(subject_temporal)
+        gvals = term_values([:subject, :genre])
+        vals + Array(gvals)
+      end
+      # Values are the contents of:
+      #  all subject subelements except subject/cartographic plus  genre top level element
+      # @return [Array<String>] values for the subject_all_search Solr field for this document (empty if none)
+      def subject_all_search
+        topic_search + geographic_search + subject_other_search + subject_other_subvy_search
+      end
+      protected #----------------------------------------------------------
+      # convenience method for subject/name/namePart values (to avoid parsing the mods for the same thing multiple times)
+      def subject_names
+        mods_ng_xml.subject.name_el
+          .select { |n_el| n_el.namePart }
+          .map { |name_el_w_np| name_el_w_np.namePart.map(&:text).reject(&:empty?) }
+          .reject(&:empty?)
+          .map { |parts| parts.join(', ').strip }
+      end
+      # convenience method for subject/occupation values (to avoid parsing the mods for the same thing multiple times)
+      def subject_occupations
+        term_values([:subject, :occupation]) || []
+      end
+      # convenience method for subject/temporal values (to avoid parsing the mods for the same thing multiple times)
+      def subject_temporal
+        term_values([:subject, :temporal]) || []
+      end
+      # Values are the contents of:
+      #   subject/titleInfo/(subelements)
+      # convenience method for subject/titleInfo values (to avoid parsing the mods for the same thing multiple times)
+      def subject_titles
+        mods_ng_xml.subject.titleInfo.map do |ti_el|
+          parts = ti_el.element_children.map(&:text).reject(&:empty?)
+          parts.join(' ').strip unless parts.empty?
+        end.compact
+      end
+      # convenience method for subject/topic values (to avoid parsing the mods for the same thing multiple times)
+      def subject_topics
+        term_values([:subject, :topic]) || []
+      end
+      private
+      def strip_punctuation(arr)
+        arr&.map { |val| val.gsub(/[\\,;]$/, '').strip }
+      end
+    end
+  end
+end

data/lib/stanford-mods/concerns/title.rb ADDED Viewed

@@ -0,0 +1,79 @@
+module Stanford
+  module Mods
+    module Title
+      # @return [String] value for title_245a_search field
+      def sw_short_title
+        short_titles&.compact&.reject(&:empty?)&.first
+      end
+      # Searchworks requires that the MODS has a '//titleInfo/title'
+      # @return [String] value for title_245_search, title_full_display
+      def sw_full_title(title_info = first_title_info_node, sortable: false)
+        return unless title_info
+        title = title_info.title&.text&.strip
+        return if title.nil? || title.empty?
+        nonSort_title = title_info.nonSort&.text&.strip
+        preSubTitle = [(nonSort_title unless sortable), title].compact.join(' ')
+        preSubTitle.sub!(/:$/, '')
+        subTitle = title_info.subTitle.text.strip
+        preParts = subTitle.empty? ? preSubTitle : preSubTitle + " : " + subTitle
+        preParts.sub!(/\.$/, '') if preParts # remove trailing period
+        partName   = title_info.partName.text.strip   unless title_info.partName.text.strip.empty?
+        partNumber = title_info.partNumber.text.strip unless title_info.partNumber.text.strip.empty?
+        partNumber.sub!(/,$/, '') if partNumber # remove trailing comma
+        if partNumber && partName
+          parts = partNumber + ", " + partName
+        elsif partNumber
+          parts = partNumber
+        elsif partName
+          parts = partName
+        end
+        parts.sub!(/\.$/, '') if parts
+        result = parts ? preParts + ". " + parts : preParts
+        return nil unless result
+        result += "." unless result =~ /[[:punct:]]$/
+        result.strip!
+        result = nil if result.empty?
+        result
+      end
+      # like sw_full_title without trailing \,/;:.
+      # spec from solrmarc-sw   sw_index.properties
+      #    title_display = custom, removeTrailingPunct(245abdefghijklmnopqrstuvwxyz, [\\\\,/;:], ([A-Za-z]{4}|[0-9]{3}|\\)|\\,))
+      # @return [String] value for title_display (like title_full_display without trailing punctuation)
+      def sw_title_display
+        sw_full_title&.sub(/[\.,;:\/\\]+$/, '')&.strip
+      end
+      # this includes all titles except the full title of the first (main) titleInfo node
+      # @return [Array<String>] values for title_variant_search
+      def sw_addl_titles
+        (full_titles - first_title_info_node.full_title).reject(&:blank?)
+      end
+      # Returns a sortable version of the main title
+      # @return [String] value for title_sort field
+      def sw_sort_title
+        val = sw_full_title(sortable: true) || ''
+        val.gsub(/[[:punct:]]*/, '').squeeze(" ").strip
+      end
+      private
+      # @return [Nokogiri::XML::Node] the first non-blank titleInfo node whose type is not 'alternative' (falling back to the first non-blank node), else nil
+      def first_title_info_node
+        non_blank_nodes = mods_ng_xml.title_info.reject { |node| node.text.strip.empty? }
+        non_blank_nodes.find { |node| node.type_at != 'alternative' } || non_blank_nodes.first
+      end
+    end
+  end
+end

data/lib/stanford-mods/coordinate.rb CHANGED Viewed

@@ -4,9 +4,6 @@ module Stanford
     ##
     # Geospatial coordinate parsing
     class Coordinate
-      require 'stanford-mods/geo_utils'
-      include ::Stanford::Mods::GeoUtils
       attr_reader :value
       def initialize(value)
@@ -57,6 +54,27 @@ module Stanford
       def coord
         cleaner_coordinate(value)
       end
+      # @param [String] val Coordinates value
+      # @return [String] cleaned value (strips parens and period), or the original value
+      def cleaner_coordinate(val)
+        matches = val.match(/^\(?([^)]+)\)?\.?$/)
+        matches ? matches[1] : val
+      end
+      # @param [String] point coordinate point in degrees notation
+      # @return [Float] converted value in decimal notation, or Float::INFINITY if the point cannot be parsed
+      def coord_to_decimal(point)
+        regex = /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/
+        match = regex.match(point)
+        return Float::INFINITY unless match
+        dec = match['deg'].to_i
+        dec += match['min'].to_f / 60
+        dec += match['sec'].to_f / 60 / 60
+        dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
+        dec
+      end
     end
   end
 end