stanford-mods 1.3.4 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/stanford-mods/date_parsing.rb +80 -34
- data/lib/stanford-mods/origin_info.rb +143 -88
- data/lib/stanford-mods/searchworks.rb +1 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/date_parsing_spec.rb +121 -6
- data/spec/origin_info_spec.rb +80 -35
- data/spec/searchworks_format_spec.rb +1 -0
- data/spec/searchworks_pub_dates_spec.rb +154 -137
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 77f36520f38dfd83cd8cc79480fc5fbda45b2df6
         | 
| 4 | 
            +
              data.tar.gz: b1e7a4db9be39cdbeeca3124a62e3c0403ba31a2
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: fd0ab891b9f578611b928231dcf45debcffc209068032fdbcd98d93ada56e95fc5afa82be97dfc163287297e190ff1ef258abef27dcd58170ec18152381da398
         | 
| 7 | 
            +
              data.tar.gz: f604d45d319fbce30215ff436ad62ebc23a0bab11d995d6599bef137dae9a5465344d69d2492d84b29436711106ae7e80ce9090cb3005c3be4fe1fb004b58b7e
         | 
| @@ -12,7 +12,14 @@ module Stanford | |
| 12 12 | 
             
                  # @param [String] date_str String containing a date (we hope)
         | 
| 13 13 | 
             
                  # @return [String, nil] String facet value for year if we could parse one, nil otherwise
         | 
| 14 14 | 
             
                  def self.facet_string_from_date_str(date_str)
         | 
| 15 | 
            -
                     | 
| 15 | 
            +
                    DateParsing.new(date_str).facet_string_from_date_str
         | 
| 16 | 
            +
                  end
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                  # get year as Integer if we can parse date_str to get a year.
         | 
| 19 | 
            +
                  # @param [String] date_str String containing a date (we hope)
         | 
| 20 | 
            +
                  # @return [Integer, nil] Integer year if we could parse one, nil otherwise
         | 
| 21 | 
            +
                  def self.year_int_from_date_str(date_str)
         | 
| 22 | 
            +
                    DateParsing.new(date_str).year_int_from_date_str
         | 
| 16 23 | 
             
                  end
         | 
| 17 24 |  | 
| 18 25 | 
             
                  # get String sortable value year if we can parse date_str to get a year.
         | 
| @@ -22,7 +29,7 @@ module Stanford | |
| 22 29 | 
             
                  # @return [String, nil] String sortable year if we could parse one, nil otherwise
         | 
| 23 30 | 
             
                  #  note that these values must *lexically* sort to create a chronological sort.
         | 
| 24 31 | 
             
                  def self.sortable_year_string_from_date_str(date_str)
         | 
| 25 | 
            -
                     | 
| 32 | 
            +
                    DateParsing.new(date_str).sortable_year_string_from_date_str
         | 
| 26 33 | 
             
                  end
         | 
| 27 34 |  | 
| 28 35 | 
             
                  # true if the year is between -999 and (current year + 1)
         | 
| @@ -33,6 +40,13 @@ module Stanford | |
| 33 40 | 
             
                    (-1000 < year_str.to_i) && (year_str.to_i < Date.today.year + 2)
         | 
| 34 41 | 
             
                  end
         | 
| 35 42 |  | 
| 43 | 
            +
                  # true if the year is between -9999 and (current year + 1)
         | 
| 44 | 
            +
                  # @return [Boolean] true if the year is between -9999 and (current year + 1); false otherwise
         | 
| 45 | 
            +
                  def self.year_int_valid?(year)
         | 
| 46 | 
            +
                    return false unless year.is_a? Integer
         | 
| 47 | 
            +
                    (-1000 < year.to_i) && (year < Date.today.year + 2)
         | 
| 48 | 
            +
                  end
         | 
| 49 | 
            +
             | 
| 36 50 | 
             
                  attr_reader :orig_date_str
         | 
| 37 51 |  | 
| 38 52 | 
             
                  def initialize(date_str)
         | 
| @@ -48,18 +62,11 @@ module Stanford | |
| 48 62 | 
             
                    return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
         | 
| 49 63 | 
             
                    # B.C. first in case there are 4 digits, e.g. 1600 B.C.
         | 
| 50 64 | 
             
                    return facet_string_for_bc if orig_date_str.match(BC_REGEX)
         | 
| 51 | 
            -
                     | 
| 52 | 
            -
                    result ||= sortable_year_for_yyyy
         | 
| 53 | 
            -
                    # 2 digit year will always be 19xx or 20xx; sortable version will make a good facet string
         | 
| 54 | 
            -
                    result ||= sortable_year_for_yy
         | 
| 55 | 
            -
                    # decades are always 19xx or 20xx; sortable version will make a good facet string
         | 
| 56 | 
            -
                    result ||= sortable_year_for_decade
         | 
| 65 | 
            +
                    result = sortable_year_for_yyyy_yy_or_decade
         | 
| 57 66 | 
             
                    unless result
         | 
| 58 67 | 
             
                      # try removing brackets between digits in case we have 169[5] or [18]91
         | 
| 59 | 
            -
                       | 
| 60 | 
            -
             | 
| 61 | 
            -
                        return DateParsing.new(no_brackets).facet_string_from_date_str
         | 
| 62 | 
            -
                      end
         | 
| 68 | 
            +
                      no_brackets = remove_brackets
         | 
| 69 | 
            +
                      return DateParsing.new(no_brackets).facet_string_from_date_str if no_brackets
         | 
| 63 70 | 
             
                    end
         | 
| 64 71 | 
             
                    # parsing below this line gives string inappropriate for year_str_valid?
         | 
| 65 72 | 
             
                    unless self.class.year_str_valid?(result)
         | 
| @@ -71,6 +78,23 @@ module Stanford | |
| 71 78 | 
             
                    result
         | 
| 72 79 | 
             
                  end
         | 
| 73 80 |  | 
| 81 | 
            +
                  # get Integer year if we can parse date_str to get a year.
         | 
| 82 | 
            +
                  # @return [Integer, nil] Integer year if we could parse one, nil otherwise
         | 
| 83 | 
            +
                  def year_int_from_date_str
         | 
| 84 | 
            +
                    return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
         | 
| 85 | 
            +
                    # B.C. first in case there are 4 digits, e.g. 1600 B.C.
         | 
| 86 | 
            +
                    return sortable_year_int_for_bc if orig_date_str.match(BC_REGEX)
         | 
| 87 | 
            +
                    result = sortable_year_for_yyyy_yy_or_decade
         | 
| 88 | 
            +
                    result ||= sortable_year_for_century
         | 
| 89 | 
            +
                    result ||= sortable_year_int_for_early_numeric
         | 
| 90 | 
            +
                    unless result
         | 
| 91 | 
            +
                      # try removing brackets between digits in case we have 169[5] or [18]91
         | 
| 92 | 
            +
                      no_brackets = remove_brackets
         | 
| 93 | 
            +
                      return DateParsing.new(no_brackets).year_int_from_date_str if no_brackets
         | 
| 94 | 
            +
                    end
         | 
| 95 | 
            +
                    result.to_i if result && self.class.year_int_valid?(result.to_i)
         | 
| 96 | 
            +
                  end
         | 
| 97 | 
            +
             | 
| 74 98 | 
             
                  # get String sortable value year if we can parse date_str to get a year.
         | 
| 75 99 | 
             
                  #   SearchWorks currently uses a string field for pub date sorting; thus so does Spotlight.
         | 
| 76 100 | 
             
                  #   The values returned must *lexically* sort in chronological order, so the B.C. dates are tricky
         | 
| @@ -79,28 +103,39 @@ module Stanford | |
| 79 103 | 
             
                  def sortable_year_string_from_date_str
         | 
| 80 104 | 
             
                    return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
         | 
| 81 105 | 
             
                    # B.C. first in case there are 4 digits, e.g. 1600 B.C.
         | 
| 82 | 
            -
                    return  | 
| 83 | 
            -
                     | 
| 84 | 
            -
                    result = sortable_year_for_yyyy
         | 
| 85 | 
            -
                    result ||= sortable_year_for_yy
         | 
| 86 | 
            -
                    result ||= sortable_year_for_decade
         | 
| 106 | 
            +
                    return sortable_year_str_for_bc if orig_date_str.match(BC_REGEX)
         | 
| 107 | 
            +
                    result = sortable_year_for_yyyy_yy_or_decade
         | 
| 87 108 | 
             
                    result ||= sortable_year_for_century
         | 
| 88 | 
            -
                    result ||=  | 
| 109 | 
            +
                    result ||= sortable_year_str_for_early_numeric
         | 
| 89 110 | 
             
                    unless result
         | 
| 90 111 | 
             
                      # try removing brackets between digits in case we have 169[5] or [18]91
         | 
| 91 | 
            -
                       | 
| 92 | 
            -
             | 
| 93 | 
            -
                        return DateParsing.new(no_brackets).sortable_year_string_from_date_str
         | 
| 94 | 
            -
                      end
         | 
| 112 | 
            +
                      no_brackets = remove_brackets
         | 
| 113 | 
            +
                      return DateParsing.new(no_brackets).sortable_year_string_from_date_str if no_brackets
         | 
| 95 114 | 
             
                    end
         | 
| 96 115 | 
             
                    result if self.class.year_str_valid?(result)
         | 
| 97 116 | 
             
                  end
         | 
| 98 117 |  | 
| 118 | 
            +
                  # get String sortable value year if we can parse date_str to get a year.
         | 
| 119 | 
            +
                  # @return [String, nil] String sortable year if we could parse one, nil otherwise
         | 
| 120 | 
            +
                  #  note that these values must *lexically* sort to create a chronological sort.
         | 
| 121 | 
            +
                  def sortable_year_for_yyyy_yy_or_decade
         | 
| 122 | 
            +
                    # most date strings have a four digit year
         | 
| 123 | 
            +
                    result = sortable_year_for_yyyy
         | 
| 124 | 
            +
                    result ||= sortable_year_for_yy # 19xx or 20xx
         | 
| 125 | 
            +
                    result ||= sortable_year_for_decade # 19xx or 20xx
         | 
| 126 | 
            +
                    result
         | 
| 127 | 
            +
                  end
         | 
| 128 | 
            +
             | 
| 129 | 
            +
                  # removes brackets between digits such as 169[5] or [18]91
         | 
| 130 | 
            +
                  def remove_brackets
         | 
| 131 | 
            +
                    orig_date_str.delete('[]') if orig_date_str.match(BRACKETS_BETWEEN_DIGITS_REXEXP)
         | 
| 132 | 
            +
                  end
         | 
| 133 | 
            +
             | 
| 99 134 | 
             
                  # looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
         | 
| 100 135 | 
             
                  # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
         | 
| 101 136 | 
             
                  def sortable_year_for_yyyy
         | 
| 102 137 | 
             
                    matches = orig_date_str.match(/\d{4}/) if orig_date_str
         | 
| 103 | 
            -
                     | 
| 138 | 
            +
                    matches.to_s if matches
         | 
| 104 139 | 
             
                  end
         | 
| 105 140 |  | 
| 106 141 | 
             
                  # returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
         | 
| @@ -131,10 +166,8 @@ module Stanford | |
| 131 166 | 
             
                  # @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
         | 
| 132 167 | 
             
                  def sortable_year_for_decade
         | 
| 133 168 | 
             
                    decade_matches = orig_date_str.match(/\d{3}[u\-?x]/) if orig_date_str
         | 
| 134 | 
            -
                    if decade_matches
         | 
| 135 | 
            -
             | 
| 136 | 
            -
                      return DateParsing.new(changed_to_zero).sortable_year_for_yyyy
         | 
| 137 | 
            -
                    end
         | 
| 169 | 
            +
                    changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
         | 
| 170 | 
            +
                    DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
         | 
| 138 171 | 
             
                  end
         | 
| 139 172 |  | 
| 140 173 | 
             
                  CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
         | 
| @@ -177,22 +210,29 @@ module Stanford | |
| 177 210 |  | 
| 178 211 | 
             
                  BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
         | 
| 179 212 |  | 
| 180 | 
            -
                  # get String sortable value for B.C. if we have | 
| 213 | 
            +
                  # get String sortable value for B.C. if we have B.C. pattern
         | 
| 181 214 | 
             
                  #  note that these values must *lexically* sort to create a chronological sort.
         | 
| 182 215 | 
             
                  #  We know our data does not contain B.C. dates older than 999, so we can make them
         | 
| 183 216 | 
             
                  #  lexically sort by subtracting 1000.  So we get:
         | 
| 184 217 | 
             
                  #    -700 for 300 B.C., -750 for 250 B.C., -800 for 200 B.C., -801 for 199 B.C.
         | 
| 185 218 | 
             
                  # @return [String, nil] String sortable -ddd if B.C. in pattern; nil otherwise
         | 
| 186 | 
            -
                  def  | 
| 219 | 
            +
                  def sortable_year_str_for_bc
         | 
| 220 | 
            +
                    bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
         | 
| 221 | 
            +
                    ($1.to_i - 1000).to_s if bc_matches
         | 
| 222 | 
            +
                  end
         | 
| 223 | 
            +
             | 
| 224 | 
            +
                  # get Integer sortable value for B.C. if we have B.C. pattern
         | 
| 225 | 
            +
                  # @return [Integer, nil] Integer sortable -ddd if B.C. in pattern; nil otherwise
         | 
| 226 | 
            +
                  def sortable_year_int_for_bc
         | 
| 187 227 | 
             
                    bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
         | 
| 188 | 
            -
                     | 
| 228 | 
            +
                    "-#{$1}".to_i if bc_matches
         | 
| 189 229 | 
             
                  end
         | 
| 190 230 |  | 
| 191 231 | 
             
                  # get single facet value for B.C. if we have  B.C. pattern
         | 
| 192 232 | 
             
                  # @return [String, nil] ddd B.C.  if ddd B.C. in pattern; nil otherwise
         | 
| 193 233 | 
             
                  def facet_string_for_bc
         | 
| 194 234 | 
             
                    bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
         | 
| 195 | 
            -
                     | 
| 235 | 
            +
                    bc_matches.to_s if bc_matches
         | 
| 196 236 | 
             
                  end
         | 
| 197 237 |  | 
| 198 238 | 
             
                  EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')
         | 
| @@ -203,7 +243,7 @@ module Stanford | |
| 203 243 | 
             
                  #  lexically sort by subtracting 1000.  So we get:
         | 
| 204 244 | 
             
                  #    -983 for -17, -999 for -1, 0000 for 0, 0001 for 1, 0017 for 17
         | 
| 205 245 | 
             
                  # @return [String, nil] String sortable -ddd if orig_date_str matches pattern; nil otherwise
         | 
| 206 | 
            -
                  def  | 
| 246 | 
            +
                  def sortable_year_str_for_early_numeric
         | 
| 207 247 | 
             
                    return unless orig_date_str.match(EARLY_NUMERIC)
         | 
| 208 248 | 
             
                    if orig_date_str.match(/^\-/)
         | 
| 209 249 | 
             
                      # negative number becomes x - 1000 for sorting; -005 for -995
         | 
| @@ -214,6 +254,13 @@ module Stanford | |
| 214 254 | 
             
                    end
         | 
| 215 255 | 
             
                  end
         | 
| 216 256 |  | 
| 257 | 
            +
                  # get Integer sortable value from date String containing yyy, yy, y, -y, -yy, -yyy, -yyyy
         | 
| 258 | 
            +
                  # @return [Integer, nil] Integer sortable -ddd if orig_date_str matches pattern; nil otherwise
         | 
| 259 | 
            +
                  def sortable_year_int_for_early_numeric
         | 
| 260 | 
            +
                    return orig_date_str.to_i if orig_date_str.match(EARLY_NUMERIC)
         | 
| 261 | 
            +
                    orig_date_str.to_i if orig_date_str.match(/^-\d{4}$/)
         | 
| 262 | 
            +
                  end
         | 
| 263 | 
            +
             | 
| 217 264 | 
             
                  # get single facet value for date String containing yyy, yy, y, -y, -yy, -yyy
         | 
| 218 265 | 
             
                  #   negative number strings will be changed to B.C. strings
         | 
| 219 266 | 
             
                  def facet_string_for_early_numeric
         | 
| @@ -239,7 +286,6 @@ module Stanford | |
| 239 286 | 
             
                  rescue ArgumentError
         | 
| 240 287 | 
             
                    nil # explicitly want nil if date won't parse
         | 
| 241 288 | 
             
                  end
         | 
| 242 | 
            -
             | 
| 243 289 | 
             
                end
         | 
| 244 290 | 
             
              end
         | 
| 245 | 
            -
            end
         | 
| 291 | 
            +
            end
         | 
| @@ -19,12 +19,18 @@ module Stanford | |
| 19 19 | 
             
                  #   should be ignored; false if approximate dates should be included
         | 
| 20 20 | 
             
                  # @return [String] single String containing publication year for facet use
         | 
| 21 21 | 
             
                  def pub_date_facet_single_value(ignore_approximate = false)
         | 
| 22 | 
            -
                     | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 22 | 
            +
                    single_pub_year(ignore_approximate, :year_facet_str)
         | 
| 23 | 
            +
                  end
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                  # return pub year as an Integer
         | 
| 26 | 
            +
                  # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
         | 
| 27 | 
            +
                  #  look for a keyDate and use it if there is one;  otherwise pick earliest date
         | 
| 28 | 
            +
                  # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
         | 
| 29 | 
            +
                  #   should be ignored; false if approximate dates should be included
         | 
| 30 | 
            +
                  # @return [Integer] publication year as an Integer
         | 
| 31 | 
            +
                  #   note that for sorting  5 B.C. => -5;  666 B.C. => -666
         | 
| 32 | 
            +
                  def pub_year_int(ignore_approximate = false)
         | 
| 33 | 
            +
                    single_pub_year(ignore_approximate, :year_int)
         | 
| 28 34 | 
             
                  end
         | 
| 29 35 |  | 
| 30 36 | 
             
                  # return a single string intended for lexical sorting for pub date
         | 
| @@ -34,47 +40,44 @@ module Stanford | |
| 34 40 | 
             
                  #   should be ignored; false if approximate dates should be included
         | 
| 35 41 | 
             
                  # @return [String] single String containing publication year for lexical sorting
         | 
| 36 42 | 
             
                  #   note that for string sorting  5 B.C. = -5  => -995;  6 B.C. => -994  so 6 B.C. sorts before 5 B.C.
         | 
| 37 | 
            -
                   | 
| 38 | 
            -
             | 
| 39 | 
            -
                     | 
| 40 | 
            -
                    result ||= pub_date_best_sort_str_value(date_created_elements(ignore_approximate))
         | 
| 41 | 
            -
                    # dateCaptured for web archive seed records
         | 
| 42 | 
            -
                    result ||= pub_date_best_sort_str_value(@mods_ng_xml.origin_info.dateCaptured.to_a)
         | 
| 43 | 
            -
                    result
         | 
| 43 | 
            +
                  # @deprecated  use pub_year_int
         | 
| 44 | 
            +
                  def pub_year_sort_str(ignore_approximate = false)
         | 
| 45 | 
            +
                    single_pub_year(ignore_approximate, :year_sort_str)
         | 
| 44 46 | 
             
                  end
         | 
| 45 47 |  | 
| 46 48 | 
             
                  # given the passed date elements, look for a single keyDate and use it if there is one;
         | 
| 47 49 | 
             
                  #    otherwise pick earliest parseable date
         | 
| 48 50 | 
             
                  # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
         | 
| 49 51 | 
             
                  # @return [String] single String containing publication year for facet use
         | 
| 50 | 
            -
                  def  | 
| 51 | 
            -
                     | 
| 52 | 
            -
                    # prefer keyDate
         | 
| 53 | 
            -
                    key_date_el = self.class.keyDate(date_el_array)
         | 
| 54 | 
            -
                    result = DateParsing.facet_string_from_date_str(key_date_el.content) if key_date_el
         | 
| 52 | 
            +
                  def year_facet_str(date_el_array)
         | 
| 53 | 
            +
                    result = date_parsing_result(date_el_array, :facet_string_from_date_str)
         | 
| 55 54 | 
             
                    return result if result
         | 
| 56 | 
            -
                     | 
| 57 | 
            -
                    _ignore, orig_str_to_parse = self.class.earliest_date(date_el_array)
         | 
| 55 | 
            +
                    _ignore, orig_str_to_parse = self.class.earliest_year_str(date_el_array)
         | 
| 58 56 | 
             
                    DateParsing.facet_string_from_date_str(orig_str_to_parse) if orig_str_to_parse
         | 
| 59 57 | 
             
                  end
         | 
| 60 58 |  | 
| 59 | 
            +
                  # given the passed date elements, look for a single keyDate and use it if there is one;
         | 
| 60 | 
            +
                  #    otherwise pick earliest parseable date
         | 
| 61 | 
            +
                  # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
         | 
| 62 | 
            +
                  # @return [Integer] publication year as an Integer
         | 
| 63 | 
            +
                  def year_int(date_el_array)
         | 
| 64 | 
            +
                    result = date_parsing_result(date_el_array, :year_int_from_date_str)
         | 
| 65 | 
            +
                    return result if result
         | 
| 66 | 
            +
                    year_int, _ignore = self.class.earliest_year_int(date_el_array)
         | 
| 67 | 
            +
                    year_int if year_int
         | 
| 68 | 
            +
                  end
         | 
| 69 | 
            +
             | 
| 61 70 | 
             
                  # given the passed date elements, look for a single keyDate and use it if there is one;
         | 
| 62 71 | 
             
                  #    otherwise pick earliest parseable date
         | 
| 63 72 | 
             
                  # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
         | 
| 64 73 | 
             
                  # @return [String] single String containing publication year for lexical sorting
         | 
| 65 | 
            -
                  def  | 
| 66 | 
            -
                     | 
| 67 | 
            -
                    # prefer keyDate
         | 
| 68 | 
            -
                    key_date_el = self.class.keyDate(date_el_array)
         | 
| 69 | 
            -
                    result = DateParsing.sortable_year_string_from_date_str(key_date_el.content) if key_date_el
         | 
| 74 | 
            +
                  def year_sort_str(date_el_array)
         | 
| 75 | 
            +
                    result = date_parsing_result(date_el_array, :sortable_year_string_from_date_str)
         | 
| 70 76 | 
             
                    return result if result
         | 
| 71 | 
            -
                     | 
| 72 | 
            -
                    sortable_str, _ignore = self.class.earliest_date(date_el_array)
         | 
| 77 | 
            +
                    sortable_str, _ignore = self.class.earliest_year_str(date_el_array)
         | 
| 73 78 | 
             
                    sortable_str if sortable_str
         | 
| 74 79 | 
             
                  end
         | 
| 75 80 |  | 
| 76 | 
            -
                  protected :pub_date_best_single_facet_value, :pub_date_best_sort_str_value
         | 
| 77 | 
            -
             | 
| 78 81 | 
             
                  # return /originInfo/dateCreated elements in MODS records
         | 
| 79 82 | 
             
                  # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
         | 
| 80 83 | 
             
                  #   should be excluded; false if approximate dates should be included
         | 
| @@ -122,19 +125,71 @@ module Stanford | |
| 122 125 | 
             
                    qualifier == 'approximate' || qualifier == 'questionable'
         | 
| 123 126 | 
             
                  end
         | 
| 124 127 |  | 
| 125 | 
            -
                  # get earliest parseable  | 
| 128 | 
            +
                  # get earliest parseable year (as an Integer) from the passed date elements
         | 
| 126 129 | 
             
                  # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
         | 
| 127 130 | 
             
                  # @return two String values:
         | 
| 128 | 
            -
                  #   the first is the  | 
| 131 | 
            +
                  #   the first is the Integer value of the earliest year;
         | 
| 129 132 | 
             
                  #   the second is the original String value of the chosen element
         | 
| 130 | 
            -
                  def self. | 
| 131 | 
            -
                     | 
| 132 | 
            -
             | 
| 133 | 
            -
             | 
| 134 | 
            -
             | 
| 135 | 
            -
             | 
| 136 | 
            -
             | 
| 137 | 
            -
             | 
| 133 | 
            +
                  def self.earliest_year_int(date_el_array)
         | 
| 134 | 
            +
                    earliest_year(date_el_array, :year_int_from_date_str)
         | 
| 135 | 
            +
                  end
         | 
| 136 | 
            +
             | 
| 137 | 
            +
                  # get earliest parseable year (as a String) from the passed date elements
         | 
| 138 | 
            +
                  # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
         | 
| 139 | 
            +
                  # @return two String values:
         | 
| 140 | 
            +
                  #   the first is the lexically sortable String value of the earliest year;
         | 
| 141 | 
            +
                  #   the second is the original String value of the chosen element
         | 
| 142 | 
            +
                  def self.earliest_year_str(date_el_array)
         | 
| 143 | 
            +
                    earliest_year(date_el_array, :sortable_year_string_from_date_str)
         | 
| 144 | 
            +
                  end
         | 
| 145 | 
            +
             | 
| 146 | 
            +
                  # return a single value intended for pub date flavor indicated by method_sym
         | 
| 147 | 
            +
                  # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
         | 
| 148 | 
            +
                  #  look for a keyDate and use it if there is one;  otherwise pick earliest date
         | 
| 149 | 
            +
                  # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
         | 
| 150 | 
            +
                  #   should be ignored; false if approximate dates should be included
         | 
| 151 | 
            +
                  # @param [Symbol] method_sym method name in DateParsing, as a symbol
         | 
| 152 | 
            +
                  # @return [String, Integer] publication year as String or Integer
         | 
| 153 | 
            +
                  def single_pub_year(ignore_approximate, method_sym)
         | 
| 154 | 
            +
                    result = send(method_sym, date_issued_elements(ignore_approximate))
         | 
| 155 | 
            +
                    result ||= send(method_sym, date_created_elements(ignore_approximate))
         | 
| 156 | 
            +
                    # dateCaptured for web archive seed records
         | 
| 157 | 
            +
                    result ||= send(method_sym, @mods_ng_xml.origin_info.dateCaptured.to_a)
         | 
| 158 | 
            +
                    result
         | 
| 159 | 
            +
                  end
         | 
| 160 | 
            +
             | 
| 161 | 
            +
                  # given the passed date elements, look for a single keyDate and use it if there is one;
         | 
| 162 | 
            +
                  #    otherwise return nil (this method itself does not fall back to the earliest parseable date)
         | 
| 163 | 
            +
                  # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
         | 
| 164 | 
            +
                  # @param [Symbol] method_sym method name in DateParsing, as a symbol
         | 
| 165 | 
            +
                  # @return [Integer, String] year as a String or Integer, depending on method_sym
         | 
| 166 | 
            +
                  def date_parsing_result(date_el_array, method_sym)
         | 
| 167 | 
            +
                    return if date_el_array.empty?
         | 
| 168 | 
            +
                    # prefer keyDate
         | 
| 169 | 
            +
                    key_date_el = self.class.keyDate(date_el_array)
         | 
| 170 | 
            +
                    DateParsing.send(method_sym, key_date_el.content) if key_date_el
         | 
| 171 | 
            +
                  end
         | 
| 172 | 
            +
                  # temporarily use this technique to mark methods private until we get rid of old date parsing methods below
         | 
| 173 | 
            +
                  private :single_pub_year, :date_parsing_result
         | 
| 174 | 
            +
             | 
| 175 | 
            +
                  class << self
         | 
| 176 | 
            +
                    private
         | 
| 177 | 
            +
                    # get earliest parseable year from the passed date elements
         | 
| 178 | 
            +
                    # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
         | 
| 179 | 
            +
                    # @param [Symbol] method_sym method name in DateParsing, as a symbol
         | 
| 180 | 
            +
                    # @return two values:
         | 
| 181 | 
            +
                    #   the first is either:  the lexically sortable String value of the earliest date or the Integer value of same,
         | 
| 182 | 
            +
                    #     depending on the method_sym passed in
         | 
| 183 | 
            +
                    #   the second is the original String value of the chosen element
         | 
| 184 | 
            +
                    def earliest_year(date_el_array, method_sym)
         | 
| 185 | 
            +
                      poss_results = {}
         | 
| 186 | 
            +
                      date_el_array.each { |el|
         | 
| 187 | 
            +
                        result = DateParsing.send(method_sym, el.content)
         | 
| 188 | 
            +
                        poss_results[result] = el.content if result
         | 
| 189 | 
            +
                      }
         | 
| 190 | 
            +
                      earliest = poss_results.keys.sort.first if poss_results.present?
         | 
| 191 | 
            +
                      return earliest, poss_results[earliest] if earliest
         | 
| 192 | 
            +
                    end
         | 
| 138 193 | 
             
                  end
         | 
| 139 194 |  | 
| 140 195 |  | 
| @@ -145,25 +200,58 @@ module Stanford | |
| 145 200 | 
             
                    vals
         | 
| 146 201 | 
             
                  end
         | 
| 147 202 |  | 
| 203 | 
            +
                  # Values for the pub date facet. This is less strict than the 4 year date requirements for pub_date
         | 
| 204 | 
            +
                  # Jan 2016:  used to populate Solr pub_date field for Spotlight and SearchWorks
         | 
| 205 | 
            +
                  #   Spotlight:  pub_date field should be replaced by pub_year_w_approx_isi and pub_year_no_approx_isi
         | 
| 206 | 
            +
                  #   SearchWorks:  pub_date field used for display in search results and show view; for sorting nearby-on-shelf
         | 
| 207 | 
            +
                  #      these could be done with more approp fields/methods (pub_year_int for sorting;  new pub year methods to populate field)
         | 
| 208 | 
            +
                  # TODO:  probably should deprecate this in favor of pub_date_facet_single_value;
         | 
| 209 | 
            +
                  #    need head-to-head testing with pub_date_facet_single_value
         | 
| 210 | 
            +
                  # @return [String, nil] value for the pub date facet, or nil if there is no pub_date
         | 
| 211 | 
            +
                  def pub_date_facet
         | 
| 212 | 
            +
                    if pub_date
         | 
| 213 | 
            +
                      if pub_date.start_with?('-')
         | 
| 214 | 
            +
                        return (pub_date.to_i + 1000).to_s + ' B.C.'
         | 
| 215 | 
            +
                      end
         | 
| 216 | 
            +
                      if pub_date.include? '--'
         | 
| 217 | 
            +
                        cent = pub_date[0, 2].to_i
         | 
| 218 | 
            +
                        cent += 1
         | 
| 219 | 
            +
                        cent = cent.to_s + 'th century'
         | 
| 220 | 
            +
                        return cent
         | 
| 221 | 
            +
                      else
         | 
| 222 | 
            +
                        return pub_date
         | 
| 223 | 
            +
                      end
         | 
| 224 | 
            +
                    end
         | 
| 225 | 
            +
                    nil
         | 
| 226 | 
            +
                  end
         | 
| 227 | 
            +
             | 
| 228 | 
            +
                  # creates a date suitable for sorting. Guaranteed to be 4 digits or nil
         | 
| 229 | 
            +
                  # @deprecated:  use pub_year_int, or pub_year_sort_str if you must have a string (why?)
         | 
| 230 | 
            +
                  def pub_date_sort
         | 
| 231 | 
            +
                    if pub_date
         | 
| 232 | 
            +
                      pd = pub_date
         | 
| 233 | 
            +
                      pd = '0' + pd if pd.length == 3
         | 
| 234 | 
            +
                      pd = pd.gsub('--', '00')
         | 
| 235 | 
            +
                    end
         | 
| 236 | 
            +
                    fail "pub_date_sort was about to return a non 4 digit value #{pd}!" if pd && pd.length != 4
         | 
| 237 | 
            +
                    pd
         | 
| 238 | 
            +
                  end
         | 
| 239 | 
            +
             | 
| 148 240 | 
             
                  # For the date display only, the first place to look is in the dates without encoding=marc array.
         | 
| 149 241 | 
             
                  # If no such dates, select the first date in the dates_marc_encoding array.  Otherwise return nil
         | 
| 150 242 | 
             
                  # @return [String] value for the pub_date_display Solr field for this document or nil if none
         | 
| 243 | 
            +
                  # @deprecated:  DO NOT USE: this is no longer used in SW, Revs or Spotlight Jan 2016
         | 
| 151 244 | 
             
                  def pub_date_display
         | 
| 152 245 | 
             
                    return dates_no_marc_encoding.first unless dates_no_marc_encoding.empty?
         | 
| 153 246 | 
             
                    return dates_marc_encoding.first unless dates_marc_encoding.empty?
         | 
| 154 247 | 
             
                    nil
         | 
| 155 248 | 
             
                  end
         | 
| 156 249 |  | 
| 157 | 
            -
             | 
| 158 | 
            -
             | 
| 159 | 
            -
             | 
| 160 | 
            -
                  def pub_dates
         | 
| 161 | 
            -
                    return dates_marc_encoding unless dates_marc_encoding.empty?
         | 
| 162 | 
            -
                    return dates_no_marc_encoding unless dates_no_marc_encoding.empty?
         | 
| 163 | 
            -
                    nil
         | 
| 164 | 
            -
                  end
         | 
| 250 | 
            +
            # ----   old date parsing protected methods will be deprecated/replaced with new date parsing methods (see also DateParsing)
         | 
| 251 | 
            +
             | 
| 252 | 
            +
                protected
         | 
| 165 253 |  | 
| 166 | 
            -
                  #  | 
| 254 | 
            +
                  # The year the object was published
         | 
| 167 255 | 
             
                  # @return [String] 4 character year or nil if no valid date was found
         | 
| 168 256 | 
             
                  def pub_year
         | 
| 169 257 | 
             
                    # use the cached year if there is one
         | 
| @@ -201,47 +289,17 @@ module Stanford | |
| 201 289 | 
             
                    @pub_year = ''
         | 
| 202 290 | 
             
                    nil
         | 
| 203 291 | 
             
                  end
         | 
| 292 | 
            +
                  alias_method :pub_date, :pub_year
         | 
| 204 293 |  | 
| 205 | 
            -
                  #  | 
| 206 | 
            -
                   | 
| 207 | 
            -
             | 
| 208 | 
            -
             | 
| 209 | 
            -
             | 
| 210 | 
            -
             | 
| 211 | 
            -
                    end
         | 
| 212 | 
            -
                    fail "pub_date_sort was about to return a non 4 digit value #{pd}!" if pd && pd.length != 4
         | 
| 213 | 
            -
                    pd
         | 
| 214 | 
            -
                  end
         | 
| 215 | 
            -
             | 
| 216 | 
            -
                  # The year the object was published, filtered based on max_pub_date and min_pub_date from the config file
         | 
| 217 | 
            -
                  # @return [String] 4 character year or nil
         | 
| 218 | 
            -
                  def pub_date
         | 
| 219 | 
            -
                    pub_year || nil
         | 
| 220 | 
            -
                  end
         | 
| 221 | 
            -
             | 
| 222 | 
            -
                  # Values for the pub date facet. This is less strict than the 4 year date requirements for pub_date
         | 
| 223 | 
            -
                  # @return <Array[String]> with values for the pub date facet
         | 
| 224 | 
            -
                  def pub_date_facet
         | 
| 225 | 
            -
                    if pub_date
         | 
| 226 | 
            -
                      if pub_date.start_with?('-')
         | 
| 227 | 
            -
                        return (pub_date.to_i + 1000).to_s + ' B.C.'
         | 
| 228 | 
            -
                      end
         | 
| 229 | 
            -
                      if pub_date.include? '--'
         | 
| 230 | 
            -
                        cent = pub_date[0, 2].to_i
         | 
| 231 | 
            -
                        cent += 1
         | 
| 232 | 
            -
                        cent = cent.to_s + 'th century'
         | 
| 233 | 
            -
                        return cent
         | 
| 234 | 
            -
                      else
         | 
| 235 | 
            -
                        return pub_date
         | 
| 236 | 
            -
                      end
         | 
| 237 | 
            -
                    end
         | 
| 294 | 
            +
                  # For the date indexing, sorting and faceting, the first place to look is in the dates with encoding=marc array.
         | 
| 295 | 
            +
                  # If that doesn't exist, look in the dates without encoding=marc array.  Otherwise return nil
         | 
| 296 | 
            +
                  # @return [Array<String>] values for the date Solr field for this document or nil if none
         | 
| 297 | 
            +
                  def pub_dates
         | 
| 298 | 
            +
                    return dates_marc_encoding unless dates_marc_encoding.empty?
         | 
| 299 | 
            +
                    return dates_no_marc_encoding unless dates_no_marc_encoding.empty?
         | 
| 238 300 | 
             
                    nil
         | 
| 239 301 | 
             
                  end
         | 
| 240 302 |  | 
| 241 | 
            -
            # ----   old date parsing methods will be deprecated/replaced with new date parsing methods
         | 
| 242 | 
            -
             | 
| 243 | 
            -
                protected
         | 
| 244 | 
            -
             | 
| 245 303 | 
             
                  # @return [Array<String>] dates from dateIssued and dateCreated tags from origin_info with encoding="marc"
         | 
| 246 304 | 
             
                  def dates_marc_encoding
         | 
| 247 305 | 
             
                    @dates_marc_encoding ||= begin
         | 
| @@ -279,7 +337,6 @@ module Stanford | |
| 279 337 | 
             
                    }
         | 
| 280 338 | 
             
                  end
         | 
| 281 339 |  | 
| 282 | 
            -
             | 
| 283 340 | 
             
                  def is_number?(object)
         | 
| 284 341 | 
             
                    true if Integer(object) rescue false
         | 
| 285 342 | 
             
                  end
         | 
| @@ -288,8 +345,6 @@ module Stanford | |
| 288 345 | 
             
                    true if Date.parse(object) rescue false
         | 
| 289 346 | 
             
                  end
         | 
| 290 347 |  | 
| 291 | 
            -
                  # TODO:  need tests for these methods
         | 
| 292 | 
            -
             | 
| 293 348 | 
             
                  # get a 4 digit year like 1865 from array of dates
         | 
| 294 349 | 
             
                  # @param [Array<String>] dates an array of potential year strings
         | 
| 295 350 | 
             
                  def get_plain_four_digit_year(dates)
         |