stanford-mods 1.3.3 → 1.3.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 821295322c2777baac42e0142a690a24b44cf931
4
- data.tar.gz: 0134f68110a263ddedf3fb9fec7439bedbbad6dd
3
+ metadata.gz: 8ea1bae95a44c285bf8594fa40a73c2aa0b328d1
4
+ data.tar.gz: 479f7c52ae3c7c29592a870a819e10e6e1abb692
5
5
  SHA512:
6
- metadata.gz: d9976025d220435fc45c4f97901bba4ef506b5e0f3c503b3fb377f3a6a10f4bb74239802abf66fb407c23a121b4339451511485ca94cd06fa620ddec8223fa92
7
- data.tar.gz: 758b2840f7bb52959d4bf5ffceb0e9d281325ff5adcf848176535aa9b2a81f98386bfdfbcc03374c8b1622b99b3bb01184e38cb66ef460c95187a8272c9ff0cf
6
+ metadata.gz: b6f535642244577ecfb2f7cc2d4b4291c9a7cb052869543cf989f6cce72cb5b8609e668e4ace238940c2c252b8613e63908b6627aef608b5e716068b52de23f5
7
+ data.tar.gz: 97db3a6affbc9e74b62432961d712ace6f860cbf77b36137f22287f2e3c4f491d5cbfeeb24a540bd7c48ed21e38e91663d0dd9436e2cc62e7835bc50bd319a11
data/.gitignore CHANGED
@@ -22,3 +22,4 @@ tmp
22
22
  *.tmproj
23
23
  tmtags
24
24
  .idea/*
25
+ .pry_history
data/.rspec CHANGED
@@ -1 +1,2 @@
1
1
  --color
2
+ --require spec_helper
data/.rubocop.yml CHANGED
@@ -4,3 +4,7 @@ require: rubocop-rspec
4
4
 
5
5
  Metrics/LineLength:
6
6
  Max: 120
7
+
8
+ # shut hound up re: quote styles
9
+ Style/StringLiterals:
10
+ Enabled: false
data/Gemfile CHANGED
@@ -6,6 +6,7 @@ gemspec
6
6
  group :test, :development do
7
7
  gem 'rubocop', require: false
8
8
  gem 'rubocop-rspec', require: false
9
+ gem 'pry-byebug', require: false, platform: [:ruby_20, :ruby_21]
9
10
  end
10
11
 
11
12
  group :test do
data/lib/stanford-mods.rb CHANGED
@@ -1,16 +1,16 @@
1
- require 'stanford-mods/version'
2
1
  require 'mods'
2
+ require 'stanford-mods/date_parsing'
3
+ require 'stanford-mods/geo_spatial'
3
4
  require 'stanford-mods/name'
4
- require 'stanford-mods/searchworks'
5
+ require 'stanford-mods/origin_info'
5
6
  require 'stanford-mods/physical_location'
6
- require 'stanford-mods/geo_spatial'
7
+ require 'stanford-mods/searchworks'
8
+ require 'stanford-mods/version'
7
9
 
8
10
  # Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
9
11
  module Stanford
10
12
  module Mods
11
-
12
13
  class Record < ::Mods::Record
13
-
14
14
  end # Record class
15
15
  end # Mods module
16
16
  end # Stanford module
@@ -0,0 +1,245 @@
1
+ module Stanford
2
+ module Mods
3
+ # Parsing date strings
4
+ # TODO: this should become its own gem and/or become eclipsed by/merged with timetwister gem
5
+ # When this is "gemified":
6
+ # - we may want an integer or date sort field as well as lexical
7
+ # - we could add methods like my_date.bc?
8
+ class DateParsing
9
+
10
+ # get single facet value for date, generally an explicit year or "17th century" or "5 B.C."
11
+ # returns '845', not 0845
12
+ # @param [String] date_str String containing a date (we hope)
13
+ # @return [String, nil] String facet value for year if we could parse one, nil otherwise
14
+ def self.facet_string_from_date_str(date_str)
15
+ return DateParsing.new(date_str).facet_string_from_date_str
16
+ end
17
+
18
+ # get String sortable value year if we can parse date_str to get a year.
19
+ # SearchWorks currently uses a string field for pub date sorting; thus so does Spotlight.
20
+ # The values returned must *lexically* sort in chronological order, so the B.C. dates are tricky
21
+ # @param [String] date_str String containing a date (we hope)
22
+ # @return [String, nil] String sortable year if we could parse one, nil otherwise
23
+ # note that these values must *lexically* sort to create a chronological sort.
24
+ def self.sortable_year_string_from_date_str(date_str)
25
+ return DateParsing.new(date_str).sortable_year_string_from_date_str
26
+ end
27
+
28
+ # true if the year is between -999 and (current year + 1)
29
+ # @param [String] year_str String containing a date in format: -yyy, -yy, -y, y, yy, yyy, yyyy
30
+ # @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
31
+ def self.year_str_valid?(year_str)
32
+ return false unless year_str && (year_str.match(/^\d{1,4}$/) || year_str.match(/^-\d{1,3}$/))
33
+ (-1000 < year_str.to_i) && (year_str.to_i < Date.today.year + 2)
34
+ end
35
+
36
+ attr_reader :orig_date_str
37
+
38
+ def initialize(date_str)
39
+ @orig_date_str = date_str
40
+ @orig_date_str.freeze
41
+ end
42
+
43
+ BRACKETS_BETWEEN_DIGITS_REXEXP = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
44
+
45
+ # get single facet value for date, generally an explicit year or "17th century" or "5 B.C."
46
+ # @return [String, nil] String facet value for year if we could parse one, nil otherwise
47
+ def facet_string_from_date_str
48
+ return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
49
+ # B.C. first in case there are 4 digits, e.g. 1600 B.C.
50
+ return facet_string_for_bc if orig_date_str.match(BC_REGEX)
51
+ # most date strings have a four digit year
52
+ result ||= sortable_year_for_yyyy
53
+ # 2 digit year will always be 19xx or 20xx; sortable version will make a good facet string
54
+ result ||= sortable_year_for_yy
55
+ # decades are always 19xx or 20xx; sortable version will make a good facet string
56
+ result ||= sortable_year_for_decade
57
+ unless result
58
+ # try removing brackets between digits in case we have 169[5] or [18]91
59
+ if orig_date_str.match(BRACKETS_BETWEEN_DIGITS_REXEXP)
60
+ no_brackets = orig_date_str.delete('[]')
61
+ return DateParsing.new(no_brackets).facet_string_from_date_str
62
+ end
63
+ end
64
+ # parsing below this line gives strings that are inappropriate for year_str_valid?
65
+ unless self.class.year_str_valid?(result)
66
+ result = facet_string_for_century
67
+ result ||= facet_string_for_early_numeric
68
+ end
69
+ # remove leading 0s from early dates
70
+ result = result.to_i.to_s if result && result.match(/^\d+$/)
71
+ result
72
+ end
73
+
74
+ # get String sortable value year if we can parse date_str to get a year.
75
+ # SearchWorks currently uses a string field for pub date sorting; thus so does Spotlight.
76
+ # The values returned must *lexically* sort in chronological order, so the B.C. dates are tricky
77
+ # @return [String, nil] String sortable year if we could parse one, nil otherwise
78
+ # note that these values must *lexically* sort to create a chronological sort.
79
+ def sortable_year_string_from_date_str
80
+ return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
81
+ # B.C. first in case there are 4 digits, e.g. 1600 B.C.
82
+ return sortable_year_for_bc if orig_date_str.match(BC_REGEX)
83
+ # most date strings have a four digit year
84
+ result = sortable_year_for_yyyy
85
+ result ||= sortable_year_for_yy
86
+ result ||= sortable_year_for_decade
87
+ result ||= sortable_year_for_century
88
+ result ||= sortable_year_for_early_numeric
89
+ unless result
90
+ # try removing brackets between digits in case we have 169[5] or [18]91
91
+ if orig_date_str.match(BRACKETS_BETWEEN_DIGITS_REXEXP)
92
+ no_brackets = orig_date_str.delete('[]')
93
+ return DateParsing.new(no_brackets).sortable_year_string_from_date_str
94
+ end
95
+ end
96
+ result if self.class.year_str_valid?(result)
97
+ end
98
+
99
+ # looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
100
+ # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
101
+ def sortable_year_for_yyyy
102
+ matches = orig_date_str.match(/\d{4}/) if orig_date_str
103
+ return matches.to_s if matches
104
+ end
105
+
106
+ # returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
107
+ # note that these are the only 2 digit year patterns found in our actual date strings in MODS records
108
+ # we use 20 as century digits unless the resulting date is later than today, in which case it is moved back 100 years:
109
+ # 1/1/15 -> 2015
110
+ # 1/1/25 -> 1925 (as long as 1/1/2025 is still in the future)
111
+ # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
112
+ def sortable_year_for_yy
113
+ return unless orig_date_str
114
+ slash_matches = orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
115
+ if slash_matches
116
+ date_obj = Date.strptime(orig_date_str, '%m/%d/%y')
117
+ else
118
+ hyphen_matches = orig_date_str.match(/\d{1,2}-\d{1,2}-\d{2}/)
119
+ date_obj = Date.strptime(orig_date_str, '%m-%d-%y') if hyphen_matches
120
+ end
121
+ if date_obj && date_obj > Date.today
122
+ date_obj = Date.new(date_obj.year - 100, date_obj.month, date_obj.mday)
123
+ end
124
+ date_obj.year.to_s if date_obj
125
+ rescue ArgumentError
126
+ nil # explicitly want nil if date won't parse
127
+ end
128
+
129
+ # get first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
130
+ # note that these are the only decade patterns found in our actual date strings in MODS records
131
+ # @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
132
+ def sortable_year_for_decade
133
+ decade_matches = orig_date_str.match(/\d{3}[u\-?x]/) if orig_date_str
134
+ if decade_matches
135
+ changed_to_zero = decade_matches.to_s.tr('u\-?x', '0')
136
+ return DateParsing.new(changed_to_zero).sortable_year_for_yyyy
137
+ end
138
+ end
139
+
140
+ CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
141
+ CENTURY_4CHAR_REGEXP = Regexp.new('(\d{1,2})[u\-]{2}')
142
+
143
+ # get first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
144
+ # note that these are the only century patterns found in our actual date strings in MODS records
145
+ # @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
146
+ def sortable_year_for_century
147
+ return unless orig_date_str
148
+ return if orig_date_str.match(/B\.C\./)
149
+ century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
150
+ if century_matches
151
+ return $1 + '00' if $1.length == 2
152
+ return '0' + $1 + '00' if $1.length == 1
153
+ end
154
+ century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
155
+ if century_str_matches
156
+ yy = ($1.to_i - 1).to_s
157
+ return yy + '00' if yy.length == 2
158
+ return '0' + yy + '00' if yy.length == 1
159
+ end
160
+ end
161
+
162
+ # get single facet value for century (17th century) if we have: yyuu, yy--, yy--? or xxth century pattern
163
+ # note that these are the only century patterns found in our actual date strings in MODS records
164
+ # @return [String, nil] yy(th) Century if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
165
+ def facet_string_for_century
166
+ return unless orig_date_str
167
+ return if orig_date_str.match(/B\.C\./)
168
+ century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
169
+ return century_str_matches.to_s if century_str_matches
170
+
171
+ century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
172
+ if century_matches
173
+ require 'active_support/core_ext/integer/inflections'
174
+ return "#{($1.to_i + 1).ordinalize} century"
175
+ end
176
+ end
177
+
178
+ BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
179
+
180
+ # get String sortable value for B.C. if we have B.C. pattern
181
+ # note that these values must *lexically* sort to create a chronological sort.
182
+ # We know our data does not contain B.C. dates older than 999, so we can make them
183
+ # lexically sort by subtracting 1000. So we get:
184
+ # -700 for 300 B.C., -750 for 250 B.C., -800 for 200 B.C., -801 for 199 B.C.
185
+ # @return [String, nil] String sortable -ddd if B.C. in pattern; nil otherwise
186
+ def sortable_year_for_bc
187
+ bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
188
+ return ($1.to_i - 1000).to_s if bc_matches
189
+ end
190
+
191
+ # get single facet value for B.C. if we have B.C. pattern
192
+ # @return [String, nil] ddd B.C. if ddd B.C. in pattern; nil otherwise
193
+ def facet_string_for_bc
194
+ bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
195
+ return bc_matches.to_s if bc_matches
196
+ end
197
+
198
+ EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')
199
+
200
+ # get String sortable value from date String containing yyy, yy, y, -y, -yy, -yyy
201
+ # note that these values must *lexically* sort to create a chronological sort.
202
+ # We know our data does not contain negative dates older than -999, so we can make them
203
+ # lexically sort by subtracting 1000. So we get:
204
+ # -983 for -17, -999 for -1, 0000 for 0, 0001 for 1, 0017 for 17
205
+ # @return [String, nil] String sortable -ddd if orig_date_str matches pattern; nil otherwise
206
+ def sortable_year_for_early_numeric
207
+ return unless orig_date_str.match(EARLY_NUMERIC)
208
+ if orig_date_str.match(/^\-/)
209
+ # negative number becomes x - 1000 for sorting; -005 for -995
210
+ num = orig_date_str[1..-1].to_i - 1000
211
+ return '-' + num.to_s[1..-1].rjust(3, '0')
212
+ else
213
+ return orig_date_str.rjust(4, '0')
214
+ end
215
+ end
216
+
217
+ # get single facet value for date String containing yyy, yy, y, -y, -yy, -yyy
218
+ # negative number strings will be changed to B.C. strings
219
+ def facet_string_for_early_numeric
220
+ return unless orig_date_str.match(EARLY_NUMERIC)
221
+ # negative number becomes B.C.
222
+ return orig_date_str[1..-1] + " B.C." if orig_date_str.match(/^\-/)
223
+ # remove leading 0s from early dates
224
+ orig_date_str.to_i.to_s
225
+ end
226
+
227
+ # NOTE: while Date.parse() works for many dates, the *sortable_year_for_yyyy
228
+ # actually works for nearly all those cases and a lot more besides. Trial and error
229
+ # with an extensive set of test data culled from actual date strings in our MODS records
230
+ # has made this method bogus.
231
+ # @return [String, nil] sortable 4 digit year (e.g. 1865, 0950) if orig_date_str is parseable via ruby Date, nil otherwise
232
+ def year_via_ruby_parsing
233
+ return unless orig_date_str.match(/\d\d/) # need at least 2 digits
234
+ # need more in string than only 2 digits
235
+ return if orig_date_str.match(/^\d\d$/) || orig_date_str.match(/^\D*\d\d\D*$/)
236
+ return if orig_date_str.match(/\d\s*B.C./) # skip B.C. dates
237
+ date_obj = Date.parse(orig_date_str)
238
+ date_obj.year.to_s
239
+ rescue ArgumentError
240
+ nil # explicitly want nil if date won't parse
241
+ end
242
+
243
+ end
244
+ end
245
+ end
@@ -0,0 +1,411 @@
1
+ require 'logger'
2
+ require 'mods'
3
+
4
+ # Parsing MODS /originInfo for Publication/Imprint data:
5
+ # * pub year for date slider facet
6
+ # * pub year for sorting
7
+ # * pub year for single facet value
8
+ # * imprint info for display
9
+ # *
10
+ # These methods may be used by searchworks.rb file or by downstream apps
11
+ module Stanford
12
+ module Mods
13
+ class Record < ::Mods::Record
14
+
15
+ # return a single string intended for facet use for pub date
16
+ # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
17
+ # look for a keyDate and use it if there is one; otherwise pick earliest date
18
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
19
+ # should be ignored; false if approximate dates should be included
20
+ # @return [String] single String containing publication year for facet use
21
+ def pub_date_facet_single_value(ignore_approximate = false)
22
+ # prefer dateIssued
23
+ result = pub_date_best_single_facet_value(date_issued_elements(ignore_approximate))
24
+ result ||= pub_date_best_single_facet_value(date_created_elements(ignore_approximate))
25
+ # dateCaptured for web archive seed records
26
+ result ||= pub_date_best_single_facet_value(@mods_ng_xml.origin_info.dateCaptured.to_a)
27
+ result
28
+ end
29
+
30
+ # return a single string intended for lexical sorting for pub date
31
+ # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
32
+ # look for a keyDate and use it if there is one; otherwise pick earliest date
33
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
34
+ # should be ignored; false if approximate dates should be included
35
+ # @return [String] single String containing publication year for lexical sorting
36
+ # note that for string sorting 5 B.C. = -5 => -995; 6 B.C. => -994 so 6 B.C. sorts before 5 B.C.
37
+ def pub_date_sortable_string(ignore_approximate = false)
38
+ # prefer dateIssued
39
+ result = pub_date_best_sort_str_value(date_issued_elements(ignore_approximate))
40
+ result ||= pub_date_best_sort_str_value(date_created_elements(ignore_approximate))
41
+ # dateCaptured for web archive seed records
42
+ result ||= pub_date_best_sort_str_value(@mods_ng_xml.origin_info.dateCaptured.to_a)
43
+ result
44
+ end
45
+
46
+ # given the passed date elements, look for a single keyDate and use it if there is one;
47
+ # otherwise pick earliest parseable date
48
+ # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
49
+ # @return [String] single String containing publication year for facet use
50
+ def pub_date_best_single_facet_value(date_el_array)
51
+ return if date_el_array.empty?
52
+ # prefer keyDate
53
+ key_date_el = self.class.keyDate(date_el_array)
54
+ result = DateParsing.facet_string_from_date_str(key_date_el.content) if key_date_el
55
+ return result if result
56
+ # settle for earliest parseable date
57
+ _ignore, orig_str_to_parse = self.class.earliest_date(date_el_array)
58
+ DateParsing.facet_string_from_date_str(orig_str_to_parse) if orig_str_to_parse
59
+ end
60
+
61
+ # given the passed date elements, look for a single keyDate and use it if there is one;
62
+ # otherwise pick earliest parseable date
63
+ # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
64
+ # @return [String] single String containing publication year for lexical sorting
65
+ def pub_date_best_sort_str_value(date_el_array)
66
+ return if date_el_array.empty?
67
+ # prefer keyDate
68
+ key_date_el = self.class.keyDate(date_el_array)
69
+ result = DateParsing.sortable_year_string_from_date_str(key_date_el.content) if key_date_el
70
+ return result if result
71
+ # settle for earliest parseable date
72
+ sortable_str, _ignore = self.class.earliest_date(date_el_array)
73
+ sortable_str if sortable_str
74
+ end
75
+
76
+ protected :pub_date_best_single_facet_value, :pub_date_best_sort_str_value
77
+
78
+ # return /originInfo/dateCreated elements in MODS records
79
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
80
+ # should be excluded; false if approximate dates should be included
81
+ # @return [Array<Nokogiri::XML::Element>]
82
+ def date_created_elements(ignore_approximate=false)
83
+ date_created_nodeset = @mods_ng_xml.origin_info.dateCreated
84
+ return self.class.remove_approximate(date_created_nodeset) if ignore_approximate
85
+ date_created_nodeset.to_a
86
+ end
87
+
88
+ # return /originInfo/dateIssued elements in MODS records
89
+ # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
90
+ # should be excluded; false if approximate dates should be included
91
+ # @return [Array<Nokogiri::XML::Element>]
92
+ def date_issued_elements(ignore_approximate=false)
93
+ date_issued_nodeset = @mods_ng_xml.origin_info.dateIssued
94
+ return self.class.remove_approximate(date_issued_nodeset) if ignore_approximate
95
+ date_issued_nodeset.to_a
96
+ end
97
+
98
+ # given a set of date elements, return the single element with attribute keyDate="yes"
99
+ # or return nil if no elements have attribute keyDate="yes", or if multiple elements have keyDate="yes"
100
+ # @param [Array<Nokogiri::XML::Element>] elements Array of date elements
101
+ # @return [Nokogiri::XML::Element, nil] single date element with attribute keyDate="yes", or nil
102
+ def self.keyDate(elements)
103
+ keyDates = elements.select { |node| node["keyDate"] == 'yes' }
104
+ keyDates.first if keyDates.size == 1
105
+ end
106
+
107
+ # remove Elements from NodeSet if they have a qualifier attribute of 'approximate' or 'questionable'
108
+ # @param [Nokogiri::XML::NodeSet<Nokogiri::XML::Element>] nodeset set of date elements
109
+ # @return [Array<Nokogiri::XML::Element>] the set of date elements minus any that
110
+ # had a qualifier attribute of 'approximate' or 'questionable'
111
+ def self.remove_approximate(nodeset)
112
+ nodeset.select { |node| node unless date_is_approximate?(node) }
113
+ end
114
+
115
+ # NOTE: legal values for MODS date elements with attribute qualifier are
116
+ # 'approximate', 'inferred' or 'questionable'
117
+ # @param [Nokogiri::XML::Element] date_element MODS date element
118
+ # @return [Boolean] true if date_element has a qualifier attribute of "approximate" or "questionable",
119
+ # false if no qualifier attribute, or if attribute is 'inferred' or some other value
120
+ def self.date_is_approximate?(date_element)
121
+ qualifier = date_element["qualifier"] if date_element.respond_to?('[]')
122
+ qualifier == 'approximate' || qualifier == 'questionable'
123
+ end
124
+
125
+ # get earliest parseable date from the passed date elements
126
+ # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
127
+ # @return two String values:
128
+ # the first is the lexically sortable String value of the earliest date;
129
+ # the second is the original String value of the chosen element
130
+ def self.earliest_date(date_el_array)
131
+ poss_results = {}
132
+ date_el_array.each { |el|
133
+ result = DateParsing.sortable_year_string_from_date_str(el.content)
134
+ poss_results[result] = el.content if result
135
+ }
136
+ earliest = poss_results.keys.sort.first if poss_results.present?
137
+ return earliest, poss_results[earliest] if earliest
138
+ end
139
+
140
+
141
+ # ---- old date parsing methods used downstream of gem; will be deprecated/replaced with new date parsing methods
142
+
143
+ def place
144
+ vals = self.term_values([:origin_info, :place, :placeTerm])
145
+ vals
146
+ end
147
+
148
+ # For the date display only, the first place to look is in the dates without encoding=marc array.
149
+ # If no such dates, select the first date in the dates_marc_encoding array. Otherwise return nil
150
+ # @return [String] value for the pub_date_display Solr field for this document or nil if none
151
+ def pub_date_display
152
+ return dates_no_marc_encoding.first unless dates_no_marc_encoding.empty?
153
+ return dates_marc_encoding.first unless dates_marc_encoding.empty?
154
+ nil
155
+ end
156
+
157
+ # For the date indexing, sorting and faceting, the first place to look is in the dates with encoding=marc array.
158
+ # If that doesn't exist, look in the dates without encoding=marc array. Otherwise return nil
159
+ # @return [Array<String>] values for the date Solr field for this document or nil if none
160
+ def pub_dates
161
+ return dates_marc_encoding unless dates_marc_encoding.empty?
162
+ return dates_no_marc_encoding unless dates_no_marc_encoding.empty?
163
+ nil
164
+ end
165
+
166
+ # Get the publish year from mods
167
+ # @return [String] 4 character year or nil if no valid date was found
168
+ def pub_year
169
+ # use the cached year if there is one
170
+ if @pub_year
171
+ return nil if @pub_year == ''
172
+ return @pub_year
173
+ end
174
+
175
+ dates = pub_dates
176
+ if dates
177
+ pruned_dates = []
178
+ dates.each do |f_date|
179
+ # remove ? and []
180
+ if f_date.length == 4 && f_date.end_with?('?')
181
+ pruned_dates << f_date.tr('?', '0')
182
+ else
183
+ pruned_dates << f_date.delete('?[]')
184
+ end
185
+ end
186
+ # try to find a date starting with the most normal date formats and progressing to more wonky ones
187
+ @pub_year = get_plain_four_digit_year pruned_dates
188
+ return @pub_year if @pub_year
189
+ # Check for years in u notation, e.g., 198u
190
+ @pub_year = get_u_year pruned_dates
191
+ return @pub_year if @pub_year
192
+ @pub_year = get_double_digit_century pruned_dates
193
+ return @pub_year if @pub_year
194
+ @pub_year = get_bc_year pruned_dates
195
+ return @pub_year if @pub_year
196
+ @pub_year = get_three_digit_year pruned_dates
197
+ return @pub_year if @pub_year
198
+ @pub_year = get_single_digit_century pruned_dates
199
+ return @pub_year if @pub_year
200
+ end
201
+ @pub_year = ''
202
+ nil
203
+ end
204
+
205
+ # creates a date suitable for sorting. Guaranteed to be 4 digits or nil
206
+ def pub_date_sort
207
+ if pub_date
208
+ pd = pub_date
209
+ pd = '0' + pd if pd.length == 3
210
+ pd = pd.gsub('--', '00')
211
+ end
212
+ fail "pub_date_sort was about to return a non 4 digit value #{pd}!" if pd && pd.length != 4
213
+ pd
214
+ end
215
+
216
+ # The year the object was published; this simply returns pub_year (no max_pub_date / min_pub_date filtering is applied in this method)
217
+ # @return [String] 4 character year or nil
218
+ def pub_date
219
+ pub_year || nil
220
+ end
221
+
222
+ # Value for the pub date facet. This is less strict than the 4 digit year requirement for pub_date
223
+ # @return [String, nil] single value for the pub date facet (year, "ddd B.C." or "xxth century"), or nil if there is no pub_date
224
+ def pub_date_facet
225
+ if pub_date
226
+ if pub_date.start_with?('-')
227
+ return (pub_date.to_i + 1000).to_s + ' B.C.'
228
+ end
229
+ if pub_date.include? '--'
230
+ cent = pub_date[0, 2].to_i
231
+ cent += 1
232
+ cent = cent.to_s + 'th century'
233
+ return cent
234
+ else
235
+ return pub_date
236
+ end
237
+ end
238
+ nil
239
+ end
240
+
241
+ # ---- old date parsing methods will be deprecated/replaced with new date parsing methods
242
+
243
+ protected
244
+
245
+ # @return [Array<String>] dates from dateIssued and dateCreated tags from origin_info with encoding="marc"
246
+ def dates_marc_encoding
247
+ @dates_marc_encoding ||= begin
248
+ parse_dates_from_originInfo
249
+ @dates_marc_encoding
250
+ end
251
+ end
252
+
253
+ # @return [Array<String>] dates from dateIssued and dateCreated tags from origin_info with encoding not "marc"
254
+ def dates_no_marc_encoding
255
+ @dates_no_marc_encoding ||= begin
256
+ parse_dates_from_originInfo
257
+ @dates_no_marc_encoding
258
+ end
259
+ end
260
+
261
+ # Populate @dates_marc_encoding and @dates_no_marc_encoding from dateIssued and dateCreated tags from origin_info
262
+ # with and without encoding=marc
263
+ def parse_dates_from_originInfo
264
+ @dates_marc_encoding = []
265
+ @dates_no_marc_encoding = []
266
+ self.origin_info.dateIssued.each { |di|
267
+ if di.encoding == "marc"
268
+ @dates_marc_encoding << di.text
269
+ else
270
+ @dates_no_marc_encoding << di.text
271
+ end
272
+ }
273
+ self.origin_info.dateCreated.each { |dc|
274
+ if dc.encoding == "marc"
275
+ @dates_marc_encoding << dc.text
276
+ else
277
+ @dates_no_marc_encoding << dc.text
278
+ end
279
+ }
280
+ end
281
+
282
+
283
+ def is_number?(object)
284
+ true if Integer(object) rescue false
285
+ end
286
+
287
+ def is_date?(object)
288
+ true if Date.parse(object) rescue false
289
+ end
290
+
291
+ # TODO: need tests for these methods
292
+
293
+ # get a 4 digit year like 1865 from array of dates
294
+ # @param [Array<String>] dates an array of potential year strings
295
+ def get_plain_four_digit_year(dates)
296
+ dates.each do |f_date|
297
+ matches = f_date.scan(/\d{4}/)
298
+ if matches.length == 1
299
+ @pub_year = matches.first
300
+ return matches.first
301
+ else
302
+ # when there are multiple matches, check for ones with CE after them
303
+ matches.each do |match|
304
+ # look for things like '1865-6 CE'
305
+ pos = f_date.index(Regexp.new(match + '...CE'))
306
+ pos = pos ? pos.to_i : 0
307
+ if f_date.include?(match+' CE') or pos > 0
308
+ @pub_year = match
309
+ return match
310
+ end
311
+ end
312
+ return matches.first
313
+ end
314
+ end
315
+ nil
316
+ end
317
+
318
+ # get a 3 digit year like 965 from the date array
319
+ # @param [Array<String>] dates an array of potential year strings
320
+ def get_three_digit_year(dates)
321
+ dates.each do |f_date|
322
+ matches = f_date.scan(/\d{3}/)
323
+ return matches.first if matches.length > 0
324
+ end
325
+ nil
326
+ end
327
+
328
+ # get the 3 digit BC year, return it as a negative, so -700 for 300 BC.
329
+ # Other methods will translate it to proper display, this is good for sorting.
330
+ # @param [Array<String>] dates an array of potential year strings
331
+ def get_bc_year(dates)
332
+ dates.each do |f_date|
333
+ matches = f_date.scan(/\d{3} B.C./)
334
+ if matches.length > 0
335
+ bc_year = matches.first[0..2]
336
+ return (bc_year.to_i - 1000).to_s
337
+ end
338
+ end
339
+ nil
340
+ end
341
+
342
+ # get a single digit century like '9th century' from the date array
343
+ # @param [Array<String>] dates an array of potential year strings
344
+ # @return [String] y-- if we identify century digit in string
345
+ def get_single_digit_century(dates)
346
+ dates.each do |f_date|
347
+ matches = f_date.scan(/\d{1}th/)
348
+ next if matches.length == 0
349
+ if matches.length == 1
350
+ @pub_year = ((matches.first[0, 2].to_i) - 1).to_s + '--'
351
+ return @pub_year
352
+ else
353
+ # when there are multiple matches, check for ones with CE after them
354
+ matches.each do |match|
355
+ pos = f_date.index(Regexp.new(match + '...CE'))
356
+ pos = pos ? pos.to_i : f_date.index(Regexp.new(match + ' century CE'))
357
+ pos = pos ? pos.to_i : 0
358
+ if f_date.include?(match + ' CE') || pos > 0
359
+ @pub_year = ((match[0, 1].to_i) - 1).to_s + '--'
360
+ return @pub_year
361
+ end
362
+ end
363
+ end
364
+ end
365
+ nil
366
+ end
367
+
368
+ # get a double digit century like '12th century' from the date array
369
+ # @param [Array<String>] dates an array of potential year strings
370
+ # @return [String] yy-- if we identify century digits in string
371
+ def get_double_digit_century(dates)
372
+ dates.each do |f_date|
373
+ matches = f_date.scan(/\d{2}th/)
374
+ next if matches.length == 0
375
+ if matches.length == 1
376
+ @pub_year=((matches.first[0, 2].to_i) - 1).to_s + '--'
377
+ return @pub_year
378
+ else
379
+ # when there are multiple matches, check for ones with CE after them
380
+ matches.each do |match|
381
+ pos = f_date.index(Regexp.new(match + '...CE'))
382
+ pos = pos ? pos.to_i : f_date.index(Regexp.new(match + ' century CE'))
383
+ pos = pos ? pos.to_i : 0
384
+ if f_date.include?(match+' CE') or pos > 0
385
+ @pub_year = ((match[0, 2].to_i) - 1).to_s + '--'
386
+ return @pub_year
387
+ end
388
+ end
389
+ end
390
+ end
391
+ nil
392
+ end
393
+
394
+ # If a year has a "u" in it, replace u with 0 for yyyu (becomes yyy0)
395
+ # and replace u with '-' for yyuu (becomes yy--)
396
+ # @param [Array<String>] dates looking for matches on yyyu or yyuu in these strings
397
+ # @return [String, nil] String of format yyy0 or yy--, or nil
398
+ def get_u_year(dates)
399
+ dates.each do |f_date|
400
+ # Single digit u notation
401
+ matches = f_date.scan(/\d{3}u/)
402
+ return matches.first.tr('u', '0') if matches.length == 1
403
+ # Double digit u notation
404
+ matches = f_date.scan(/\d{2}u{2}/)
405
+ return matches.first.tr('u', '-') if matches.length == 1
406
+ end
407
+ nil
408
+ end
409
+ end # class Record
410
+ end
411
+ end