stanford-mods 1.3.3 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 821295322c2777baac42e0142a690a24b44cf931
4
- data.tar.gz: 0134f68110a263ddedf3fb9fec7439bedbbad6dd
3
+ metadata.gz: 8ea1bae95a44c285bf8594fa40a73c2aa0b328d1
4
+ data.tar.gz: 479f7c52ae3c7c29592a870a819e10e6e1abb692
5
5
  SHA512:
6
- metadata.gz: d9976025d220435fc45c4f97901bba4ef506b5e0f3c503b3fb377f3a6a10f4bb74239802abf66fb407c23a121b4339451511485ca94cd06fa620ddec8223fa92
7
- data.tar.gz: 758b2840f7bb52959d4bf5ffceb0e9d281325ff5adcf848176535aa9b2a81f98386bfdfbcc03374c8b1622b99b3bb01184e38cb66ef460c95187a8272c9ff0cf
6
+ metadata.gz: b6f535642244577ecfb2f7cc2d4b4291c9a7cb052869543cf989f6cce72cb5b8609e668e4ace238940c2c252b8613e63908b6627aef608b5e716068b52de23f5
7
+ data.tar.gz: 97db3a6affbc9e74b62432961d712ace6f860cbf77b36137f22287f2e3c4f491d5cbfeeb24a540bd7c48ed21e38e91663d0dd9436e2cc62e7835bc50bd319a11
data/.gitignore CHANGED
@@ -22,3 +22,4 @@ tmp
22
22
  *.tmproj
23
23
  tmtags
24
24
  .idea/*
25
+ .pry_history
data/.rspec CHANGED
@@ -1 +1,2 @@
1
1
  --color
2
+ --require spec_helper
data/.rubocop.yml CHANGED
@@ -4,3 +4,7 @@ require: rubocop-rspec
4
4
 
5
5
  Metrics/LineLength:
6
6
  Max: 120
7
+
8
+ # shut hound up re: quote styles
9
+ Style/StringLiterals:
10
+ Enabled: false
data/Gemfile CHANGED
@@ -6,6 +6,7 @@ gemspec
6
6
  group :test, :development do
7
7
  gem 'rubocop', require: false
8
8
  gem 'rubocop-rspec', require: false
9
+ gem 'pry-byebug', require: false, platform: [:ruby_20, :ruby_21]
9
10
  end
10
11
 
11
12
  group :test do
data/lib/stanford-mods.rb CHANGED
@@ -1,16 +1,16 @@
1
- require 'stanford-mods/version'
2
1
  require 'mods'
2
+ require 'stanford-mods/date_parsing'
3
+ require 'stanford-mods/geo_spatial'
3
4
  require 'stanford-mods/name'
4
- require 'stanford-mods/searchworks'
5
+ require 'stanford-mods/origin_info'
5
6
  require 'stanford-mods/physical_location'
6
- require 'stanford-mods/geo_spatial'
7
+ require 'stanford-mods/searchworks'
8
+ require 'stanford-mods/version'
7
9
 
8
10
  # Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
9
11
  module Stanford
10
12
  module Mods
11
-
12
13
  class Record < ::Mods::Record
13
-
14
14
  end # Record class
15
15
  end # Mods module
16
16
  end # Stanford module
@@ -0,0 +1,245 @@
1
require 'date'

module Stanford
  module Mods
    # Parsing date strings: turns free-form date strings (as found in MODS
    # records) into year values for faceting and for lexical sorting.
    # TODO:  this should become its own gem and/or become eclipsed by/merged with timetwister gem
    #   When this is "gemified":
    #     - we may want an integer or date sort field as well as lexical
    #     - we could add methods like my_date.bc?
    class DateParsing
      # a square bracket sitting between two digits, e.g. 169[5] or [18]91
      BRACKETS_BETWEEN_DIGITS_REXEXP = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
      # one or two digits followed (anywhere later) by the word "century"
      CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
      # one or two digits followed by two of 'u' or '-', e.g. 19uu, 18--
      CENTURY_4CHAR_REGEXP = Regexp.new('(\d{1,2})[u\-]{2}')
      # one to four digits followed (anywhere later) by "B.C."
      BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
      # an optionally negative number of at most three digits
      EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')

      # get single facet value for date, generally an explicit year or "17th century" or "5 B.C."
      # returns '845', not 0845
      # @param [String] date_str String containing a date (we hope)
      # @return [String, nil] String facet value for year if we could parse one, nil otherwise
      def self.facet_string_from_date_str(date_str)
        DateParsing.new(date_str).facet_string_from_date_str
      end

      # get String sortable value year if we can parse date_str to get a year.
      #   SearchWorks currently uses a string field for pub date sorting; thus so does Spotlight.
      #   The values returned must *lexically* sort in chronological order, so the B.C. dates are tricky
      # @param [String] date_str String containing a date (we hope)
      # @return [String, nil] String sortable year if we could parse one, nil otherwise
      def self.sortable_year_string_from_date_str(date_str)
        DateParsing.new(date_str).sortable_year_string_from_date_str
      end

      # true if the year is between -999 and (current year + 1)
      # The whole string must be the year: \A and \z are used (not ^ and $) so
      # that a multi-line string whose first line looks like a year is rejected.
      # @param [String] year_str String containing a date in format: -yyy, -yy, -y, y, yy, yyy, yyyy
      # @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
      def self.year_str_valid?(year_str)
        return false unless year_str && year_str.match(/\A(?:\d{1,4}|-\d{1,3})\z/)
        year = year_str.to_i
        (-1000 < year) && (year < Date.today.year + 2)
      end

      attr_reader :orig_date_str

      # @param [String, nil] date_str String containing a date (we hope)
      def initialize(date_str)
        # keep a frozen private copy, so the caller's own String is not frozen as a side effect
        @orig_date_str = date_str.nil? ? nil : date_str.dup.freeze
      end

      # get single facet value for date, generally an explicit year or "17th century" or "5 B.C."
      # @return [String, nil] String facet value for year if we could parse one, nil otherwise
      #   (also nil when there is no date string at all)
      def facet_string_from_date_str
        return if orig_date_str.nil?
        return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
        # B.C. first in case there are 4 digits, e.g. 1600 B.C.
        return facet_string_for_bc if orig_date_str.match(BC_REGEX)
        # most date strings have a four digit year;
        # 2 digit years and decades are always 19xx or 20xx, so the sortable version makes a good facet string
        result = sortable_year_for_yyyy || sortable_year_for_yy || sortable_year_for_decade
        # try removing brackets between digits in case we have 169[5] or [18]91
        if result.nil? && orig_date_str.match(BRACKETS_BETWEEN_DIGITS_REXEXP)
          no_brackets = orig_date_str.delete('[]')
          return DateParsing.new(no_brackets).facet_string_from_date_str
        end
        # parsing below this line gives string inapprop for year_str_valid?
        unless self.class.year_str_valid?(result)
          result = facet_string_for_century
          result ||= facet_string_for_early_numeric
        end
        # remove leading 0s from early dates
        result = result.to_i.to_s if result && result.match(/^\d+$/)
        result
      end

      # get String sortable value year if we can parse date_str to get a year.
      #   SearchWorks currently uses a string field for pub date sorting; thus so does Spotlight.
      #   The values returned must *lexically* sort in chronological order, so the B.C. dates are tricky
      # @return [String, nil] String sortable year if we could parse one, nil otherwise
      #   (also nil when there is no date string at all)
      def sortable_year_string_from_date_str
        return if orig_date_str.nil?
        return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
        # B.C. first in case there are 4 digits, e.g. 1600 B.C.
        return sortable_year_for_bc if orig_date_str.match(BC_REGEX)
        # most date strings have a four digit year
        result = sortable_year_for_yyyy
        result ||= sortable_year_for_yy
        result ||= sortable_year_for_decade
        result ||= sortable_year_for_century
        result ||= sortable_year_for_early_numeric
        # try removing brackets between digits in case we have 169[5] or [18]91
        if result.nil? && orig_date_str.match(BRACKETS_BETWEEN_DIGITS_REXEXP)
          no_brackets = orig_date_str.delete('[]')
          return DateParsing.new(no_brackets).sortable_year_string_from_date_str
        end
        result if self.class.year_str_valid?(result)
      end

      # looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
      # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
      def sortable_year_for_yyyy
        matches = orig_date_str.match(/\d{4}/) if orig_date_str
        matches.to_s if matches
      end

      # returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
      #   note that these are the only 2 digit year patterns found in our actual date strings in MODS records
      #   the parsed date is moved back a century when it would otherwise lie in the future:
      #     1/1/15  ->  2015
      #     1/1/25  ->  1925 (while 2025 is still in the future)
      # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
      def sortable_year_for_yy
        return unless orig_date_str
        if orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
          date_obj = Date.strptime(orig_date_str, '%m/%d/%y')
        elsif orig_date_str.match(/\d{1,2}-\d{1,2}-\d{2}/)
          date_obj = Date.strptime(orig_date_str, '%m-%d-%y')
        end
        return unless date_obj
        if date_obj > Date.today
          date_obj = Date.new(date_obj.year - 100, date_obj.month, date_obj.mday)
        end
        date_obj.year.to_s
      rescue ArgumentError
        nil # explicitly want nil if date won't parse
      end

      # get first year of decade (as String) if we have:  yyyu, yyy-, yyy? or yyyx pattern
      #   note that these are the only decade patterns found in our actual date strings in MODS records
      # @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
      def sortable_year_for_decade
        decade_matches = orig_date_str.match(/\d{3}[u\-?x]/) if orig_date_str
        return unless decade_matches
        # the trailing placeholder character becomes 0, giving the first year of the decade
        changed_to_zero = decade_matches.to_s.tr('u\-?x', '0')
        DateParsing.new(changed_to_zero).sortable_year_for_yyyy
      end

      # get first year of century (as String) if we have:  yyuu, yy--, yy--? or xxth century pattern
      #   note that these are the only century patterns found in our actual date strings in MODS records
      # @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
      def sortable_year_for_century
        return unless orig_date_str
        return if orig_date_str.match(/B\.C\./)
        century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
        if century_matches
          digits = century_matches[1]
          return digits + '00' if digits.length == 2
          return '0' + digits + '00' if digits.length == 1
        end
        century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
        if century_str_matches
          # the Nth century starts in year (N - 1)00
          yy = (century_str_matches[1].to_i - 1).to_s
          return yy + '00' if yy.length == 2
          return '0' + yy + '00' if yy.length == 1
        end
        nil
      end

      # get single facet value for century (17th century) if we have:  yyuu, yy--, yy--? or xxth century pattern
      #   note that these are the only century patterns found in our actual date strings in MODS records
      # @return [String, nil] yy(th) Century if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
      def facet_string_for_century
        return unless orig_date_str
        return if orig_date_str.match(/B\.C\./)
        century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
        return century_str_matches.to_s if century_str_matches

        century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
        return unless century_matches
        # loaded lazily: only this branch needs Integer#ordinalize
        require 'active_support/core_ext/integer/inflections'
        "#{(century_matches[1].to_i + 1).ordinalize} century"
      end

      # get String sortable value for B.C. if we have B.C. pattern
      #   note that these values must *lexically* sort to create a chronological sort.
      #   The year is subtracted from 1000 and zero padded to three digits, so we get:
      #     -700 for 300 B.C., -750 for 250 B.C., -800 for 200 B.C., -801 for 199 B.C., -001 for 999 B.C.
      #   (the same scheme sortable_year_for_early_numeric uses for negative years).
      #   This only works for years up to 999 B.C.; older dates cannot be expressed and give nil
      #   rather than a value that would be mistaken for an A.D. year.
      # @return [String, nil] String sortable -ddd if B.C. in pattern and the year is sortable; nil otherwise
      def sortable_year_for_bc
        bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
        return unless bc_matches
        bc_year = bc_matches[1].to_i
        return if bc_year >= 1000
        format('-%03d', 1000 - bc_year)
      end

      # get single facet value for B.C. if we have B.C. pattern
      # @return [String, nil] ddd B.C. if ddd B.C. in pattern; nil otherwise
      def facet_string_for_bc
        bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
        bc_matches.to_s if bc_matches
      end

      # get String sortable value from date String containing yyy, yy, y, -y, -yy, -yyy
      #   note that these values must *lexically* sort to create a chronological sort.
      #   We know our data does not contain negative dates older than -999, so we can make them
      #   lexically sort by subtracting 1000.  So we get:
      #     -983 for -17, -999 for -1, 0000 for 0, 0001 for 1, 0017 for 17
      # @return [String, nil] String sortable -ddd or dddd if orig_date_str matches pattern; nil otherwise
      def sortable_year_for_early_numeric
        return unless orig_date_str && orig_date_str.match(EARLY_NUMERIC)
        if orig_date_str.match(/^\-/)
          # negative number becomes x - 1000 for sorting; -005 for -995
          num = orig_date_str[1..-1].to_i - 1000
          return '-' + num.to_s[1..-1].rjust(3, '0')
        end
        orig_date_str.rjust(4, '0')
      end

      # get single facet value for date String containing yyy, yy, y, -y, -yy, -yyy
      #   negative number strings will be changed to B.C. strings
      # @return [String, nil] facet value if orig_date_str matches pattern; nil otherwise
      def facet_string_for_early_numeric
        return unless orig_date_str && orig_date_str.match(EARLY_NUMERIC)
        # negative number becomes B.C.
        return orig_date_str[1..-1] + " B.C." if orig_date_str.match(/^\-/)
        # remove leading 0s from early dates
        orig_date_str.to_i.to_s
      end

      # NOTE:  while Date.parse() works for many dates, the *sortable_year_for_yyyy
      #   actually works for nearly all those cases and a lot more besides.  Trial and error
      #   with an extensive set of test data culled from actual date strings in our MODS records
      #   has made this method bogus.
      # @return [String, nil] year (as String) if orig_date_str is parseable via ruby Date, nil otherwise
      def year_via_ruby_parsing
        return unless orig_date_str
        return unless orig_date_str.match(/\d\d/) # need at least 2 digits
        # need more in string than only 2 digits
        return if orig_date_str.match(/^\d\d$/) || orig_date_str.match(/^\D*\d\d\D*$/)
        return if orig_date_str.match(/\d\s*B.C./) # skip B.C. dates
        Date.parse(orig_date_str).year.to_s
      rescue ArgumentError
        nil # explicitly want nil if date won't parse
      end
    end
  end
end
@@ -0,0 +1,411 @@
1
+ require 'logger'
2
+ require 'mods'
3
+
4
# Parsing MODS /originInfo for Publication/Imprint data:
#  * pub year for date slider facet
#  * pub year for sorting
#  * pub year for single facet value
#  * imprint info for display
# These methods may be used by searchworks.rb file or by downstream apps
module Stanford
  module Mods
    class Record < ::Mods::Record
      # return a single string intended for facet use for pub date
      # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
      #  look for a keyDate and use it if there is one;  otherwise pick earliest date
      # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
      #   should be ignored; false if approximate dates should be included
      # @return [String, nil] single String containing publication year for facet use, nil if none found
      def pub_date_facet_single_value(ignore_approximate = false)
        # prefer dateIssued
        result = pub_date_best_single_facet_value(date_issued_elements(ignore_approximate))
        result ||= pub_date_best_single_facet_value(date_created_elements(ignore_approximate))
        # dateCaptured for web archive seed records
        result ||= pub_date_best_single_facet_value(@mods_ng_xml.origin_info.dateCaptured.to_a)
        result
      end

      # return a single string intended for lexical sorting for pub date
      # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
      #  look for a keyDate and use it if there is one;  otherwise pick earliest date
      # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
      #   should be ignored; false if approximate dates should be included
      # @return [String, nil] single String containing publication year for lexical sorting, nil if none found
      #   note that for string sorting  5 B.C. = -5  => -995;  6 B.C. => -994  so 6 B.C. sorts before 5 B.C.
      def pub_date_sortable_string(ignore_approximate = false)
        # prefer dateIssued
        result = pub_date_best_sort_str_value(date_issued_elements(ignore_approximate))
        result ||= pub_date_best_sort_str_value(date_created_elements(ignore_approximate))
        # dateCaptured for web archive seed records
        result ||= pub_date_best_sort_str_value(@mods_ng_xml.origin_info.dateCaptured.to_a)
        result
      end

      # given the passed date elements, look for a single keyDate and use it if there is one;
      #    otherwise pick earliest parseable date
      # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
      # @return [String, nil] single String containing publication year for facet use
      def pub_date_best_single_facet_value(date_el_array)
        return if date_el_array.empty?
        # prefer keyDate
        key_date_el = self.class.keyDate(date_el_array)
        result = DateParsing.facet_string_from_date_str(key_date_el.content) if key_date_el
        return result if result
        # settle for earliest parseable date
        _ignore, orig_str_to_parse = self.class.earliest_date(date_el_array)
        DateParsing.facet_string_from_date_str(orig_str_to_parse) if orig_str_to_parse
      end

      # given the passed date elements, look for a single keyDate and use it if there is one;
      #    otherwise pick earliest parseable date
      # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
      # @return [String, nil] single String containing publication year for lexical sorting
      def pub_date_best_sort_str_value(date_el_array)
        return if date_el_array.empty?
        # prefer keyDate
        key_date_el = self.class.keyDate(date_el_array)
        result = DateParsing.sortable_year_string_from_date_str(key_date_el.content) if key_date_el
        return result if result
        # settle for earliest parseable date
        sortable_str, _ignore = self.class.earliest_date(date_el_array)
        sortable_str
      end

      protected :pub_date_best_single_facet_value, :pub_date_best_sort_str_value

      # return /originInfo/dateCreated elements in MODS records
      # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
      #   should be excluded; false approximate dates should be included
      # @return [Array<Nokogiri::XML::Element>]
      def date_created_elements(ignore_approximate = false)
        date_created_nodeset = @mods_ng_xml.origin_info.dateCreated
        return self.class.remove_approximate(date_created_nodeset) if ignore_approximate
        date_created_nodeset.to_a
      end

      # return /originInfo/dateIssued elements in MODS records
      # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
      #   should be excluded; false approximate dates should be included
      # @return [Array<Nokogiri::XML::Element>]
      def date_issued_elements(ignore_approximate = false)
        date_issued_nodeset = @mods_ng_xml.origin_info.dateIssued
        return self.class.remove_approximate(date_issued_nodeset) if ignore_approximate
        date_issued_nodeset.to_a
      end

      # given a set of date elements, return the single element with attribute keyDate="yes"
      #  or return nil if no elements have attribute keyDate="yes", or if multiple elements have keyDate="yes"
      # @param [Array<Nokogiri::XML::Element>] elements Array of date elements
      # @return [Nokogiri::XML::Element, nil] single date element with attribute keyDate="yes", or nil
      def self.keyDate(elements)
        key_dates = elements.select { |node| node["keyDate"] == 'yes' }
        key_dates.first if key_dates.size == 1
      end

      # remove Elements from NodeSet if they have a qualifier attribute of 'approximate' or 'questionable'
      # @param [Nokogiri::XML::NodeSet<Nokogiri::XML::Element>] nodeset set of date elements
      # @return [Array<Nokogiri::XML::Element>] the set of date elements minus any that
      #   had a qualifier attribute of 'approximate' or 'questionable'
      def self.remove_approximate(nodeset)
        nodeset.reject { |node| date_is_approximate?(node) }
      end

      # NOTE:  legal values for MODS date elements with attribute qualifier are
      #   'approximate', 'inferred' or 'questionable'
      # @param [Nokogiri::XML::Element] date_element MODS date element
      # @return [Boolean] true if date_element has a qualifier attribute of "approximate" or "questionable",
      #   false if no qualifier attribute, or if attribute is 'inferred' or some other value
      def self.date_is_approximate?(date_element)
        qualifier = date_element["qualifier"] if date_element.respond_to?('[]')
        qualifier == 'approximate' || qualifier == 'questionable'
      end

      # get earliest parseable date from the passed date elements
      # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
      # @return two String values:
      #   the first is the lexically sortable String value of the earliest date;
      #   the second is the original String value of the chosen element
      #   (nil when none of the elements has a parseable date)
      def self.earliest_date(date_el_array)
        poss_results = {}
        date_el_array.each do |el|
          sortable = DateParsing.sortable_year_string_from_date_str(el.content)
          poss_results[sortable] = el.content if sortable
        end
        # sortable strings are built to sort lexically in chronological order,
        # so the smallest key is the earliest date; min is nil for an empty Hash
        earliest = poss_results.keys.min
        return earliest, poss_results[earliest] if earliest
      end

      # ---- old date parsing methods used downstream of gem;  will be deprecated/replaced with new date parsing methods

      # @return [Array<String>] values of /originInfo/place/placeTerm
      def place
        term_values([:origin_info, :place, :placeTerm])
      end

      # For the date display only, the first place to look is in the dates without encoding=marc array.
      # If no such dates, select the first date in the dates_marc_encoding array.  Otherwise return nil
      # @return [String] value for the pub_date_display Solr field for this document or nil if none
      def pub_date_display
        return dates_no_marc_encoding.first unless dates_no_marc_encoding.empty?
        return dates_marc_encoding.first unless dates_marc_encoding.empty?
        nil
      end

      # For the date indexing, sorting and faceting, the first place to look is in the dates with encoding=marc array.
      # If that doesn't exist, look in the dates without encoding=marc array.  Otherwise return nil
      # @return [Array<String>] values for the date Solr field for this document or nil if none
      def pub_dates
        return dates_marc_encoding unless dates_marc_encoding.empty?
        return dates_no_marc_encoding unless dates_no_marc_encoding.empty?
        nil
      end

      # Get the publish year from mods
      # @return [String] 4 character year or nil if no valid date was found
      def pub_year
        # use the cached year if there is one; '' is the cached marker for "nothing found"
        if @pub_year
          return nil if @pub_year == ''
          return @pub_year
        end

        dates = pub_dates
        if dates
          pruned_dates = dates.map do |f_date|
            # remove ? and []
            if f_date.length == 4 && f_date.end_with?('?')
              f_date.tr('?', '0')
            else
              f_date.delete('?[]')
            end
          end
          # try to find a date starting with the most normal date formats and progressing to more wonky ones
          @pub_year = get_plain_four_digit_year pruned_dates
          return @pub_year if @pub_year
          # Check for years in u notation, e.g., 198u
          @pub_year = get_u_year pruned_dates
          return @pub_year if @pub_year
          @pub_year = get_double_digit_century pruned_dates
          return @pub_year if @pub_year
          @pub_year = get_bc_year pruned_dates
          return @pub_year if @pub_year
          @pub_year = get_three_digit_year pruned_dates
          return @pub_year if @pub_year
          @pub_year = get_single_digit_century pruned_dates
          return @pub_year if @pub_year
        end
        @pub_year = ''
        nil
      end

      # creates a date suitable for sorting. Guaranteed to be 4 digits or nil
      # @return [String, nil] 4 character sortable year, or nil if there is no pub date
      # @raise [RuntimeError] if the computed value is not 4 characters long
      def pub_date_sort
        pd = pub_date
        if pd
          pd = '0' + pd if pd.length == 3
          pd = pd.gsub('--', '00')
        end
        fail "pub_date_sort was about to return a non 4 digit value #{pd}!" if pd && pd.length != 4
        pd
      end

      # The year the object was published
      # @return [String] 4 character year or nil
      def pub_date
        pub_year || nil
      end

      # Value for the pub date facet.  This is less strict than the 4 year date requirements for pub_date
      #   negative years become "ddd B.C.", century notation (yy--) becomes e.g. "20th century" / "21st century"
      # @return [String, nil] value for the pub date facet, nil if there is no pub date
      def pub_date_facet
        year = pub_date
        return nil unless year
        return (year.to_i + 1000).to_s + ' B.C.' if year.start_with?('-')
        return year unless year.include?('--')
        century = year[0, 2].to_i + 1
        "#{century}#{ordinal_suffix(century)} century"
      end

      # ---- old date parsing methods will be deprecated/replaced with new date parsing methods

      protected

      # @return [Array<String>] dates from dateIssued and dateCreated tags from origin_info with encoding="marc"
      def dates_marc_encoding
        @dates_marc_encoding ||= begin
          parse_dates_from_originInfo
          @dates_marc_encoding
        end
      end

      # @return [Array<String>] dates from dateIssued and dateCreated tags from origin_info with encoding not "marc"
      def dates_no_marc_encoding
        @dates_no_marc_encoding ||= begin
          parse_dates_from_originInfo
          @dates_no_marc_encoding
        end
      end

      # Populate @dates_marc_encoding and @dates_no_marc_encoding from dateIssued and dateCreated tags from origin_info
      # with and without encoding=marc
      def parse_dates_from_originInfo
        @dates_marc_encoding = []
        @dates_no_marc_encoding = []
        # dateIssued values are collected before dateCreated values
        [origin_info.dateIssued, origin_info.dateCreated].each do |date_nodes|
          date_nodes.each do |date_node|
            if date_node.encoding == "marc"
              @dates_marc_encoding << date_node.text
            else
              @dates_no_marc_encoding << date_node.text
            end
          end
        end
      end

      # @return [Boolean] true if Integer(object) succeeds, false if it raises
      def is_number?(object)
        true if Integer(object) rescue false
      end

      # @return [Boolean] true if Date.parse(object) succeeds, false if it raises
      def is_date?(object)
        true if Date.parse(object) rescue false
      end

      # TODO:  need tests for these methods

      # English ordinal suffix for a positive integer (1 -> "st", 2 -> "nd", 11 -> "th", 21 -> "st")
      # @param [Integer] number
      # @return [String] "st", "nd", "rd" or "th"
      def ordinal_suffix(number)
        return 'th' if (11..13).include?(number % 100)
        case number % 10
        when 1 then 'st'
        when 2 then 'nd'
        when 3 then 'rd'
        else 'th'
        end
      end

      # true if token is marked as a CE date in date_str, i.e. it is followed by ' CE'
      # or by any three characters and then 'CE' (to catch things like '1865-6 CE');
      # a match at the very start of the string counts too
      # @param [String] date_str the full date string
      # @param [String] token the piece of date_str that was matched (digits, or digits + 'th')
      # @param [Boolean] allow_century true if token followed by ' century CE' should also count
      # @return [Boolean]
      def followed_by_ce?(date_str, token, allow_century = false)
        return true if date_str.include?(token + ' CE')
        return true if date_str.index(Regexp.new(token + '...CE'))
        return true if allow_century && date_str.index(Regexp.new(token + ' century CE'))
        false
      end

      # get a 4 digit year like 1865 from array of dates
      # @param [Array<String>] dates an array of potential year strings
      # @return [String, nil] the year, or nil if none of the strings contains 4 consecutive digits
      def get_plain_four_digit_year(dates)
        dates.each do |f_date|
          matches = f_date.scan(/\d{4}/)
          # no year in this string: keep looking in the remaining strings
          next if matches.empty?
          if matches.length == 1
            @pub_year = matches.first
            return matches.first
          end
          # when there are multiple matches, check for ones with CE after them
          ce_year = matches.find { |match| followed_by_ce?(f_date, match) }
          if ce_year
            @pub_year = ce_year
            return ce_year
          end
          return matches.first
        end
        nil
      end

      # get a 3 digit year like 965 from the date array
      # @param [Array<String>] dates an array of potential year strings
      # @return [String, nil] first run of 3 digits found, or nil
      def get_three_digit_year(dates)
        dates.each do |f_date|
          matches = f_date.scan(/\d{3}/)
          return matches.first unless matches.empty?
        end
        nil
      end

      # get the 3 digit BC year, return it as a negative, so -700 for 300 BC.
      #  Other methods will translate it to proper display, this is good for sorting.
      # @param [Array<String>] dates an array of potential year strings
      # @return [String, nil] (year - 1000) as a String, or nil if no 3 digit B.C. year was found
      def get_bc_year(dates)
        dates.each do |f_date|
          matches = f_date.scan(/\d{3} B.C./)
          next if matches.empty?
          bc_year = matches.first[0..2]
          return (bc_year.to_i - 1000).to_s
        end
        nil
      end

      # get a single digit century like '9th century' from the date array
      # @param [Array<String>] dates an array of potential year strings
      # @return [String] y--  if we identify century digit in string
      def get_single_digit_century(dates)
        century_from_ordinal(dates, 1)
      end

      # get a double digit century like '12th century' from the date array
      # @param [Array<String>] dates an array of potential year strings
      # @return [String] yy--  if we identify century digits in string
      def get_double_digit_century(dates)
        century_from_ordinal(dates, 2)
      end

      # shared implementation for get_single_digit_century and get_double_digit_century:
      # looks for <digits>th in each string and turns the century number into (century - 1) + '--'
      # @param [Array<String>] dates an array of potential year strings
      # @param [Integer] digits number of digits the century number must have (1 or 2)
      # @return [String, nil] century in y-- / yy-- notation (also stored in @pub_year), or nil
      def century_from_ordinal(dates, digits)
        pattern = Regexp.new("\\d{#{digits}}th")
        dates.each do |f_date|
          matches = f_date.scan(pattern)
          next if matches.empty?
          chosen = if matches.length == 1
                     matches.first
                   else
                     # when there are multiple matches, check for ones with CE after them
                     matches.find { |match| followed_by_ce?(f_date, match, true) }
                   end
          next unless chosen
          @pub_year = (chosen[0, digits].to_i - 1).to_s + '--'
          return @pub_year
        end
        nil
      end

      # If a year has a "u" in it, replace u with 0 for yyyu (becomes yyy0)
      #   and replace u with '-' for yyuu  (becomes yy--)
      # @param [Array<String>] dates looking for matches on yyyu or yyuu in these strings
      # @return [String, nil] String of format yyy0 or yy--, or nil
      def get_u_year(dates)
        dates.each do |f_date|
          # Single digit u notation
          matches = f_date.scan(/\d{3}u/)
          return matches.first.tr('u', '0') if matches.length == 1
          # Double digit u notation
          matches = f_date.scan(/\d{2}u{2}/)
          return matches.first.tr('u', '-') if matches.length == 1
        end
        nil
      end
    end # class Record
  end
end