stanford-mods 2.6.4 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
- data/lib/stanford-mods/concerns/name.rb +57 -0
- data/lib/stanford-mods/concerns/origin_info.rb +113 -0
- data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
- data/lib/stanford-mods/concerns/searchworks.rb +125 -0
- data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
- data/lib/stanford-mods/concerns/title.rb +87 -0
- data/lib/stanford-mods/coordinate.rb +24 -3
- data/lib/stanford-mods/date_parsing.rb +32 -289
- data/lib/stanford-mods/imprint.rb +170 -322
- data/lib/stanford-mods/record.rb +20 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +0 -0
- data/lib/stanford-mods.rb +12 -11
- data/spec/fixtures/searchworks_imprint_data.rb +38 -39
- data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
- data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
- data/spec/geo_spatial_spec.rb +1 -6
- data/spec/imprint_spec.rb +263 -207
- data/spec/lib/stanford-mods/coordinate_spec.rb +3 -5
- data/spec/name_spec.rb +26 -230
- data/spec/origin_info_spec.rb +34 -300
- data/spec/searchworks_basic_spec.rb +1 -3
- data/spec/searchworks_pub_dates_spec.rb +0 -215
- data/spec/searchworks_spec.rb +0 -21
- data/spec/searchworks_subject_raw_spec.rb +106 -105
- data/spec/searchworks_subject_spec.rb +19 -55
- data/spec/searchworks_title_spec.rb +5 -5
- data/stanford-mods.gemspec +1 -1
- metadata +19 -15
- data/lib/marc_countries.rb +0 -387
- data/lib/stanford-mods/geo_utils.rb +0 -28
- data/lib/stanford-mods/name.rb +0 -80
- data/lib/stanford-mods/origin_info.rb +0 -489
- data/lib/stanford-mods/searchworks.rb +0 -333
- data/lib/stanford-mods/searchworks_subjects.rb +0 -196
- data/spec/date_parsing_spec.rb +0 -905
@@ -1,489 +0,0 @@
|
|
1
|
-
require 'mods'
|
2
|
-
|
3
|
-
# Parsing MODS /originInfo for Publication/Imprint data:
|
4
|
-
# * pub year for date slider facet
|
5
|
-
# * pub year for sorting
|
6
|
-
# * pub year for single display value
|
7
|
-
# * imprint info for display
|
8
|
-
# *
|
9
|
-
# These methods may be used by searchworks.rb file or by downstream apps
|
10
|
-
module Stanford
|
11
|
-
module Mods
|
12
|
-
class Record < ::Mods::Record
|
13
|
-
# return pub year as an Integer
|
14
|
-
# prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
|
15
|
-
# look for a keyDate and use it if there is one; otherwise pick earliest date
|
16
|
-
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
|
17
|
-
# @return [Integer] publication year as an Integer
|
18
|
-
# @note for sorting: 5 B.C. => -5; 666 B.C. => -666
|
19
|
-
def pub_year_int(ignore_approximate = false)
  # Integer flavour of the publication year; which date element is used is
  # decided by single_pub_year, this method only names the per-element
  # extractor (:year_int) to apply.
  extractor = :year_int
  single_pub_year(ignore_approximate, extractor)
end
|
22
|
-
|
23
|
-
# return a single string intended for lexical sorting for pub date
|
24
|
-
# prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
|
25
|
-
# look for a keyDate and use it if there is one; otherwise pick earliest date
|
26
|
-
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
|
27
|
-
# @return [String] single String containing publication year for lexical sorting
|
28
|
-
# @note for string sorting 5 B.C. = -5 => -995; 6 B.C. => -994, so 6 B.C. sorts before 5 B.C.
|
29
|
-
# @deprecated use pub_year_int
|
30
|
-
def pub_year_sort_str(ignore_approximate = false)
  # Lexically sortable String flavour of the publication year; which date
  # element is used is decided by single_pub_year, this method only names
  # the per-element extractor (:year_sort_str) to apply.
  extractor = :year_sort_str
  single_pub_year(ignore_approximate, extractor)
end
|
33
|
-
|
34
|
-
# return a single string intended for display of pub year
|
35
|
-
# 0 < year < 1000: add A.D. suffix
|
36
|
-
# year < 0: add B.C. suffix. ('-5' => '5 B.C.', '700 B.C.' => '700 B.C.')
|
37
|
-
# 195u => 195x
|
38
|
-
# 19uu => 19xx
|
39
|
-
# '-5' => '5 B.C.'
|
40
|
-
# '700 B.C.' => '700 B.C.'
|
41
|
-
# '7th century' => '7th century'
|
42
|
-
# date ranges?
|
43
|
-
# prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
|
44
|
-
# look for a keyDate and use it if there is one; otherwise pick earliest date
|
45
|
-
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
|
46
|
-
# should be ignored; false if approximate dates should be included
|
47
|
-
def pub_year_display_str(ignore_approximate = false)
  # TODO: want range displayed when start and end points
  # TODO: also want best year in year_isi fields
  #   get_main_title_date
  #   https://github.com/sul-dlss/SearchWorks/blob/7d4d870a9d450fed8b081c38dc3dbd590f0b706e/app/helpers/results_document_helper.rb#L8-L46
  #
  #   "publication_year_isi" => "Publication date",  <-- do it already
  #   "beginning_year_isi" => "Beginning date",
  #   "earliest_year_isi" => "Earliest date",
  #   "earliest_poss_year_isi" => "Earliest possible date",
  #   "ending_year_isi" => "Ending date",
  #   "latest_year_isi" => "Latest date",
  #   "latest_poss_year_isi" => "Latest possible date",
  #   "production_year_isi" => "Production date",
  #   "original_year_isi" => "Original date",
  #   "copyright_year_isi" => "Copyright date"} %>
  #
  #   "creation_year_isi" => "Creation date",  <-- do it already
  #   {}"release_year_isi" => "Release date",
  #   {}"reprint_year_isi" => "Reprint/reissue date",
  #   {}"other_year_isi" => "Date",

  # Display flavour of the publication year: single_pub_year picks the date
  # element, :year_display_str turns it into the display String.
  extractor = :year_display_str
  single_pub_year(ignore_approximate, extractor)
end
|
71
|
-
|
72
|
-
# @return [String] single String containing imprint information for display
|
73
|
-
def imprint_display_str
  # Wrap this record's origin_info in an Imprint and ask it for its display string.
  Stanford::Mods::Imprint.new(origin_info).display_str
end
|
77
|
-
|
78
|
-
# given the passed date elements, look for a single keyDate and use it if there is one;
|
79
|
-
# otherwise pick earliest parseable date
|
80
|
-
# @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
|
81
|
-
# @return [String] single String containing publication year for display
|
82
|
-
def year_display_str(date_el_array)
  # A single keyDate element, when there is one, decides the answer.
  from_key_date = date_parsing_result(date_el_array, :date_str_for_display)
  return from_key_date if from_key_date

  # Otherwise take the element with the earliest sortable year and format
  # its original text for display.
  _sortable, original_text = self.class.earliest_year_str(date_el_array)
  return unless original_text

  DateParsing.date_str_for_display(original_text)
end
|
89
|
-
|
90
|
-
# given the passed date elements, look for a single keyDate and use it if there is one;
|
91
|
-
# otherwise pick earliest parseable date
|
92
|
-
# @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
|
93
|
-
# @return [Integer] publication year as an Integer
|
94
|
-
def year_int(date_el_array)
  # A single keyDate element, when there is one, decides the answer.
  from_key_date = date_parsing_result(date_el_array, :year_int_from_date_str)
  return from_key_date if from_key_date

  # Otherwise fall back to the earliest parseable year (nil when none parse).
  earliest, _original_text = self.class.earliest_year_int(date_el_array)
  earliest
end
|
101
|
-
|
102
|
-
# given the passed date elements, look for a single keyDate and use it if there is one;
|
103
|
-
# otherwise pick earliest parseable date
|
104
|
-
# @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
|
105
|
-
# @return [String] single String containing publication year for lexical sorting
|
106
|
-
def year_sort_str(date_el_array)
  # A single keyDate element, when there is one, decides the answer.
  from_key_date = date_parsing_result(date_el_array, :sortable_year_string_from_date_str)
  return from_key_date if from_key_date

  # Otherwise fall back to the earliest sortable year string (nil when none parse).
  earliest, _original_text = self.class.earliest_year_str(date_el_array)
  earliest
end
|
113
|
-
|
114
|
-
# return /originInfo/dateCreated elements in MODS records
|
115
|
-
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
|
116
|
-
# should be excluded; false approximate dates should be included
|
117
|
-
# @return [Array<Nokogiri::XML::Element>]
|
118
|
-
def date_created_elements(ignore_approximate = false)
  created = mods_ng_xml.origin_info.dateCreated
  # Either filter out approximate/questionable dates or hand back everything as an Array.
  ignore_approximate ? self.class.remove_approximate(created) : created.to_a
end
|
124
|
-
|
125
|
-
# return /originInfo/dateIssued elements in MODS records
|
126
|
-
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
|
127
|
-
# should be excluded; false approximate dates should be included
|
128
|
-
# @return [Array<Nokogiri::XML::Element>]
|
129
|
-
def date_issued_elements(ignore_approximate = false)
  issued = mods_ng_xml.origin_info.dateIssued
  # Either filter out approximate/questionable dates or hand back everything as an Array.
  ignore_approximate ? self.class.remove_approximate(issued) : issued.to_a
end
|
135
|
-
|
136
|
-
# given a set of date elements, return the single element with attribute keyDate="yes"
|
137
|
-
# or return nil if no elements have attribute keyDate="yes", or if multiple elements have keyDate="yes"
|
138
|
-
# @param [Array<Nokogiri::XML::Element>] Array of date elements
|
139
|
-
# @return [Nokogiri::XML::Element, nil] single date element with attribute keyDate="yes", or nil
|
140
|
-
def self.keyDate(elements)
  flagged = elements.select { |element| element['keyDate'] == 'yes' }
  # Only an unambiguous keyDate counts: zero or several flagged elements yield nil.
  flagged.length == 1 ? flagged.first : nil
end
|
144
|
-
|
145
|
-
# remove Elements from NodeSet if they have a qualifier attribute of 'approximate' or 'questionable'
|
146
|
-
# @param [Nokogiri::XML::NodeSet<Nokogiri::XML::Element>] nodeset set of date elements
|
147
|
-
# @return [Array<Nokogiri::XML::Element>] the set of date elements minus any that
|
148
|
-
# had a qualifier attribute of 'approximate' or 'questionable'
|
149
|
-
def self.remove_approximate(nodeset)
  # Keep every element that date_is_approximate? does not flag.
  # `reject` states the filter directly; the previous `select { node unless ... }`
  # only worked because the block's return value happened to be truthy.
  nodeset.reject { |node| date_is_approximate?(node) }
end
|
152
|
-
|
153
|
-
# NOTE: legal values for MODS date elements with attribute qualifier are
|
154
|
-
# 'approximate', 'inferred' or 'questionable'
|
155
|
-
# @param [Nokogiri::XML::Element] date_element MODS date element
|
156
|
-
# @return [Boolean] true if date_element has a qualifier attribute of "approximate" or "questionable",
|
157
|
-
# false if no qualifier attribute, or if attribute is 'inferred' or some other value
|
158
|
-
def self.date_is_approximate?(date_element)
  # Objects that cannot be indexed have no qualifier, hence are not approximate.
  return false unless date_element.respond_to?('[]')

  # 'inferred' (the third legal MODS qualifier) deliberately does not count.
  %w[approximate questionable].include?(date_element['qualifier'])
end
|
162
|
-
|
163
|
-
# get earliest parseable year (as an Integer) from the passed date elements
|
164
|
-
# @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
|
165
|
-
# @return two values:
|
166
|
-
# the first is the Integer value of the earliest year;
|
167
|
-
# the second is the original String value of the chosen element
|
168
|
-
def self.earliest_year_int(date_el_array)
  # Same selection as earliest_year, using DateParsing's Integer year parser.
  parser = :year_int_from_date_str
  earliest_year(date_el_array, parser)
end
|
171
|
-
|
172
|
-
# get earliest parseable year (as a String) from the passed date elements
|
173
|
-
# @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
|
174
|
-
# @return two String values:
|
175
|
-
# the first is the lexically sortable String value of the earliest year;
|
176
|
-
# the second is the original String value of the chosen element
|
177
|
-
def self.earliest_year_str(date_el_array)
  # Same selection as earliest_year, using DateParsing's sortable-String year parser.
  parser = :sortable_year_string_from_date_str
  earliest_year(date_el_array, parser)
end
|
180
|
-
|
181
|
-
# return a single value intended for pub date flavor indicated by method_sym
|
182
|
-
# prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
|
183
|
-
# look for a keyDate and use it if there is one; otherwise pick earliest date
|
184
|
-
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
|
185
|
-
# should be ignored; false if approximate dates should be included
|
186
|
-
# @param [Symbol] method_sym method name in DateParsing, as a symbol
|
187
|
-
# @return [String, Integer] publication year as String or Integer
|
188
|
-
def single_pub_year(ignore_approximate, method_sym)
  # Precedence: dateIssued, then dateCreated, then dateCaptured (the latter
  # is for web archive seed records and is never filtered for approximate
  # dates). Each later source is only consulted when the earlier one
  # produced nothing.
  send(method_sym, date_issued_elements(ignore_approximate)) ||
    send(method_sym, date_created_elements(ignore_approximate)) ||
    send(method_sym, mods_ng_xml.origin_info.dateCaptured.to_a)
end
|
194
|
-
|
195
|
-
# given the passed date elements, look for a single keyDate and use it if there is one;
|
196
|
-
# otherwise pick earliest parseable date
|
197
|
-
# @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
|
198
|
-
# @param [Symbol] method_sym method name in DateParsing, as a symbol
|
199
|
-
# @return [Integer, String] year as a String or Integer, depending on method_sym
|
200
|
-
def date_parsing_result(date_el_array, method_sym)
  return if date_el_array.empty?

  # prefer keyDate; without exactly one there is nothing to parse here
  key_date_el = self.class.keyDate(date_el_array)
  return unless key_date_el

  DateParsing.send(method_sym, key_date_el.content)
end
|
207
|
-
# temporarily use this technique to mark methods private until we get rid of old date parsing methods below
|
208
|
-
private :single_pub_year, :date_parsing_result
|
209
|
-
|
210
|
-
class << self
  private

  # Get the earliest parseable year from the passed date elements.
  # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
  # @param [Symbol] method_sym method name in DateParsing, as a symbol
  # @return [Array<String,Integer>, nil] two values, or nil when no element parses:
  #   - first is the earliest date, in whatever form the DateParsing method
  #     named by method_sym returns
  #   - second is the original String content of the chosen element
  def earliest_year(date_el_array, method_sym)
    # Map each parsed year to the original text it came from; when two
    # elements parse to the same year the later element's text wins.
    originals_by_year = date_el_array.each_with_object({}) do |el, found|
      parsed = DateParsing.send(method_sym, el.content)
      found[parsed] = el.content if parsed
    end

    # `min` is nil for an empty hash, so no ActiveSupport `present?` check
    # (and no full sort) is needed.
    earliest = originals_by_year.keys.min
    [earliest, originals_by_year[earliest]] if earliest
  end
end
|
229
|
-
|
230
|
-
# ---- old date parsing methods used downstream of gem; will be deprecated/replaced with new date parsing methods
|
231
|
-
|
232
|
-
def place
  # Values found by walking origin_info -> place -> placeTerm.
  place_term_path = %i[origin_info place placeTerm]
  term_values(place_term_path)
end
|
235
|
-
|
236
|
-
# Values for the pub date facet. This is less strict than the 4 year date requirements for pub_date
|
237
|
-
# Jan 2016: used to populate Solr pub_date field for Spotlight and SearchWorks
|
238
|
-
# Spotlight: pub_date field should be replaced by pub_year_w_approx_isi and pub_year_no_approx_isi
|
239
|
-
# SearchWorks: pub_date field used for display in search results and show view; for sorting nearby-on-shelf
|
240
|
-
# these could be done with more approp fields/methods (pub_year_int for sorting; new pub year methods to populate field)
|
241
|
-
# TODO: prob should deprecate this in favor of pub_year_display_str;
|
242
|
-
# need head-to-head testing with pub_year_display_str
|
243
|
-
# @return [String] value for the pub date facet
|
244
|
-
def pub_date_facet
  date = pub_date
  return nil unless date
  # pub_date encodes a B.C. year as (year - 1000), e.g. '-700' for 300 B.C.
  return "#{date.to_i + 1000} B.C." if date.start_with?('-')
  # anything that is not a century placeholder ('yy--' / 'y--') is used as is
  return date unless date.include?('--')

  century = date[0, 2].to_i + 1
  # Pick the English ordinal suffix; 11, 12 and 13 take 'th' despite ending
  # in 1, 2, 3. (Previously every century got 'th', giving "21th century".)
  suffix =
    if (11..13).cover?(century % 100)
      'th'
    else
      { 1 => 'st', 2 => 'nd', 3 => 'rd' }.fetch(century % 10, 'th')
    end
  "#{century}#{suffix} century"
end
|
251
|
-
|
252
|
-
# creates a date suitable for sorting. Guaranteed to be 4 digits or nil
|
253
|
-
# @deprecated use pub_year_int, or pub_year_sort_str if you must have a string (why?)
|
254
|
-
def pub_date_sort
  sortable = pub_date
  return nil unless sortable

  # left-pad three-character values and turn century placeholders into zeros
  sortable = "0#{sortable}" if sortable.length == 3
  sortable = sortable.gsub('--', '00')
  raise "pub_date_sort was about to return a non 4 digit value #{sortable}!" unless sortable.length == 4

  sortable
end
|
264
|
-
|
265
|
-
# For the date display only, the first place to look is in the dates without encoding=marc array.
|
266
|
-
# If no such dates, select the first date in the dates_marc_encoding array. Otherwise return nil
|
267
|
-
# @return [String] value for the pub_date_display Solr field for this document or nil if none
|
268
|
-
# @deprecated DO NOT USE: this is no longer used in SW, Revs or Spotlight Jan 2016
|
269
|
-
def pub_date_display
  # Non-marc dates take priority for display; marc-encoded dates are the fallback.
  preferred = [dates_no_marc_encoding, dates_marc_encoding].find { |dates| !dates.empty? }
  preferred && preferred.first
end
|
275
|
-
|
276
|
-
# old date parsing protected methods to be deprecated/replaced with new methods (see also DateParsing)
|
277
|
-
|
278
|
-
protected
|
279
|
-
|
280
|
-
# The year the object was published
|
281
|
-
# @return [String] 4 character year or nil if no valid date was found
|
282
|
-
def pub_year
  # Use the cached year if there is one; '' is the cached marker for
  # "already looked and found nothing".
  return (@pub_year == '' ? nil : @pub_year) if @pub_year

  # pub_dates returns nil when the record has no dates, so coerce to an
  # Array before mapping (calling map on nil raised NoMethodError).
  dates = Array(pub_dates).map do |f_date|
    # remove ? and []; a trailing ? on a 4 character value stands for an
    # unknown final digit and becomes 0
    if f_date.length == 4 && f_date.end_with?('?')
      f_date.tr('?', '0')
    else
      f_date.delete('?[]')
    end
  end

  # try to find a date starting with the most normal date formats and
  # progressing to more wonky ones
  @pub_year = get_plain_four_digit_year(dates) ||
              get_u_year(dates) || # Check for years in u notation, e.g., 198u
              get_double_digit_century(dates) ||
              get_bc_year(dates) ||
              get_three_digit_year(dates) ||
              get_single_digit_century(dates)
  return @pub_year if @pub_year

  @pub_year = ''
  nil
end
|
312
|
-
alias_method :pub_date, :pub_year
|
313
|
-
|
314
|
-
# For the date indexing, sorting and faceting, the first place to look is in the dates with encoding=marc array.
|
315
|
-
# If that doesn't exist, look in the dates without encoding=marc array. Otherwise return nil
|
316
|
-
# @return [Array<String>] values for the date Solr field for this document or nil if none
|
317
|
-
def pub_dates
  # marc-encoded dates take priority; nil when neither list has entries
  [dates_marc_encoding, dates_no_marc_encoding].find { |dates| !dates.empty? }
end
|
323
|
-
|
324
|
-
# @return [Array<String>] dates from dateIssued and dateCreated tags from origin_info with encoding="marc"
|
325
|
-
def dates_marc_encoding
  # parse_dates_from_originInfo fills @dates_marc_encoding; only run it once
  parse_dates_from_originInfo unless @dates_marc_encoding
  @dates_marc_encoding
end
|
331
|
-
|
332
|
-
# @return [Array<String>] dates from dateIssued and dateCreated tags from origin_info with encoding not "marc"
|
333
|
-
def dates_no_marc_encoding
  # parse_dates_from_originInfo fills @dates_no_marc_encoding; only run it once
  parse_dates_from_originInfo unless @dates_no_marc_encoding
  @dates_no_marc_encoding
end
|
339
|
-
|
340
|
-
# Populate @dates_marc_encoding and @dates_no_marc_encoding from dateIssued and dateCreated tags from origin_info
|
341
|
-
# with and without encoding=marc
|
342
|
-
def parse_dates_from_originInfo
  @dates_marc_encoding = []
  @dates_no_marc_encoding = []

  # Files one date node's text under the list matching its encoding.
  file_by_encoding = proc do |date_node|
    bucket = date_node.encoding == "marc" ? @dates_marc_encoding : @dates_no_marc_encoding
    bucket << date_node.text
  end

  # dateIssued values are collected before dateCreated values
  origin_info.dateIssued.each(&file_by_encoding)
  origin_info.dateCreated.each(&file_by_encoding)
end
|
360
|
-
|
361
|
-
def is_number?(object)
  # Kernel#Integer raises for anything it cannot convert
  Integer(object)
  true
rescue StandardError
  false
end
|
364
|
-
|
365
|
-
def is_date?(object)
  # Date.parse raises for anything it cannot interpret as a date
  Date.parse(object)
  true
rescue StandardError
  false
end
|
368
|
-
|
369
|
-
# get a 4 digit year like 1865 from array of dates
|
370
|
-
# @param [Array<String>] dates an array of potential year strings
|
371
|
-
def get_plain_four_digit_year(dates)
  dates.each do |f_date|
    matches = f_date.scan(/\d{4}/)
    # No four digit run in this string: keep looking in the remaining dates.
    # (Previously the method returned after the first date no matter what,
    # so later dates were never examined.)
    next if matches.empty?

    year = nil
    if matches.length > 1
      # when there are multiple matches, prefer one with CE after it,
      # including things like '1865-6 CE'
      year = matches.find do |match|
        f_date.include?("#{match} CE") || f_date =~ /#{match}...CE/
      end
    end
    # a lone match, or no CE-qualified match: take the first one
    year ||= matches.first

    @pub_year = year
    return year
  end
  nil
end
|
392
|
-
|
393
|
-
# get a 3 digit year like 965 from the date array
|
394
|
-
# @param [Array<String>] dates an array of potential year strings
|
395
|
-
def get_three_digit_year(dates)
  dates.each do |candidate|
    # first run of three digits in this string, if any
    year = candidate[/\d{3}/]
    return year if year
  end
  nil
end
|
402
|
-
|
403
|
-
# get the 3 digit BC year, return it as a negative, so -700 for 300 BC.
|
404
|
-
# Other methods will translate it to proper display, this is good for sorting.
|
405
|
-
# @param [Array<String>] dates an array of potential year strings
|
406
|
-
def get_bc_year(dates)
  dates.each do |f_date|
    # The dots are escaped so only a literal "B.C." qualifies; unescaped
    # they matched any character (e.g. "300 BxCx").
    bc_digits = f_date[/(\d{3}) B\.C\./, 1]
    # negative, offset by 1000: 300 B.C. => "-700"
    return (bc_digits.to_i - 1000).to_s if bc_digits
  end
  nil
end
|
416
|
-
|
417
|
-
# get a single digit century like '9th century' from the date array
|
418
|
-
# @param [Array<String>] dates an array of potential year strings
|
419
|
-
# @return [String] y-- if we identify century digit in string
|
420
|
-
def get_single_digit_century(dates)
  dates.each do |f_date|
    matches = f_date.scan(/\d{1}th/)
    next if matches.empty?

    century =
      if matches.length == 1
        matches.first
      else
        # when there are multiple matches, only one followed by CE counts.
        # Any match position qualifies, including the very start of the
        # string (the old `pos > 0` test rejected index 0).
        matches.find do |match|
          f_date.include?("#{match} CE") ||
            f_date =~ /#{match}...CE/ ||
            f_date =~ /#{match} century CE/
        end
      end
    # several centuries and none marked CE: try the next date
    next unless century

    # '9th' => '8--'
    @pub_year = "#{century.to_i - 1}--"
    return @pub_year
  end
  nil
end
|
443
|
-
|
444
|
-
# get a double digit century like '12th century' from the date array
|
445
|
-
# @param [Array<String>] dates an array of potential year strings
|
446
|
-
# @return [String] yy-- if we identify century digits in string
|
447
|
-
def get_double_digit_century(dates)
  dates.each do |f_date|
    matches = f_date.scan(/\d{2}th/)
    next if matches.empty?

    century =
      if matches.length == 1
        matches.first
      else
        # when there are multiple matches, only one followed by CE counts.
        # Any match position qualifies, including the very start of the
        # string (the old `pos > 0` test rejected index 0).
        matches.find do |match|
          f_date.include?("#{match} CE") ||
            f_date =~ /#{match}...CE/ ||
            f_date =~ /#{match} century CE/
        end
      end
    # several centuries and none marked CE: try the next date
    next unless century

    # '12th' => '11--'
    @pub_year = "#{century[0, 2].to_i - 1}--"
    return @pub_year
  end
  nil
end
|
470
|
-
|
471
|
-
# If a year has a "u" in it, replace u with 0 for yyyu (becomes yyy0)
|
472
|
-
# and replace u with '-' for yyuu (becomes yy--)
|
473
|
-
# @param [String] dates looking for matches on yyyu or yyuu in these strings
|
474
|
-
# @return [String, nil] String of format yyy0 or yy--, or nil
|
475
|
-
def get_u_year(dates)
  # Each notation paired with the character that replaces its u's:
  # yyyu => yyy0 (single digit u notation), yyuu => yy-- (double digit u notation)
  notations = [[/\d{3}u/, '0'], [/\d{2}u{2}/, '-']]

  dates.each do |candidate|
    notations.each do |pattern, filler|
      hits = candidate.scan(pattern)
      # only an unambiguous (single) occurrence is used
      return hits.first.tr('u', filler) if hits.length == 1
    end
  end
  nil
end
|
487
|
-
end # class Record
|
488
|
-
end
|
489
|
-
end
|