stanford-mods 2.6.4 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
- data/lib/stanford-mods/concerns/name.rb +57 -0
- data/lib/stanford-mods/concerns/origin_info.rb +113 -0
- data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
- data/lib/stanford-mods/concerns/searchworks.rb +125 -0
- data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
- data/lib/stanford-mods/concerns/title.rb +87 -0
- data/lib/stanford-mods/coordinate.rb +24 -3
- data/lib/stanford-mods/date_parsing.rb +32 -289
- data/lib/stanford-mods/imprint.rb +170 -322
- data/lib/stanford-mods/record.rb +20 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +0 -0
- data/lib/stanford-mods.rb +12 -11
- data/spec/fixtures/searchworks_imprint_data.rb +38 -39
- data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
- data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
- data/spec/geo_spatial_spec.rb +1 -6
- data/spec/imprint_spec.rb +263 -207
- data/spec/lib/stanford-mods/coordinate_spec.rb +3 -5
- data/spec/name_spec.rb +26 -230
- data/spec/origin_info_spec.rb +34 -300
- data/spec/searchworks_basic_spec.rb +1 -3
- data/spec/searchworks_pub_dates_spec.rb +0 -215
- data/spec/searchworks_spec.rb +0 -21
- data/spec/searchworks_subject_raw_spec.rb +106 -105
- data/spec/searchworks_subject_spec.rb +19 -55
- data/spec/searchworks_title_spec.rb +5 -5
- data/stanford-mods.gemspec +1 -1
- metadata +19 -15
- data/lib/marc_countries.rb +0 -387
- data/lib/stanford-mods/geo_utils.rb +0 -28
- data/lib/stanford-mods/name.rb +0 -80
- data/lib/stanford-mods/origin_info.rb +0 -489
- data/lib/stanford-mods/searchworks.rb +0 -333
- data/lib/stanford-mods/searchworks_subjects.rb +0 -196
- data/spec/date_parsing_spec.rb +0 -905
@@ -1,102 +1,50 @@
|
|
1
|
-
require 'active_support/core_ext/integer/inflections'
|
2
|
-
|
3
1
|
module Stanford
|
4
2
|
module Mods
|
5
|
-
# Parsing date strings
|
6
|
-
# TODO: this should become its own gem and/or become eclipsed by/merged with timetwister gem
|
7
|
-
# When this is "gemified":
|
8
|
-
# - we may want an integer or date sort field as well as lexical
|
9
|
-
# - we could add methods like my_date.bc?
|
10
3
|
class DateParsing
|
11
|
-
# get display value for year, generally an explicit year or "17th century" or "5 B.C." or "1950s" or '845 A.D.'
|
12
|
-
# @return [String, nil] display value for year if we could parse one, nil otherwise
|
13
|
-
def self.date_str_for_display(date_str)
|
14
|
-
DateParsing.new(date_str).date_str_for_display
|
15
|
-
end
|
16
|
-
|
17
|
-
# get year as Integer if we can parse date_str to get a year.
|
18
|
-
# @return [Integer, nil] Integer year if we could parse one, nil otherwise
|
19
|
-
def self.year_int_from_date_str(date_str)
|
20
|
-
DateParsing.new(date_str).year_int_from_date_str
|
21
|
-
end
|
22
|
-
|
23
|
-
# get String sortable value year if we can parse date_str to get a year.
|
24
|
-
# SearchWorks currently uses a string field for pub date sorting; thus so does Spotlight.
|
25
|
-
# The values returned must *lexically* sort in chronological order, so the B.C. dates are tricky
|
26
|
-
# @return [String, nil] String sortable year if we could parse one, nil otherwise
|
27
|
-
# note that these values must *lexically* sort to create a chronological sort.
|
28
|
-
def self.sortable_year_string_from_date_str(date_str)
|
29
|
-
DateParsing.new(date_str).sortable_year_string_from_date_str
|
30
|
-
end
|
31
|
-
|
32
|
-
# true if the year is between -999 and (current year + 1)
|
33
|
-
# @param [String] year_str String containing a date in format: -yyy, -yy, -y, y, yy, yyy, yyyy
|
34
|
-
# @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
|
35
|
-
def self.year_str_valid?(year_str)
|
36
|
-
return false unless year_str && (year_str.match(/^\d{1,4}$/) || year_str.match(/^-\d{1,3}$/))
|
37
|
-
|
38
|
-
(-1000 < year_str.to_i) && (year_str.to_i < Date.today.year + 2)
|
39
|
-
end
|
40
|
-
|
41
4
|
# true if the year is between -9999 and (current year + 1)
|
42
5
|
# @return [Boolean] true if the year is between -9999 and (current year + 1); false otherwise
|
43
6
|
def self.year_int_valid?(year)
|
44
7
|
return false unless year.is_a? Integer
|
45
8
|
|
46
|
-
(
|
9
|
+
(year < Date.today.year + 2)
|
47
10
|
end
|
48
11
|
|
49
|
-
attr_reader :
|
12
|
+
attr_reader :xml
|
50
13
|
|
51
|
-
def initialize(
|
52
|
-
@
|
53
|
-
@orig_date_str.freeze
|
14
|
+
def initialize(xml)
|
15
|
+
@xml = xml
|
54
16
|
end
|
55
17
|
|
56
|
-
BRACKETS_BETWEEN_DIGITS_REXEXP = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
|
57
|
-
|
58
18
|
# get display value for year, generally an explicit year or "17th century" or "5 B.C." or "1950s" or '845 A.D.'
|
59
19
|
# @return [String, nil] String value for year if we could parse one, nil otherwise
|
60
20
|
def date_str_for_display
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
21
|
+
date = xml&.as_object&.date
|
22
|
+
date = date.min || date.max if date.is_a?(EDTF::Epoch) || date.is_a?(EDTF::Interval)
|
23
|
+
|
24
|
+
return case xml.as_object.precision
|
25
|
+
when :century
|
26
|
+
return "#{(date.to_s[0..1].to_i + 1).ordinalize} century"
|
27
|
+
when :decade
|
28
|
+
return "#{date.year}s"
|
29
|
+
when :unknown
|
30
|
+
xml.text
|
31
|
+
else
|
32
|
+
if !self.class.year_int_valid? date.year
|
33
|
+
xml.text
|
34
|
+
elsif date.year < 1
|
35
|
+
"#{date.year.abs + 1} B.C."
|
36
|
+
elsif date.year < 1000
|
37
|
+
"#{date.year} A.D."
|
38
|
+
else
|
39
|
+
date.year.to_s
|
40
|
+
end
|
77
41
|
end
|
78
|
-
# remove leading 0s from early dates
|
79
|
-
result = "#{result.to_i} A.D." if result && result.match(/^0\d+$/)
|
80
|
-
result
|
81
42
|
end
|
82
43
|
|
83
44
|
# get Integer year if we can parse date_str to get a year.
|
84
45
|
# @return [Integer, nil] Integer year if we could parse one, nil otherwise
|
85
46
|
def year_int_from_date_str
|
86
|
-
|
87
|
-
# B.C. first in case there are 4 digits, e.g. 1600 B.C.
|
88
|
-
return sortable_year_int_for_bc if orig_date_str.match(BC_REGEX)
|
89
|
-
|
90
|
-
result = sortable_year_for_yyyy_or_yy
|
91
|
-
result ||= sortable_year_for_decade # 19xx or 20xx
|
92
|
-
result ||= sortable_year_for_century
|
93
|
-
result ||= sortable_year_int_for_early_numeric
|
94
|
-
unless result
|
95
|
-
# try removing brackets between digits in case we have 169[5] or [18]91
|
96
|
-
no_brackets = remove_brackets
|
97
|
-
return DateParsing.new(no_brackets).year_int_from_date_str if no_brackets
|
98
|
-
end
|
99
|
-
result.to_i if result && self.class.year_int_valid?(result.to_i)
|
47
|
+
xml&.as_object&.as_range&.first&.year
|
100
48
|
end
|
101
49
|
|
102
50
|
# get String sortable value year if we can parse date_str to get a year.
|
@@ -105,222 +53,17 @@ module Stanford
|
|
105
53
|
# @return [String, nil] String sortable year if we could parse one, nil otherwise
|
106
54
|
# note that these values must *lexically* sort to create a chronological sort.
|
107
55
|
def sortable_year_string_from_date_str
|
108
|
-
return
|
109
|
-
# B.C. first in case there are 4 digits, e.g. 1600 B.C.
|
110
|
-
return sortable_year_str_for_bc if orig_date_str.match(BC_REGEX)
|
111
|
-
|
112
|
-
result = sortable_year_for_yyyy_or_yy
|
113
|
-
result ||= sortable_year_for_decade # 19xx or 20xx
|
114
|
-
result ||= sortable_year_for_century
|
115
|
-
result ||= sortable_year_str_for_early_numeric
|
116
|
-
unless result
|
117
|
-
# try removing brackets between digits in case we have 169[5] or [18]91
|
118
|
-
no_brackets = remove_brackets
|
119
|
-
return DateParsing.new(no_brackets).sortable_year_string_from_date_str if no_brackets
|
120
|
-
end
|
121
|
-
result if self.class.year_str_valid?(result)
|
122
|
-
end
|
123
|
-
|
124
|
-
# get String sortable value year if we can parse date_str to get a year.
|
125
|
-
# @return [String, nil] String sortable year if we could parse one, nil otherwise
|
126
|
-
# note that these values must *lexically* sort to create a chronological sort.
|
127
|
-
def sortable_year_for_yyyy_or_yy
|
128
|
-
# most date strings have a four digit year
|
129
|
-
result = sortable_year_for_yyyy
|
130
|
-
result ||= sortable_year_for_yy # 19xx or 20xx
|
131
|
-
result
|
132
|
-
end
|
133
|
-
|
134
|
-
# removes brackets between digits such as 169[5] or [18]91
|
135
|
-
def remove_brackets
|
136
|
-
orig_date_str.delete('[]') if orig_date_str.match(BRACKETS_BETWEEN_DIGITS_REXEXP)
|
137
|
-
end
|
138
|
-
|
139
|
-
# looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
|
140
|
-
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
|
141
|
-
def sortable_year_for_yyyy
|
142
|
-
matches = orig_date_str.match(/\d{4}/) if orig_date_str
|
143
|
-
matches.to_s if matches
|
144
|
-
end
|
56
|
+
return unless xml&.as_object&.date
|
145
57
|
|
146
|
-
|
147
|
-
# note that these are the only 2 digit year patterns found in our actual date strings in MODS records
|
148
|
-
# we use 20 as century digits unless it is greater than current year:
|
149
|
-
# 1/1/15 -> 2015
|
150
|
-
# 1/1/25 -> 1925
|
151
|
-
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
|
152
|
-
def sortable_year_for_yy
|
153
|
-
return unless orig_date_str
|
58
|
+
date = xml.as_object.date
|
154
59
|
|
155
|
-
|
156
|
-
|
157
|
-
|
60
|
+
if date.is_a?(EDTF::Interval) && date.from.year < 1
|
61
|
+
(-1 * date.from.year - 1000).to_s
|
62
|
+
elsif date.is_a?(Date) && date.year < 1
|
63
|
+
(-1 * date.year - 1000).to_s
|
158
64
|
else
|
159
|
-
|
160
|
-
date_obj = Date.strptime(orig_date_str, '%m-%d-%y') if hyphen_matches
|
65
|
+
date.to_s[0..3]&.gsub('X', '-')
|
161
66
|
end
|
162
|
-
if date_obj && date_obj > Date.today
|
163
|
-
date_obj = Date.new(date_obj.year - 100, date_obj.month, date_obj.mday)
|
164
|
-
end
|
165
|
-
date_obj.year.to_s if date_obj
|
166
|
-
rescue ArgumentError
|
167
|
-
nil # explicitly want nil if date won't parse
|
168
|
-
end
|
169
|
-
|
170
|
-
DECADE_4CHAR_REGEXP = Regexp.new('(^|\D)\d{3}[u\-?x]')
|
171
|
-
|
172
|
-
# get first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
|
173
|
-
# note that these are the only decade patterns found in our actual date strings in MODS records
|
174
|
-
# @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
|
175
|
-
def sortable_year_for_decade
|
176
|
-
decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
|
177
|
-
changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
|
178
|
-
DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
|
179
|
-
end
|
180
|
-
|
181
|
-
DECADE_S_REGEXP = Regexp.new('\d{3}0\'?s')
|
182
|
-
|
183
|
-
# get, e.g. 1950s, if we have: yyyu, yyy-, yyy? or yyyx pattern or yyy0s or yyy0's
|
184
|
-
# note that these are the only decade patterns found in our actual date strings in MODS records
|
185
|
-
# @return [String, nil] 4 digit year with s (e.g. 1860s, 1950s) if orig_date_str matches pattern, nil otherwise
|
186
|
-
def display_str_for_decade
|
187
|
-
decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
|
188
|
-
if decade_matches
|
189
|
-
changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
|
190
|
-
zeroth_year = DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
|
191
|
-
return "#{zeroth_year}s" if zeroth_year
|
192
|
-
else
|
193
|
-
decade_matches = orig_date_str.match(DECADE_S_REGEXP) if orig_date_str
|
194
|
-
return decade_matches.to_s.tr("'", '') if decade_matches
|
195
|
-
end
|
196
|
-
end
|
197
|
-
|
198
|
-
CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
|
199
|
-
CENTURY_4CHAR_REGEXP = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)')
|
200
|
-
|
201
|
-
# get first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
|
202
|
-
# note that these are the only century patterns found in our actual date strings in MODS records
|
203
|
-
# @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
|
204
|
-
def sortable_year_for_century
|
205
|
-
return unless orig_date_str
|
206
|
-
return if orig_date_str =~ /B\.C\./
|
207
|
-
|
208
|
-
century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
|
209
|
-
if century_matches
|
210
|
-
return $1 + '00' if $1.length == 2
|
211
|
-
return '0' + $1 + '00' if $1.length == 1
|
212
|
-
end
|
213
|
-
century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
|
214
|
-
if century_str_matches
|
215
|
-
yy = ($1.to_i - 1).to_s
|
216
|
-
return yy + '00' if yy.length == 2
|
217
|
-
return '0' + yy + '00' if yy.length == 1
|
218
|
-
end
|
219
|
-
end
|
220
|
-
|
221
|
-
# get display value for century (17th century) if we have: yyuu, yy--, yy--? or xxth century pattern
|
222
|
-
# note that these are the only century patterns found in our actual date strings in MODS records
|
223
|
-
# @return [String, nil] yy(th) Century if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
|
224
|
-
def display_str_for_century
|
225
|
-
return unless orig_date_str
|
226
|
-
return if orig_date_str =~ /B\.C\./
|
227
|
-
|
228
|
-
century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
|
229
|
-
return century_str_matches.to_s if century_str_matches
|
230
|
-
|
231
|
-
century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
|
232
|
-
if century_matches
|
233
|
-
return "#{($1.to_i + 1).ordinalize} century"
|
234
|
-
end
|
235
|
-
end
|
236
|
-
|
237
|
-
BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
|
238
|
-
|
239
|
-
# get String sortable value for B.C. if we have B.C. pattern
|
240
|
-
# note that these values must *lexically* sort to create a chronological sort.
|
241
|
-
# We know our data does not contain B.C. dates older than 999, so we can make them
|
242
|
-
# lexically sort by subtracting 1000. So we get:
|
243
|
-
# -700 for 300 B.C., -750 for 250 B.C., -800 for 200 B.C., -801 for 199 B.C.
|
244
|
-
# @return [String, nil] String sortable -ddd if B.C. in pattern; nil otherwise
|
245
|
-
def sortable_year_str_for_bc
|
246
|
-
bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
|
247
|
-
($1.to_i - 1000).to_s if bc_matches
|
248
|
-
end
|
249
|
-
|
250
|
-
# get Integer sortable value for B.C. if we have B.C. pattern
|
251
|
-
# @return [Integer, nil] Integer sortable -ddd if B.C. in pattern; nil otherwise
|
252
|
-
def sortable_year_int_for_bc
|
253
|
-
bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
|
254
|
-
"-#{$1}".to_i if bc_matches
|
255
|
-
end
|
256
|
-
|
257
|
-
# get display value for B.C. if we have B.C. pattern
|
258
|
-
# @return [String, nil] ddd B.C. if ddd B.C. in pattern; nil otherwise
|
259
|
-
def display_str_for_bc
|
260
|
-
bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
|
261
|
-
bc_matches.to_s if bc_matches
|
262
|
-
end
|
263
|
-
|
264
|
-
EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')
|
265
|
-
|
266
|
-
# get String sortable value from date String containing yyy, yy, y, -y, -yy, -yyy
|
267
|
-
# note that these values must *lexically* sort to create a chronological sort.
|
268
|
-
# We know our data does not contain negative dates older than -999, so we can make them
|
269
|
-
# lexically sort by subtracting 1000. So we get:
|
270
|
-
# -983 for -17, -999 for -1, 0000 for 0, 0001 for 1, 0017 for 17
|
271
|
-
# @return [String, nil] String sortable -ddd if orig_date_str matches pattern; nil otherwise
|
272
|
-
def sortable_year_str_for_early_numeric
|
273
|
-
return unless orig_date_str.match(EARLY_NUMERIC)
|
274
|
-
|
275
|
-
if orig_date_str =~ /^\-/
|
276
|
-
# negative number becomes x - 1000 for sorting; -005 for -995
|
277
|
-
num = orig_date_str[1..-1].to_i - 1000
|
278
|
-
return '-' + num.to_s[1..-1].rjust(3, '0')
|
279
|
-
else
|
280
|
-
return orig_date_str.rjust(4, '0')
|
281
|
-
end
|
282
|
-
end
|
283
|
-
|
284
|
-
# get Integer sortable value from date String containing yyy, yy, y, -y, -yy, -yyy, -yyyy
|
285
|
-
# @return [Integer, nil] Integer sortable -ddd if orig_date_str matches pattern; nil otherwise
|
286
|
-
def sortable_year_int_for_early_numeric
|
287
|
-
return orig_date_str.to_i if orig_date_str.match(EARLY_NUMERIC)
|
288
|
-
|
289
|
-
orig_date_str.to_i if orig_date_str =~ /^-\d{4}$/
|
290
|
-
end
|
291
|
-
|
292
|
-
# get display value for date String containing yyy, yy, y, -y, -yy, -yyy
|
293
|
-
# negative number strings will be changed to B.C. strings
|
294
|
-
# note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
|
295
|
-
# "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
|
296
|
-
# There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
|
297
|
-
# See also https://consul.stanford.edu/display/chimera/MODS+display+rules for etdf
|
298
|
-
def display_str_for_early_numeric
|
299
|
-
return unless orig_date_str.match(EARLY_NUMERIC)
|
300
|
-
# return 1 B.C. when the date is 0 since there is no 0 year
|
301
|
-
return '1 B.C.' if orig_date_str == '0'
|
302
|
-
# negative number becomes B.C.
|
303
|
-
return "#{orig_date_str[1..-1].to_i + 1} B.C." if orig_date_str =~ /^\-/
|
304
|
-
|
305
|
-
# remove leading 0s from early dates
|
306
|
-
"#{orig_date_str.to_i} A.D."
|
307
|
-
end
|
308
|
-
|
309
|
-
# NOTE: while Date.parse() works for many dates, the *sortable_year_for_yyyy
|
310
|
-
# actually works for nearly all those cases and a lot more besides. Trial and error
|
311
|
-
# with an extensive set of test data culled from actual date strings in our MODS records
|
312
|
-
# has made this method bogus.
|
313
|
-
# @return [String, nil] sortable 4 digit year (e.g. 1865, 0950) if orig_date_str is parseable via ruby Date, nil otherwise
|
314
|
-
def year_via_ruby_parsing
|
315
|
-
return unless orig_date_str =~ /\d\d/ # need at least 2 digits
|
316
|
-
# need more in string than only 2 digits
|
317
|
-
return if orig_date_str.match(/^\d\d$/) || orig_date_str.match(/^\D*\d\d\D*$/)
|
318
|
-
return if orig_date_str =~ /\d\s*B.C./ # skip B.C. dates
|
319
|
-
|
320
|
-
date_obj = Date.parse(orig_date_str)
|
321
|
-
date_obj.year.to_s
|
322
|
-
rescue ArgumentError
|
323
|
-
nil # explicitly want nil if date won't parse
|
324
67
|
end
|
325
68
|
end
|
326
69
|
end
|