mods 3.0.0.alpha2 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/mods/date.rb +51 -17
- data/lib/mods/nom_terminology.rb +1 -1
- data/lib/mods/version.rb +1 -1
- data/spec/lib/date_spec.rb +8 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 30381fee9ce7d47827010ce02902834f10118daeb381bdfa65bcc5adf778b65f
|
4
|
+
data.tar.gz: 65b9fac4ebee7589010039dbba9cbd8b204a94987443ec2c68e36a7509f39685
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9ecec0384d8c4d931cace14b32ddc9e2b5161ed6ef9f61af27fb89d9993643db790f98966f8ddea419008bd03588cbc6aaf4dfee25b404c5a392deae1b3e78e
|
7
|
+
data.tar.gz: 79b733073abbf0ee6dc8da9835c86e44be67f1f298a4e09f3d6380772ff51713cd63c121daf2bb0ae0751ab9ffb3954003eb24ecffde1a7e89ed3c542cead994
|
data/lib/mods/date.rb
CHANGED
@@ -11,7 +11,7 @@ module Mods
|
|
11
11
|
# @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
|
12
12
|
# @return [Mods::Date]
|
13
13
|
def self.from_element(xml)
|
14
|
-
case xml.attr(:encoding)
|
14
|
+
case xml.attr(:encoding)&.downcase
|
15
15
|
when 'w3cdtf'
|
16
16
|
Mods::Date::W3cdtfFormat.new(xml)
|
17
17
|
when 'iso8601'
|
@@ -23,12 +23,13 @@ module Mods
|
|
23
23
|
# when 'temper'
|
24
24
|
# Mods::Date::TemperFormat.new(xml)
|
25
25
|
else
|
26
|
-
date_class =
|
26
|
+
date_class = UnparseableDate if xml.text =~ /\p{Hebrew}/ || xml.text =~ /^-/
|
27
27
|
date_class ||= [
|
28
28
|
MMDDYYYYFormat,
|
29
29
|
MMDDYYFormat,
|
30
30
|
YearRangeFormat,
|
31
31
|
DecadeAsYearDashFormat,
|
32
|
+
DecadeStringFormat,
|
32
33
|
EmbeddedBCYearFormat,
|
33
34
|
EmbeddedYearFormat,
|
34
35
|
EmbeddedThreeDigitYearFormat,
|
@@ -49,7 +50,7 @@ module Mods
|
|
49
50
|
# Strict ISO8601-encoded date parser
|
50
51
|
class Iso8601Format < Date
|
51
52
|
def self.parse_date(text)
|
52
|
-
|
53
|
+
::Date.parse(normalize_to_edtf(text))
|
53
54
|
end
|
54
55
|
end
|
55
56
|
|
@@ -62,7 +63,16 @@ module Mods
|
|
62
63
|
attr_reader :date
|
63
64
|
|
64
65
|
def self.normalize_to_edtf(text)
|
65
|
-
text
|
66
|
+
return '0000' if text.strip == '0'
|
67
|
+
|
68
|
+
case text
|
69
|
+
when /^\d{1,3}$/
|
70
|
+
text.rjust(4, "0") if text =~ /^\d{1,3}$/
|
71
|
+
when /^-\d{1,3}$/
|
72
|
+
"-#{text.sub(/^-/, '').rjust(4, "0")}"
|
73
|
+
else
|
74
|
+
text
|
75
|
+
end
|
66
76
|
end
|
67
77
|
end
|
68
78
|
|
@@ -100,6 +110,12 @@ module Mods
|
|
100
110
|
end
|
101
111
|
end
|
102
112
|
|
113
|
+
class UnparseableDate < ExtractorDateFormat
|
114
|
+
def self.parse_date(text)
|
115
|
+
nil
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
103
119
|
# Full text extractor for MM/DD/YYYY and MM/DD/YYY-formatted dates
|
104
120
|
class MMDDYYYYFormat < ExtractorDateFormat
|
105
121
|
REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{3,4})/
|
@@ -201,7 +217,17 @@ module Mods
|
|
201
217
|
|
202
218
|
# Full-text extractor for data formatted as YYY-
|
203
219
|
class DecadeAsYearDashFormat < ExtractorDateFormat
|
204
|
-
REGEX = /(?<!\d)(?<year>\d{3})[-
|
220
|
+
REGEX = /(?<!\d)(?<year>\d{3})[-_xu?](?!\d)/
|
221
|
+
|
222
|
+
def self.normalize_to_edtf(text)
|
223
|
+
matches = text.match(REGEX)
|
224
|
+
"#{matches[:year]}X"
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
# Full-text extractor for data formatted as YYY0s
|
229
|
+
class DecadeStringFormat < ExtractorDateFormat
|
230
|
+
REGEX = /(?<!\d)(?<year>\d{3})0s(?!\d)/
|
205
231
|
|
206
232
|
def self.normalize_to_edtf(text)
|
207
233
|
matches = text.match(REGEX)
|
@@ -221,42 +247,42 @@ module Mods
|
|
221
247
|
|
222
248
|
# Full-text extractor that tries hard to pick any year present in the data
|
223
249
|
class EmbeddedYearFormat < ExtractorDateFormat
|
224
|
-
REGEX = /(
|
250
|
+
REGEX = /(?<!\d)(?<year>\d{4})(?!\d)/
|
225
251
|
|
226
252
|
def self.normalize_to_edtf(text)
|
227
253
|
matches = text.match(REGEX)
|
228
|
-
"#{matches[:
|
254
|
+
"#{matches[:year].rjust(4, "0")}"
|
229
255
|
end
|
230
256
|
end
|
231
257
|
|
232
258
|
# Full-text extractor that tries hard to pick any year present in the data
|
233
259
|
class EmbeddedThreeDigitYearFormat < ExtractorDateFormat
|
234
|
-
REGEX = /(
|
260
|
+
REGEX = /(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
|
235
261
|
|
236
262
|
def self.normalize_to_edtf(text)
|
237
263
|
matches = text.match(REGEX)
|
238
|
-
"#{matches[:
|
264
|
+
"#{matches[:year].rjust(4, "0")}"
|
239
265
|
end
|
240
266
|
end
|
241
267
|
|
242
268
|
# Full-text extractor that tries hard to pick any year present in the data
|
243
269
|
class OneOrTwoDigitYearFormat < ExtractorDateFormat
|
244
|
-
REGEX = /^(?<
|
270
|
+
REGEX = /^(?<year>\d{1,2})$/
|
245
271
|
|
246
272
|
def self.normalize_to_edtf(text)
|
247
273
|
matches = text.match(REGEX)
|
248
|
-
"#{matches[:
|
274
|
+
"#{matches[:year].rjust(4, "0")}"
|
249
275
|
end
|
250
276
|
end
|
251
277
|
|
252
278
|
# Full-text extractor that tries hard to pick any year present in the data
|
253
279
|
class EmbeddedYearWithBracketsFormat < ExtractorDateFormat
|
254
280
|
# [YYY]Y Y[YYY] [YY]YY Y[YY]Y YY[YY] YYY[Y] YY[Y]Y Y[Y]YY [Y]YYY
|
255
|
-
REGEX = /(?<
|
281
|
+
REGEX = /(?<year>[\d\[\]]{6})(?!\d)/
|
256
282
|
|
257
283
|
def self.normalize_to_edtf(text)
|
258
284
|
matches = text.match(REGEX)
|
259
|
-
"#{matches[:
|
285
|
+
"#{matches[:year].gsub('[', '').gsub(']', '')}"
|
260
286
|
end
|
261
287
|
end
|
262
288
|
|
@@ -327,7 +353,9 @@ module Mods
|
|
327
353
|
#
|
328
354
|
# @return [String]
|
329
355
|
def type
|
330
|
-
xml.attr(:type)
|
356
|
+
return if xml.attr(:type)&.empty?
|
357
|
+
|
358
|
+
xml.attr(:type)&.downcase
|
331
359
|
end
|
332
360
|
|
333
361
|
##
|
@@ -335,7 +363,9 @@ module Mods
|
|
335
363
|
#
|
336
364
|
# @return [String]
|
337
365
|
def encoding
|
338
|
-
xml.attr(:encoding)
|
366
|
+
return if xml.attr(:encoding)&.empty?
|
367
|
+
|
368
|
+
xml.attr(:encoding)&.downcase
|
339
369
|
end
|
340
370
|
|
341
371
|
##
|
@@ -359,7 +389,9 @@ module Mods
|
|
359
389
|
#
|
360
390
|
# @return [String]
|
361
391
|
def point
|
362
|
-
xml.attr(:point)
|
392
|
+
return if xml.attr(:point)&.empty?
|
393
|
+
|
394
|
+
xml.attr(:point)&.downcase
|
363
395
|
end
|
364
396
|
|
365
397
|
##
|
@@ -391,7 +423,7 @@ module Mods
|
|
391
423
|
#
|
392
424
|
# @return [String]
|
393
425
|
def qualifier
|
394
|
-
xml.attr(:qualifier)
|
426
|
+
xml.attr(:qualifier)&.downcase
|
395
427
|
end
|
396
428
|
|
397
429
|
##
|
@@ -419,6 +451,8 @@ module Mods
|
|
419
451
|
end
|
420
452
|
|
421
453
|
def precision
|
454
|
+
return :unknown unless date_range || date
|
455
|
+
|
422
456
|
if date_range.is_a? EDTF::Century
|
423
457
|
:century
|
424
458
|
elsif date_range.is_a? EDTF::Decade
|
data/lib/mods/nom_terminology.rb
CHANGED
@@ -258,7 +258,7 @@ module Mods
|
|
258
258
|
n.publisher :path => 'm:publisher'
|
259
259
|
Mods::OriginInfo::DATE_ELEMENTS.each { |date_el|
|
260
260
|
n.send date_el, :path => "m:#{date_el}" do |d|
|
261
|
-
d.as_object :path => '.', :accessor => lambda { |a| Mods::Date.from_element(a) }
|
261
|
+
d.as_object :path => '.', :single => true, :accessor => lambda { |a| Mods::Date.from_element(a) }
|
262
262
|
|
263
263
|
with_attributes(d, Mods::DATE_ATTRIBS)
|
264
264
|
|
data/lib/mods/version.rb
CHANGED
data/spec/lib/date_spec.rb
CHANGED
@@ -209,6 +209,9 @@ RSpec.describe Mods::Date do
|
|
209
209
|
'1900-uu-uu' => Date.parse('1900-01-01')..Date.parse('1900-12-31'),
|
210
210
|
'1900-uu-15' => Date.parse('1900-01-15')..Date.parse('1900-12-15'),
|
211
211
|
'1900-06-uu' => Date.parse('1900-06-01')..Date.parse('1900-06-30'),
|
212
|
+
'-250' => Date.parse('-250-01-01')..Date.parse('-250-12-31'), # EDTF requires a 4 digit year, but what can you do.
|
213
|
+
'63' => Date.parse('0063-01-01')..Date.parse('0063-12-31'),
|
214
|
+
'125' => Date.parse('125-01-01')..Date.parse('125-12-31'),
|
212
215
|
}.each do |data, expected|
|
213
216
|
describe "with #{data}" do
|
214
217
|
let(:date_element) { "<dateCreated encoding=\"edtf\">#{data}</dateCreated>" }
|
@@ -303,7 +306,8 @@ RSpec.describe Mods::Date do
|
|
303
306
|
{
|
304
307
|
'Minguo 19 [1930]' => Date.parse('1930-01-01')..Date.parse('1930-12-31'),
|
305
308
|
'1745 mag. 14' => Date.parse('1745-01-01')..Date.parse('1745-12-31'),
|
306
|
-
'-745' =>
|
309
|
+
'-745' => '', # too ambiguous to even attempt.
|
310
|
+
'-1999' => '', # too ambiguous to even attempt.
|
307
311
|
'[1923]' => Date.parse('1923-01-01')..Date.parse('1923-12-31'),
|
308
312
|
'1532.' => Date.parse('1532-01-01')..Date.parse('1532-12-31'),
|
309
313
|
'[ca 1834]' => Date.parse('1834-01-01')..Date.parse('1834-12-31'),
|
@@ -319,8 +323,11 @@ RSpec.describe Mods::Date do
|
|
319
323
|
'193-' => Date.parse('1930-01-01')..Date.parse('1939-12-31'),
|
320
324
|
'196_' => Date.parse('1960-01-01')..Date.parse('1969-12-31'),
|
321
325
|
'196x' => Date.parse('1960-01-01')..Date.parse('1969-12-31'),
|
326
|
+
'196u' => Date.parse('1960-01-01')..Date.parse('1969-12-31'),
|
327
|
+
'1960s' => Date.parse('1960-01-01')..Date.parse('1969-12-31'),
|
322
328
|
'186?' => Date.parse('1860-01-01')..Date.parse('1869-12-31'),
|
323
329
|
'1700?' => Date.parse('1700-01-01')..Date.parse('1700-12-31'),
|
330
|
+
'early 1730s' => Date.parse('1730-01-01')..Date.parse('1739-12-31'),
|
324
331
|
'[1670-1684]' => Date.parse('1670-01-01')..Date.parse('1684-12-31'),
|
325
332
|
'[18]74' => Date.parse('1874-01-01')..Date.parse('1874-12-31'),
|
326
333
|
'250 B.C.' => Date.parse('-0249-01-01')..Date.parse('-249-12-31'),
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mods
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-01
|
12
|
+
date: 2022-02-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -218,9 +218,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
218
218
|
version: '0'
|
219
219
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
220
220
|
requirements:
|
221
|
-
- - "
|
221
|
+
- - ">="
|
222
222
|
- !ruby/object:Gem::Version
|
223
|
-
version:
|
223
|
+
version: '0'
|
224
224
|
requirements: []
|
225
225
|
rubygems_version: 3.2.32
|
226
226
|
signing_key:
|