mods 3.0.0.alpha2 → 3.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/mods/date.rb +56 -17
- data/lib/mods/nom_terminology.rb +1 -1
- data/lib/mods/version.rb +1 -1
- data/spec/lib/date_spec.rb +10 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cea16c53599001b8cede7fb85bffef53aa28de61718785af76fd5445915fc80c
|
4
|
+
data.tar.gz: fba29c21dd8bc15483de289aafb690616b6bd6af817d6252915e41238bf1ed70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6ec16f53b9355f57315248e6eabd57d3db32743e4235365f119a519c36abc0d47e75af9c600ac2666bc5db9b2c77a6629e1ace33b847077606ef3bd32d65f78
|
7
|
+
data.tar.gz: 2028457eec91d93b8a44d48d2da3152d6808e850dbc201a199deb43465464497d18611049b888b5a0f2029db5166e7c99e34206f9bde2480e04e4bcf3d6eb7b2
|
data/lib/mods/date.rb
CHANGED
@@ -11,7 +11,7 @@ module Mods
|
|
11
11
|
# @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
|
12
12
|
# @return [Mods::Date]
|
13
13
|
def self.from_element(xml)
|
14
|
-
case xml.attr(:encoding)
|
14
|
+
case xml.attr(:encoding)&.downcase
|
15
15
|
when 'w3cdtf'
|
16
16
|
Mods::Date::W3cdtfFormat.new(xml)
|
17
17
|
when 'iso8601'
|
@@ -23,12 +23,13 @@ module Mods
|
|
23
23
|
# when 'temper'
|
24
24
|
# Mods::Date::TemperFormat.new(xml)
|
25
25
|
else
|
26
|
-
date_class =
|
26
|
+
date_class = UnparseableDate if xml.text =~ /\p{Hebrew}/ || xml.text =~ /^-/
|
27
27
|
date_class ||= [
|
28
28
|
MMDDYYYYFormat,
|
29
29
|
MMDDYYFormat,
|
30
30
|
YearRangeFormat,
|
31
31
|
DecadeAsYearDashFormat,
|
32
|
+
DecadeStringFormat,
|
32
33
|
EmbeddedBCYearFormat,
|
33
34
|
EmbeddedYearFormat,
|
34
35
|
EmbeddedThreeDigitYearFormat,
|
@@ -49,12 +50,15 @@ module Mods
|
|
49
50
|
# Strict ISO8601-encoded date parser
|
50
51
|
class Iso8601Format < Date
|
51
52
|
def self.parse_date(text)
|
52
|
-
|
53
|
+
::Date.parse(normalize_to_edtf(text))
|
53
54
|
end
|
54
55
|
end
|
55
56
|
|
56
57
|
# Less strict W3CDTF-encoded date parser
|
57
58
|
class W3cdtfFormat < Date
|
59
|
+
def self.normalize_to_edtf(text)
|
60
|
+
super.gsub('-00', '')
|
61
|
+
end
|
58
62
|
end
|
59
63
|
|
60
64
|
# Strict EDTF parser
|
@@ -62,7 +66,16 @@ module Mods
|
|
62
66
|
attr_reader :date
|
63
67
|
|
64
68
|
def self.normalize_to_edtf(text)
|
65
|
-
text
|
69
|
+
return '0000' if text.strip == '0'
|
70
|
+
|
71
|
+
case text
|
72
|
+
when /^\d{1,3}$/
|
73
|
+
text.rjust(4, "0") if text =~ /^\d{1,3}$/
|
74
|
+
when /^-\d{1,3}$/
|
75
|
+
"-#{text.sub(/^-/, '').rjust(4, "0")}"
|
76
|
+
else
|
77
|
+
text
|
78
|
+
end
|
66
79
|
end
|
67
80
|
end
|
68
81
|
|
@@ -100,6 +113,12 @@ module Mods
|
|
100
113
|
end
|
101
114
|
end
|
102
115
|
|
116
|
+
class UnparseableDate < ExtractorDateFormat
|
117
|
+
def self.parse_date(text)
|
118
|
+
nil
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
103
122
|
# Full text extractor for MM/DD/YYYY and MM/DD/YYY-formatted dates
|
104
123
|
class MMDDYYYYFormat < ExtractorDateFormat
|
105
124
|
REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{3,4})/
|
@@ -201,7 +220,17 @@ module Mods
|
|
201
220
|
|
202
221
|
# Full-text extractor for data formatted as YYY-
|
203
222
|
class DecadeAsYearDashFormat < ExtractorDateFormat
|
204
|
-
REGEX = /(?<!\d)(?<year>\d{3})[-
|
223
|
+
REGEX = /(?<!\d)(?<year>\d{3})[-_xu?](?!\d)/
|
224
|
+
|
225
|
+
def self.normalize_to_edtf(text)
|
226
|
+
matches = text.match(REGEX)
|
227
|
+
"#{matches[:year]}X"
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
# Full-text extractor for data formatted as YYY0s
|
232
|
+
class DecadeStringFormat < ExtractorDateFormat
|
233
|
+
REGEX = /(?<!\d)(?<year>\d{3})0s(?!\d)/
|
205
234
|
|
206
235
|
def self.normalize_to_edtf(text)
|
207
236
|
matches = text.match(REGEX)
|
@@ -221,42 +250,42 @@ module Mods
|
|
221
250
|
|
222
251
|
# Full-text extractor that tries hard to pick any year present in the data
|
223
252
|
class EmbeddedYearFormat < ExtractorDateFormat
|
224
|
-
REGEX = /(
|
253
|
+
REGEX = /(?<!\d)(?<year>\d{4})(?!\d)/
|
225
254
|
|
226
255
|
def self.normalize_to_edtf(text)
|
227
256
|
matches = text.match(REGEX)
|
228
|
-
"#{matches[:
|
257
|
+
"#{matches[:year].rjust(4, "0")}"
|
229
258
|
end
|
230
259
|
end
|
231
260
|
|
232
261
|
# Full-text extractor that tries hard to pick any year present in the data
|
233
262
|
class EmbeddedThreeDigitYearFormat < ExtractorDateFormat
|
234
|
-
REGEX = /(
|
263
|
+
REGEX = /(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
|
235
264
|
|
236
265
|
def self.normalize_to_edtf(text)
|
237
266
|
matches = text.match(REGEX)
|
238
|
-
"#{matches[:
|
267
|
+
"#{matches[:year].rjust(4, "0")}"
|
239
268
|
end
|
240
269
|
end
|
241
270
|
|
242
271
|
# Full-text extractor that tries hard to pick any year present in the data
|
243
272
|
class OneOrTwoDigitYearFormat < ExtractorDateFormat
|
244
|
-
REGEX = /^(?<
|
273
|
+
REGEX = /^(?<year>\d{1,2})$/
|
245
274
|
|
246
275
|
def self.normalize_to_edtf(text)
|
247
276
|
matches = text.match(REGEX)
|
248
|
-
"#{matches[:
|
277
|
+
"#{matches[:year].rjust(4, "0")}"
|
249
278
|
end
|
250
279
|
end
|
251
280
|
|
252
281
|
# Full-text extractor that tries hard to pick any year present in the data
|
253
282
|
class EmbeddedYearWithBracketsFormat < ExtractorDateFormat
|
254
283
|
# [YYY]Y Y[YYY] [YY]YY Y[YY]Y YY[YY] YYY[Y] YY[Y]Y Y[Y]YY [Y]YYY
|
255
|
-
REGEX = /(?<
|
284
|
+
REGEX = /(?<year>[\d\[\]]{6})(?!\d)/
|
256
285
|
|
257
286
|
def self.normalize_to_edtf(text)
|
258
287
|
matches = text.match(REGEX)
|
259
|
-
"#{matches[:
|
288
|
+
"#{matches[:year].gsub('[', '').gsub(']', '')}"
|
260
289
|
end
|
261
290
|
end
|
262
291
|
|
@@ -327,7 +356,9 @@ module Mods
|
|
327
356
|
#
|
328
357
|
# @return [String]
|
329
358
|
def type
|
330
|
-
xml.attr(:type)
|
359
|
+
return if xml.attr(:type)&.empty?
|
360
|
+
|
361
|
+
xml.attr(:type)&.downcase
|
331
362
|
end
|
332
363
|
|
333
364
|
##
|
@@ -335,7 +366,9 @@ module Mods
|
|
335
366
|
#
|
336
367
|
# @return [String]
|
337
368
|
def encoding
|
338
|
-
xml.attr(:encoding)
|
369
|
+
return if xml.attr(:encoding)&.empty?
|
370
|
+
|
371
|
+
xml.attr(:encoding)&.downcase
|
339
372
|
end
|
340
373
|
|
341
374
|
##
|
@@ -359,7 +392,9 @@ module Mods
|
|
359
392
|
#
|
360
393
|
# @return [String]
|
361
394
|
def point
|
362
|
-
xml.attr(:point)
|
395
|
+
return if xml.attr(:point)&.empty?
|
396
|
+
|
397
|
+
xml.attr(:point)&.downcase
|
363
398
|
end
|
364
399
|
|
365
400
|
##
|
@@ -391,7 +426,7 @@ module Mods
|
|
391
426
|
#
|
392
427
|
# @return [String]
|
393
428
|
def qualifier
|
394
|
-
xml.attr(:qualifier)
|
429
|
+
xml.attr(:qualifier)&.downcase
|
395
430
|
end
|
396
431
|
|
397
432
|
##
|
@@ -419,10 +454,14 @@ module Mods
|
|
419
454
|
end
|
420
455
|
|
421
456
|
def precision
|
457
|
+
return :unknown unless date_range || date
|
458
|
+
|
422
459
|
if date_range.is_a? EDTF::Century
|
423
460
|
:century
|
424
461
|
elsif date_range.is_a? EDTF::Decade
|
425
462
|
:decade
|
463
|
+
elsif date.is_a? EDTF::Interval
|
464
|
+
date_range.precision
|
426
465
|
else
|
427
466
|
case date.precision
|
428
467
|
when :month
|
data/lib/mods/nom_terminology.rb
CHANGED
@@ -258,7 +258,7 @@ module Mods
|
|
258
258
|
n.publisher :path => 'm:publisher'
|
259
259
|
Mods::OriginInfo::DATE_ELEMENTS.each { |date_el|
|
260
260
|
n.send date_el, :path => "m:#{date_el}" do |d|
|
261
|
-
d.as_object :path => '.', :accessor => lambda { |a| Mods::Date.from_element(a) }
|
261
|
+
d.as_object :path => '.', :single => true, :accessor => lambda { |a| Mods::Date.from_element(a) }
|
262
262
|
|
263
263
|
with_attributes(d, Mods::DATE_ATTRIBS)
|
264
264
|
|
data/lib/mods/version.rb
CHANGED
data/spec/lib/date_spec.rb
CHANGED
@@ -101,6 +101,7 @@ RSpec.describe Mods::Date do
|
|
101
101
|
'1900-06' => :month,
|
102
102
|
'1900-06-uu' => :month,
|
103
103
|
'1900-06-15' => :day,
|
104
|
+
'2014-01-01/2020-12-31' => :day
|
104
105
|
}.each do |data, expected|
|
105
106
|
describe "with #{data}" do
|
106
107
|
let(:date_element) { "<dateCreated encoding=\"edtf\">#{data}</dateCreated>" }
|
@@ -209,6 +210,9 @@ RSpec.describe Mods::Date do
|
|
209
210
|
'1900-uu-uu' => Date.parse('1900-01-01')..Date.parse('1900-12-31'),
|
210
211
|
'1900-uu-15' => Date.parse('1900-01-15')..Date.parse('1900-12-15'),
|
211
212
|
'1900-06-uu' => Date.parse('1900-06-01')..Date.parse('1900-06-30'),
|
213
|
+
'-250' => Date.parse('-250-01-01')..Date.parse('-250-12-31'), # EDTF requires a 4 digit year, but what can you do.
|
214
|
+
'63' => Date.parse('0063-01-01')..Date.parse('0063-12-31'),
|
215
|
+
'125' => Date.parse('125-01-01')..Date.parse('125-12-31'),
|
212
216
|
}.each do |data, expected|
|
213
217
|
describe "with #{data}" do
|
214
218
|
let(:date_element) { "<dateCreated encoding=\"edtf\">#{data}</dateCreated>" }
|
@@ -226,6 +230,7 @@ RSpec.describe Mods::Date do
|
|
226
230
|
{
|
227
231
|
'1753' => Date.parse('1753-01-01')..Date.parse('1753-12-31'),
|
228
232
|
'-1753' => Date.parse('-1753-01-01')..Date.parse('-1753-12-31'),
|
233
|
+
'1992-00-00' => Date.parse('1992-01-01')..Date.parse('1992-12-31'),
|
229
234
|
'1992-05-06' => Date.parse('1992-05-06')..Date.parse('1992-05-06'),
|
230
235
|
'1992-04' => Date.parse('1992-04-01')..Date.parse('1992-04-30'),
|
231
236
|
'2004-02' => Date.parse('2004-02-01')..Date.parse('2004-02-29'),
|
@@ -303,7 +308,8 @@ RSpec.describe Mods::Date do
|
|
303
308
|
{
|
304
309
|
'Minguo 19 [1930]' => Date.parse('1930-01-01')..Date.parse('1930-12-31'),
|
305
310
|
'1745 mag. 14' => Date.parse('1745-01-01')..Date.parse('1745-12-31'),
|
306
|
-
'-745' =>
|
311
|
+
'-745' => '', # too ambiguous to even attempt.
|
312
|
+
'-1999' => '', # too ambiguous to even attempt.
|
307
313
|
'[1923]' => Date.parse('1923-01-01')..Date.parse('1923-12-31'),
|
308
314
|
'1532.' => Date.parse('1532-01-01')..Date.parse('1532-12-31'),
|
309
315
|
'[ca 1834]' => Date.parse('1834-01-01')..Date.parse('1834-12-31'),
|
@@ -319,8 +325,11 @@ RSpec.describe Mods::Date do
|
|
319
325
|
'193-' => Date.parse('1930-01-01')..Date.parse('1939-12-31'),
|
320
326
|
'196_' => Date.parse('1960-01-01')..Date.parse('1969-12-31'),
|
321
327
|
'196x' => Date.parse('1960-01-01')..Date.parse('1969-12-31'),
|
328
|
+
'196u' => Date.parse('1960-01-01')..Date.parse('1969-12-31'),
|
329
|
+
'1960s' => Date.parse('1960-01-01')..Date.parse('1969-12-31'),
|
322
330
|
'186?' => Date.parse('1860-01-01')..Date.parse('1869-12-31'),
|
323
331
|
'1700?' => Date.parse('1700-01-01')..Date.parse('1700-12-31'),
|
332
|
+
'early 1730s' => Date.parse('1730-01-01')..Date.parse('1739-12-31'),
|
324
333
|
'[1670-1684]' => Date.parse('1670-01-01')..Date.parse('1684-12-31'),
|
325
334
|
'[18]74' => Date.parse('1874-01-01')..Date.parse('1874-12-31'),
|
326
335
|
'250 B.C.' => Date.parse('-0249-01-01')..Date.parse('-249-12-31'),
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mods
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.
|
4
|
+
version: 3.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-
|
12
|
+
date: 2022-04-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -218,9 +218,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
218
218
|
version: '0'
|
219
219
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
220
220
|
requirements:
|
221
|
-
- - "
|
221
|
+
- - ">="
|
222
222
|
- !ruby/object:Gem::Version
|
223
|
-
version:
|
223
|
+
version: '0'
|
224
224
|
requirements: []
|
225
225
|
rubygems_version: 3.2.32
|
226
226
|
signing_key:
|