mods 3.0.0.alpha2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/mods/date.rb +51 -17
- data/lib/mods/nom_terminology.rb +1 -1
- data/lib/mods/version.rb +1 -1
- data/spec/lib/date_spec.rb +8 -1
- metadata +4 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 30381fee9ce7d47827010ce02902834f10118daeb381bdfa65bcc5adf778b65f
|
|
4
|
+
data.tar.gz: 65b9fac4ebee7589010039dbba9cbd8b204a94987443ec2c68e36a7509f39685
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e9ecec0384d8c4d931cace14b32ddc9e2b5161ed6ef9f61af27fb89d9993643db790f98966f8ddea419008bd03588cbc6aaf4dfee25b404c5a392deae1b3e78e
|
|
7
|
+
data.tar.gz: 79b733073abbf0ee6dc8da9835c86e44be67f1f298a4e09f3d6380772ff51713cd63c121daf2bb0ae0751ab9ffb3954003eb24ecffde1a7e89ed3c542cead994
|
data/lib/mods/date.rb
CHANGED
|
@@ -11,7 +11,7 @@ module Mods
|
|
|
11
11
|
# @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
|
|
12
12
|
# @return [Mods::Date]
|
|
13
13
|
def self.from_element(xml)
|
|
14
|
-
case xml.attr(:encoding)
|
|
14
|
+
case xml.attr(:encoding)&.downcase
|
|
15
15
|
when 'w3cdtf'
|
|
16
16
|
Mods::Date::W3cdtfFormat.new(xml)
|
|
17
17
|
when 'iso8601'
|
|
@@ -23,12 +23,13 @@ module Mods
|
|
|
23
23
|
# when 'temper'
|
|
24
24
|
# Mods::Date::TemperFormat.new(xml)
|
|
25
25
|
else
|
|
26
|
-
date_class =
|
|
26
|
+
date_class = UnparseableDate if xml.text =~ /\p{Hebrew}/ || xml.text =~ /^-/
|
|
27
27
|
date_class ||= [
|
|
28
28
|
MMDDYYYYFormat,
|
|
29
29
|
MMDDYYFormat,
|
|
30
30
|
YearRangeFormat,
|
|
31
31
|
DecadeAsYearDashFormat,
|
|
32
|
+
DecadeStringFormat,
|
|
32
33
|
EmbeddedBCYearFormat,
|
|
33
34
|
EmbeddedYearFormat,
|
|
34
35
|
EmbeddedThreeDigitYearFormat,
|
|
@@ -49,7 +50,7 @@ module Mods
|
|
|
49
50
|
# Strict ISO8601-encoded date parser
|
|
50
51
|
class Iso8601Format < Date
|
|
51
52
|
def self.parse_date(text)
|
|
52
|
-
|
|
53
|
+
::Date.parse(normalize_to_edtf(text))
|
|
53
54
|
end
|
|
54
55
|
end
|
|
55
56
|
|
|
@@ -62,7 +63,16 @@ module Mods
|
|
|
62
63
|
attr_reader :date
|
|
63
64
|
|
|
64
65
|
def self.normalize_to_edtf(text)
|
|
65
|
-
text
|
|
66
|
+
return '0000' if text.strip == '0'
|
|
67
|
+
|
|
68
|
+
case text
|
|
69
|
+
when /^\d{1,3}$/
|
|
70
|
+
text.rjust(4, "0") if text =~ /^\d{1,3}$/
|
|
71
|
+
when /^-\d{1,3}$/
|
|
72
|
+
"-#{text.sub(/^-/, '').rjust(4, "0")}"
|
|
73
|
+
else
|
|
74
|
+
text
|
|
75
|
+
end
|
|
66
76
|
end
|
|
67
77
|
end
|
|
68
78
|
|
|
@@ -100,6 +110,12 @@ module Mods
|
|
|
100
110
|
end
|
|
101
111
|
end
|
|
102
112
|
|
|
113
|
+
class UnparseableDate < ExtractorDateFormat
|
|
114
|
+
def self.parse_date(text)
|
|
115
|
+
nil
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
|
|
103
119
|
# Full text extractor for MM/DD/YYYY and MM/DD/YYY-formatted dates
|
|
104
120
|
class MMDDYYYYFormat < ExtractorDateFormat
|
|
105
121
|
REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{3,4})/
|
|
@@ -201,7 +217,17 @@ module Mods
|
|
|
201
217
|
|
|
202
218
|
# Full-text extractor for data formatted as YYY-
|
|
203
219
|
class DecadeAsYearDashFormat < ExtractorDateFormat
|
|
204
|
-
REGEX = /(?<!\d)(?<year>\d{3})[-
|
|
220
|
+
REGEX = /(?<!\d)(?<year>\d{3})[-_xu?](?!\d)/
|
|
221
|
+
|
|
222
|
+
def self.normalize_to_edtf(text)
|
|
223
|
+
matches = text.match(REGEX)
|
|
224
|
+
"#{matches[:year]}X"
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
# Full-text extractor for data formatted as YYY0s
|
|
229
|
+
class DecadeStringFormat < ExtractorDateFormat
|
|
230
|
+
REGEX = /(?<!\d)(?<year>\d{3})0s(?!\d)/
|
|
205
231
|
|
|
206
232
|
def self.normalize_to_edtf(text)
|
|
207
233
|
matches = text.match(REGEX)
|
|
@@ -221,42 +247,42 @@ module Mods
|
|
|
221
247
|
|
|
222
248
|
# Full-text extractor that tries hard to pick any year present in the data
|
|
223
249
|
class EmbeddedYearFormat < ExtractorDateFormat
|
|
224
|
-
REGEX = /(
|
|
250
|
+
REGEX = /(?<!\d)(?<year>\d{4})(?!\d)/
|
|
225
251
|
|
|
226
252
|
def self.normalize_to_edtf(text)
|
|
227
253
|
matches = text.match(REGEX)
|
|
228
|
-
"#{matches[:
|
|
254
|
+
"#{matches[:year].rjust(4, "0")}"
|
|
229
255
|
end
|
|
230
256
|
end
|
|
231
257
|
|
|
232
258
|
# Full-text extractor that tries hard to pick any three-digit year present in the data
|
|
233
259
|
class EmbeddedThreeDigitYearFormat < ExtractorDateFormat
|
|
234
|
-
REGEX = /(
|
|
260
|
+
REGEX = /(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
|
|
235
261
|
|
|
236
262
|
def self.normalize_to_edtf(text)
|
|
237
263
|
matches = text.match(REGEX)
|
|
238
|
-
"#{matches[:
|
|
264
|
+
"#{matches[:year].rjust(4, "0")}"
|
|
239
265
|
end
|
|
240
266
|
end
|
|
241
267
|
|
|
242
268
|
# Extractor for data that is just a one- or two-digit year
|
|
243
269
|
class OneOrTwoDigitYearFormat < ExtractorDateFormat
|
|
244
|
-
REGEX = /^(?<
|
|
270
|
+
REGEX = /^(?<year>\d{1,2})$/
|
|
245
271
|
|
|
246
272
|
def self.normalize_to_edtf(text)
|
|
247
273
|
matches = text.match(REGEX)
|
|
248
|
-
"#{matches[:
|
|
274
|
+
"#{matches[:year].rjust(4, "0")}"
|
|
249
275
|
end
|
|
250
276
|
end
|
|
251
277
|
|
|
252
278
|
# Full-text extractor that tries hard to pick any year present in the data
|
|
253
279
|
class EmbeddedYearWithBracketsFormat < ExtractorDateFormat
|
|
254
280
|
# [YYY]Y Y[YYY] [YY]YY Y[YY]Y YY[YY] YYY[Y] YY[Y]Y Y[Y]YY [Y]YYY
|
|
255
|
-
REGEX = /(?<
|
|
281
|
+
REGEX = /(?<year>[\d\[\]]{6})(?!\d)/
|
|
256
282
|
|
|
257
283
|
def self.normalize_to_edtf(text)
|
|
258
284
|
matches = text.match(REGEX)
|
|
259
|
-
"#{matches[:
|
|
285
|
+
"#{matches[:year].gsub('[', '').gsub(']', '')}"
|
|
260
286
|
end
|
|
261
287
|
end
|
|
262
288
|
|
|
@@ -327,7 +353,9 @@ module Mods
|
|
|
327
353
|
#
|
|
328
354
|
# @return [String]
|
|
329
355
|
def type
|
|
330
|
-
xml.attr(:type)
|
|
356
|
+
return if xml.attr(:type)&.empty?
|
|
357
|
+
|
|
358
|
+
xml.attr(:type)&.downcase
|
|
331
359
|
end
|
|
332
360
|
|
|
333
361
|
##
|
|
@@ -335,7 +363,9 @@ module Mods
|
|
|
335
363
|
#
|
|
336
364
|
# @return [String]
|
|
337
365
|
def encoding
|
|
338
|
-
xml.attr(:encoding)
|
|
366
|
+
return if xml.attr(:encoding)&.empty?
|
|
367
|
+
|
|
368
|
+
xml.attr(:encoding)&.downcase
|
|
339
369
|
end
|
|
340
370
|
|
|
341
371
|
##
|
|
@@ -359,7 +389,9 @@ module Mods
|
|
|
359
389
|
#
|
|
360
390
|
# @return [String]
|
|
361
391
|
def point
|
|
362
|
-
xml.attr(:point)
|
|
392
|
+
return if xml.attr(:point)&.empty?
|
|
393
|
+
|
|
394
|
+
xml.attr(:point)&.downcase
|
|
363
395
|
end
|
|
364
396
|
|
|
365
397
|
##
|
|
@@ -391,7 +423,7 @@ module Mods
|
|
|
391
423
|
#
|
|
392
424
|
# @return [String]
|
|
393
425
|
def qualifier
|
|
394
|
-
xml.attr(:qualifier)
|
|
426
|
+
xml.attr(:qualifier)&.downcase
|
|
395
427
|
end
|
|
396
428
|
|
|
397
429
|
##
|
|
@@ -419,6 +451,8 @@ module Mods
|
|
|
419
451
|
end
|
|
420
452
|
|
|
421
453
|
def precision
|
|
454
|
+
return :unknown unless date_range || date
|
|
455
|
+
|
|
422
456
|
if date_range.is_a? EDTF::Century
|
|
423
457
|
:century
|
|
424
458
|
elsif date_range.is_a? EDTF::Decade
|
data/lib/mods/nom_terminology.rb
CHANGED
|
@@ -258,7 +258,7 @@ module Mods
|
|
|
258
258
|
n.publisher :path => 'm:publisher'
|
|
259
259
|
Mods::OriginInfo::DATE_ELEMENTS.each { |date_el|
|
|
260
260
|
n.send date_el, :path => "m:#{date_el}" do |d|
|
|
261
|
-
d.as_object :path => '.', :accessor => lambda { |a| Mods::Date.from_element(a) }
|
|
261
|
+
d.as_object :path => '.', :single => true, :accessor => lambda { |a| Mods::Date.from_element(a) }
|
|
262
262
|
|
|
263
263
|
with_attributes(d, Mods::DATE_ATTRIBS)
|
|
264
264
|
|
data/lib/mods/version.rb
CHANGED
data/spec/lib/date_spec.rb
CHANGED
|
@@ -209,6 +209,9 @@ RSpec.describe Mods::Date do
|
|
|
209
209
|
'1900-uu-uu' => Date.parse('1900-01-01')..Date.parse('1900-12-31'),
|
|
210
210
|
'1900-uu-15' => Date.parse('1900-01-15')..Date.parse('1900-12-15'),
|
|
211
211
|
'1900-06-uu' => Date.parse('1900-06-01')..Date.parse('1900-06-30'),
|
|
212
|
+
'-250' => Date.parse('-250-01-01')..Date.parse('-250-12-31'), # EDTF requires a 4 digit year, but what can you do.
|
|
213
|
+
'63' => Date.parse('0063-01-01')..Date.parse('0063-12-31'),
|
|
214
|
+
'125' => Date.parse('125-01-01')..Date.parse('125-12-31'),
|
|
212
215
|
}.each do |data, expected|
|
|
213
216
|
describe "with #{data}" do
|
|
214
217
|
let(:date_element) { "<dateCreated encoding=\"edtf\">#{data}</dateCreated>" }
|
|
@@ -303,7 +306,8 @@ RSpec.describe Mods::Date do
|
|
|
303
306
|
{
|
|
304
307
|
'Minguo 19 [1930]' => Date.parse('1930-01-01')..Date.parse('1930-12-31'),
|
|
305
308
|
'1745 mag. 14' => Date.parse('1745-01-01')..Date.parse('1745-12-31'),
|
|
306
|
-
'-745' =>
|
|
309
|
+
'-745' => '', # too ambiguous to even attempt.
|
|
310
|
+
'-1999' => '', # too ambiguous to even attempt.
|
|
307
311
|
'[1923]' => Date.parse('1923-01-01')..Date.parse('1923-12-31'),
|
|
308
312
|
'1532.' => Date.parse('1532-01-01')..Date.parse('1532-12-31'),
|
|
309
313
|
'[ca 1834]' => Date.parse('1834-01-01')..Date.parse('1834-12-31'),
|
|
@@ -319,8 +323,11 @@ RSpec.describe Mods::Date do
|
|
|
319
323
|
'193-' => Date.parse('1930-01-01')..Date.parse('1939-12-31'),
|
|
320
324
|
'196_' => Date.parse('1960-01-01')..Date.parse('1969-12-31'),
|
|
321
325
|
'196x' => Date.parse('1960-01-01')..Date.parse('1969-12-31'),
|
|
326
|
+
'196u' => Date.parse('1960-01-01')..Date.parse('1969-12-31'),
|
|
327
|
+
'1960s' => Date.parse('1960-01-01')..Date.parse('1969-12-31'),
|
|
322
328
|
'186?' => Date.parse('1860-01-01')..Date.parse('1869-12-31'),
|
|
323
329
|
'1700?' => Date.parse('1700-01-01')..Date.parse('1700-12-31'),
|
|
330
|
+
'early 1730s' => Date.parse('1730-01-01')..Date.parse('1739-12-31'),
|
|
324
331
|
'[1670-1684]' => Date.parse('1670-01-01')..Date.parse('1684-12-31'),
|
|
325
332
|
'[18]74' => Date.parse('1874-01-01')..Date.parse('1874-12-31'),
|
|
326
333
|
'250 B.C.' => Date.parse('-0249-01-01')..Date.parse('-249-12-31'),
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: mods
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.0.0
|
|
4
|
+
version: 3.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Naomi Dushay
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2022-01
|
|
12
|
+
date: 2022-02-01 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: nokogiri
|
|
@@ -218,9 +218,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
218
218
|
version: '0'
|
|
219
219
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
220
220
|
requirements:
|
|
221
|
-
- - "
|
|
221
|
+
- - ">="
|
|
222
222
|
- !ruby/object:Gem::Version
|
|
223
|
-
version:
|
|
223
|
+
version: '0'
|
|
224
224
|
requirements: []
|
|
225
225
|
rubygems_version: 3.2.32
|
|
226
226
|
signing_key:
|