mods 2.1.0 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +1 -0
- data/lib/mods.rb +4 -0
- data/lib/mods/constants.rb +5 -8
- data/lib/mods/date.rb +401 -0
- data/lib/mods/nom_terminology.rb +5 -3
- data/lib/mods/origin_info.rb +19 -0
- data/lib/mods/version.rb +1 -1
- data/mods.gemspec +2 -1
- data/spec/date_spec.rb +304 -0
- data/spec/origin_info_spec.rb +9 -0
- metadata +21 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c05154eec0a38fc2a0ad117d1c47fb336c116edf
|
4
|
+
data.tar.gz: 9cbbfe083b89d92d0c18355be44136b82693e523
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d2ebefc59ba8cf3f6cdd816d429c45b238181158840aef30b34c806830d3c33de5dab7fcc76be3527a6f40ffe174e2087fc3e43ad17fe236a4d38e06f990c49
|
7
|
+
data.tar.gz: 6ccff614c507ec6f3f541793d0407d90ef90ea8af5d38cb038cfdf560514ec4320e88ee74de60d5647db34f3e5de4b71aec071c9fcd8db7d8610a0ce22543a88
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/lib/mods.rb
CHANGED
@@ -4,13 +4,17 @@ require 'nom/xml'
|
|
4
4
|
module Mods
|
5
5
|
require 'mods/constants'
|
6
6
|
require 'mods/nom_terminology'
|
7
|
+
require 'mods/date'
|
7
8
|
require 'mods/marc_country_codes'
|
8
9
|
require 'mods/marc_geo_area_codes'
|
9
10
|
require 'mods/marc_relator_codes'
|
10
11
|
require 'mods/name'
|
12
|
+
require 'mods/origin_info'
|
11
13
|
require 'mods/reader'
|
12
14
|
require 'mods/record'
|
13
15
|
require 'mods/subject'
|
14
16
|
require 'mods/title_info'
|
15
17
|
require 'mods/version'
|
18
|
+
|
19
|
+
ORIGIN_INFO_DATE_ELEMENTS = Mods::OriginInfo::DATE_ELEMENTS
|
16
20
|
end
|
data/lib/mods/constants.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
module Mods
|
2
2
|
# the version of MODS supported by this gem
|
3
3
|
MODS_VERSION = '3.4'
|
4
|
-
|
4
|
+
|
5
5
|
MODS_NS_V3 = "http://www.loc.gov/mods/v3"
|
6
6
|
MODS_NS = MODS_NS_V3
|
7
7
|
MODS_XSD = "http://www.loc.gov/standards/mods/mods.xsd"
|
8
|
-
|
8
|
+
|
9
9
|
DOC_URL = "http://www.loc.gov/standards/mods/"
|
10
10
|
|
11
11
|
# top level elements that cannot have subelement children
|
@@ -40,7 +40,7 @@ module Mods
|
|
40
40
|
# enumerated attribute values
|
41
41
|
TITLE_INFO_TYPES = ['abbreviated', 'translated', 'alternative', 'uniform']
|
42
42
|
RELATED_ITEM_TYPES = [
|
43
|
-
'preceding', 'succeeding', 'original', 'host', 'constituent', 'series',
|
43
|
+
'preceding', 'succeeding', 'original', 'host', 'constituent', 'series',
|
44
44
|
'otherVersion', 'otherFormat', 'isReferencedBy', 'references', 'reviewOf'
|
45
45
|
]
|
46
46
|
|
@@ -51,10 +51,7 @@ module Mods
|
|
51
51
|
'still image',
|
52
52
|
'moving image',
|
53
53
|
'three dimensional object',
|
54
|
-
'software',
|
54
|
+
'software',
|
55
55
|
'multimedia',
|
56
56
|
'mixed material']
|
57
|
-
|
58
|
-
ORIGIN_INFO_DATE_ELEMENTS = ['dateIssued', 'dateCreated', 'dateCaptured', 'dateValid', 'dateModified', 'copyrightDate', 'dateOther']
|
59
|
-
|
60
|
-
end
|
57
|
+
end
|
data/lib/mods/date.rb
ADDED
@@ -0,0 +1,401 @@
|
|
1
|
+
require 'edtf'
|
2
|
+
|
3
|
+
module Mods
|
4
|
+
class Date
|
5
|
+
attr_reader :xml
|
6
|
+
|
7
|
+
##
|
8
|
+
# Ugly date factory that tries to pick an appropriate parser for the
|
9
|
+
# type of data.
|
10
|
+
#
|
11
|
+
# @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
|
12
|
+
# @return [Mods::Date]
|
13
|
+
def self.from_element(xml)
|
14
|
+
case xml.attr(:encoding)
|
15
|
+
when 'w3cdtf'
|
16
|
+
Mods::Date::W3cdtfFormat.new(xml)
|
17
|
+
when 'iso8601'
|
18
|
+
Mods::Date::Iso8601Format.new(xml)
|
19
|
+
when 'marc'
|
20
|
+
Mods::Date::MarcFormat.new(xml)
|
21
|
+
when 'edtf'
|
22
|
+
Mods::Date::EdtfFormat.new(xml)
|
23
|
+
# when 'temper'
|
24
|
+
# Mods::Date::TemperFormat.new(xml)
|
25
|
+
else
|
26
|
+
date_class = [
|
27
|
+
MMDDYYYYFormat,
|
28
|
+
MMDDYYFormat,
|
29
|
+
EmbeddedYearFormat,
|
30
|
+
RomanNumeralCenturyFormat,
|
31
|
+
RomanNumeralYearFormat,
|
32
|
+
MysteryCenturyFormat,
|
33
|
+
CenturyFormat
|
34
|
+
].select { |klass| klass.supports? xml.text }.first
|
35
|
+
|
36
|
+
(date_class || Mods::Date).new(xml)
|
37
|
+
end
|
38
|
+
rescue
|
39
|
+
Mods::Date.new(xml)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Strict ISO8601-encoded date parser
|
43
|
+
class Iso8601Format < Date
|
44
|
+
def self.parse_date(text)
|
45
|
+
@date = ::Date.parse(cleanup(text))
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# Less strict W3CDTF-encoded date parser
|
50
|
+
class W3cdtfFormat < Date
|
51
|
+
end
|
52
|
+
|
53
|
+
# Strict EDTF parser
|
54
|
+
class EdtfFormat < Date
|
55
|
+
attr_reader :date
|
56
|
+
|
57
|
+
def self.cleanup(text)
|
58
|
+
text
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# MARC-formatted date parser, similar to EDTF, but with special support for
|
63
|
+
# MARC-specific encodings
|
64
|
+
class MarcFormat < EdtfFormat
|
65
|
+
def self.cleanup(text)
|
66
|
+
return nil if text == "9999" || text == "uuuu"
|
67
|
+
|
68
|
+
text.gsub(/^[\[]+/, '').gsub(/[\.\]]+$/, '')
|
69
|
+
end
|
70
|
+
|
71
|
+
private
|
72
|
+
|
73
|
+
def earliest_date
|
74
|
+
if xml.text == '1uuu'
|
75
|
+
::Date.parse('1000-01-01')
|
76
|
+
else
|
77
|
+
super
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def latest_date
|
82
|
+
if xml.text == '1uuu'
|
83
|
+
::Date.parse('1999-12-31')
|
84
|
+
else
|
85
|
+
super
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
class ExtractorDateFormat < Date
|
91
|
+
def self.supports?(text)
|
92
|
+
text.match self::REGEX
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Full text extractor for MM/DD/YYYY-formatted dates
|
97
|
+
class MMDDYYYYFormat < ExtractorDateFormat
|
98
|
+
REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{4})/
|
99
|
+
|
100
|
+
def self.cleanup(text)
|
101
|
+
matches = text.match(self::REGEX)
|
102
|
+
"#{matches[:year].rjust(2, "0")}-#{matches[:month].rjust(2, "0")}-#{matches[:day].rjust(2, "0")}"
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
# Full text extractor for MM/DD/YY-formatted dates
|
107
|
+
class MMDDYYFormat < ExtractorDateFormat
|
108
|
+
REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{2})/
|
109
|
+
|
110
|
+
def self.cleanup(text)
|
111
|
+
matches = text.match(self::REGEX)
|
112
|
+
year = munge_to_yyyy(matches[:year])
|
113
|
+
"#{year}-#{matches[:month].rjust(2, "0")}-#{matches[:day].rjust(2, "0")}"
|
114
|
+
end
|
115
|
+
|
116
|
+
def self.munge_to_yyyy(text)
|
117
|
+
if text.to_i > (::Date.current.year - 2000)
|
118
|
+
"19#{text}"
|
119
|
+
else
|
120
|
+
"20#{text}"
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
# Full-text extractor for dates encoded as Roman numerals
|
126
|
+
class RomanNumeralYearFormat < ExtractorDateFormat
|
127
|
+
REGEX = /^(?<year>[MCDLXVI]+)/
|
128
|
+
|
129
|
+
def self.cleanup(text)
|
130
|
+
matches = text.match(REGEX)
|
131
|
+
roman_to_int(matches[:year].upcase).to_s
|
132
|
+
end
|
133
|
+
|
134
|
+
def self.roman_to_int(value)
|
135
|
+
value = value.dup
|
136
|
+
map = { "M"=>1000, "CM"=>900, "D"=>500, "CD"=>400, "C"=>100, "XC"=>90, "L"=>50, "XL"=>40, "X"=>10, "IX"=>9, "V"=>5, "IV"=>4, "I"=>1 }
|
137
|
+
result = 0
|
138
|
+
map.each do |k,v|
|
139
|
+
while value.index(k) == 0
|
140
|
+
result += v
|
141
|
+
value.slice! k
|
142
|
+
end
|
143
|
+
end
|
144
|
+
result
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
# Full-text extractor for centuries encoded as Roman numerals
|
149
|
+
class RomanNumeralCenturyFormat < RomanNumeralYearFormat
|
150
|
+
REGEX = /(cent. )?(?<century>[xvi]+)/
|
151
|
+
|
152
|
+
def self.cleanup(text)
|
153
|
+
matches = text.match(REGEX)
|
154
|
+
munge_to_yyyy(matches[:century])
|
155
|
+
end
|
156
|
+
|
157
|
+
def self.munge_to_yyyy(text)
|
158
|
+
value = roman_to_int(text.upcase)
|
159
|
+
(value - 1).to_s.rjust(2, "0") + 'XX'
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
|
164
|
+
# Full-text extractor for a flavor of century encoding present in Stanford data
|
165
|
+
# of unknown origin.
|
166
|
+
class MysteryCenturyFormat < ExtractorDateFormat
|
167
|
+
REGEX = /(?<century>\d{2})--/
|
168
|
+
def self.cleanup(text)
|
169
|
+
matches = text.match(REGEX)
|
170
|
+
"#{matches[:century]}XX"
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
# Full-text extractor for dates given as centuries
|
175
|
+
class CenturyFormat < ExtractorDateFormat
|
176
|
+
REGEX = /(?<century>\d{2})th C(entury)?/i
|
177
|
+
|
178
|
+
def self.cleanup(text)
|
179
|
+
matches = text.match(REGEX)
|
180
|
+
"#{matches[:century].to_i - 1}XX"
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
# Full-text extractor that tries hard to pick any year present in the data
|
185
|
+
class EmbeddedYearFormat < ExtractorDateFormat
|
186
|
+
REGEX = /(?<prefix>-)?(?<year>\d{3,4})/
|
187
|
+
|
188
|
+
def self.cleanup(text)
|
189
|
+
matches = text.match(REGEX)
|
190
|
+
"#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
attr_reader :date
|
195
|
+
|
196
|
+
##
|
197
|
+
# Parse a string to a Date or EDTF::Date using rules appropriate to the
|
198
|
+
# given encoding
|
199
|
+
# @param [String] text
|
200
|
+
# @return [Date]
|
201
|
+
def self.parse_date(text)
|
202
|
+
::Date.edtf(cleanup(text))
|
203
|
+
end
|
204
|
+
|
205
|
+
##
|
206
|
+
# Apply any encoding-specific munging or text extraction logic
|
207
|
+
# @param [String] text
|
208
|
+
# @return [String]
|
209
|
+
def self.cleanup(text)
|
210
|
+
text.gsub(/^[\[]+/, '').gsub(/[\.\]]+$/, '')
|
211
|
+
end
|
212
|
+
|
213
|
+
def initialize(xml)
|
214
|
+
@xml = xml
|
215
|
+
@date = self.class.parse_date(xml.text)
|
216
|
+
end
|
217
|
+
|
218
|
+
##
|
219
|
+
# Return a range, with the min point as the earliest possible date and
|
220
|
+
# the max as the latest possible date (useful particularly for ranges and uncertainty)
|
221
|
+
#
|
222
|
+
# @return [Range]
|
223
|
+
def as_range
|
224
|
+
return unless earliest_date && latest_date
|
225
|
+
|
226
|
+
earliest_date..latest_date
|
227
|
+
end
|
228
|
+
|
229
|
+
##
|
230
|
+
# Return an array of all years that fall into the range of possible dates
|
231
|
+
# covered by the data. Note that some encodings support disjoint sets of ranges
|
232
|
+
# so this method could provide more accuracy than #as_range (although it could
|
233
|
+
# potentially include a really big list of dates)
|
234
|
+
#
|
235
|
+
# @return [Array]
|
236
|
+
def to_a
|
237
|
+
case date
|
238
|
+
when EDTF::Set
|
239
|
+
date.to_a
|
240
|
+
else
|
241
|
+
as_range.to_a
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
##
|
246
|
+
# The text as encoded in the MODS
|
247
|
+
# @return [String]
|
248
|
+
def text
|
249
|
+
xml.text
|
250
|
+
end
|
251
|
+
|
252
|
+
##
|
253
|
+
# The declared type of date (from the MODS @type attribute)
|
254
|
+
#
|
255
|
+
# @return [String]
|
256
|
+
def type
|
257
|
+
xml.attr(:type)
|
258
|
+
end
|
259
|
+
|
260
|
+
##
|
261
|
+
# The declared encoding of date (from the MODS @encoding attribute)
|
262
|
+
#
|
263
|
+
# @return [String]
|
264
|
+
def encoding
|
265
|
+
xml.attr(:encoding)
|
266
|
+
end
|
267
|
+
|
268
|
+
##
|
269
|
+
# Was an encoding provided?
|
270
|
+
#
|
271
|
+
# @return [Boolean]
|
272
|
+
def encoding?
|
273
|
+
!encoding.nil?
|
274
|
+
end
|
275
|
+
|
276
|
+
##
|
277
|
+
# The declared point of date (from the MODS @point attribute)
|
278
|
+
#
|
279
|
+
# @return [String]
|
280
|
+
def point
|
281
|
+
xml.attr(:point)
|
282
|
+
end
|
283
|
+
|
284
|
+
##
|
285
|
+
# Is this date stand-alone, or part of a MODS-encoded range?
|
286
|
+
#
|
287
|
+
# @return [Boolean]
|
288
|
+
def single?
|
289
|
+
point.nil?
|
290
|
+
end
|
291
|
+
|
292
|
+
##
|
293
|
+
# Is this date the start of a MODS-encoded range?
|
294
|
+
#
|
295
|
+
# @return [Boolean]
|
296
|
+
def start?
|
297
|
+
point == 'start'
|
298
|
+
end
|
299
|
+
|
300
|
+
##
|
301
|
+
# Is this date the end point of a MODS-encoded range?
|
302
|
+
#
|
303
|
+
# @return [Boolean]
|
304
|
+
def end?
|
305
|
+
point == 'end'
|
306
|
+
end
|
307
|
+
|
308
|
+
##
|
309
|
+
# The declared qualifier of date (from the MODS @qualifier attribute)
|
310
|
+
#
|
311
|
+
# @return [String]
|
312
|
+
def qualifier
|
313
|
+
xml.attr(:qualifier)
|
314
|
+
end
|
315
|
+
|
316
|
+
##
|
317
|
+
# Is the date declared as an approximate date?
|
318
|
+
#
|
319
|
+
# @return [Boolean]
|
320
|
+
def approximate?
|
321
|
+
qualifier == 'approximate'
|
322
|
+
end
|
323
|
+
|
324
|
+
##
|
325
|
+
# Is the date declared as an inferred date?
|
326
|
+
#
|
327
|
+
# @return [Boolean]
|
328
|
+
def inferred?
|
329
|
+
qualifier == 'inferred'
|
330
|
+
end
|
331
|
+
|
332
|
+
##
|
333
|
+
# Is the date declared as a questionable date?
|
334
|
+
#
|
335
|
+
# @return [Boolean]
|
336
|
+
def questionable?
|
337
|
+
qualifier == 'questionable'
|
338
|
+
end
|
339
|
+
|
340
|
+
private
|
341
|
+
|
342
|
+
def days_in_month(month, year)
|
343
|
+
if month == 2 && ::Date.gregorian_leap?(year)
|
344
|
+
29
|
345
|
+
else
|
346
|
+
[nil, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31][month]
|
347
|
+
end
|
348
|
+
end
|
349
|
+
|
350
|
+
##
|
351
|
+
# Return the earliest possible date that is encoded in the data, respecting
|
352
|
+
# unspecified or imprecise information.
|
353
|
+
#
|
354
|
+
# @return [::Date]
|
355
|
+
def earliest_date
|
356
|
+
return nil if date.nil?
|
357
|
+
|
358
|
+
case date_range
|
359
|
+
when EDTF::Epoch, EDTF::Interval
|
360
|
+
date_range.min
|
361
|
+
when EDTF::Set
|
362
|
+
date_range.to_a.first
|
363
|
+
else
|
364
|
+
d = date.dup
|
365
|
+
d = d.change(month: 1, day: 1) if date.unspecified.unspecified?(:year) || date.precision == :year
|
366
|
+
d = d.change(month: 1) if date.unspecified.unspecified?(:month) || date.precision == :year
|
367
|
+
d = d.change(day: 1) if date.unspecified.unspecified?(:day) || date.precision == :month
|
368
|
+
d
|
369
|
+
end
|
370
|
+
end
|
371
|
+
|
372
|
+
##
|
373
|
+
# Return the latest possible date that is encoded in the data, respecting
|
374
|
+
# unspecified or imprecise information.
|
375
|
+
#
|
376
|
+
# @return [::Date]
|
377
|
+
def latest_date
|
378
|
+
return nil if date.nil?
|
379
|
+
case date_range
|
380
|
+
when EDTF::Epoch, EDTF::Interval
|
381
|
+
date_range.max
|
382
|
+
when EDTF::Set
|
383
|
+
date_range.to_a.last.change(month: 12, day: 31)
|
384
|
+
else
|
385
|
+
d = date.dup
|
386
|
+
d = d.change(month: 12, day: 31) if date.unspecified.unspecified?(:year) || date.precision == :year
|
387
|
+
d = d.change(month: 12) if date.unspecified.unspecified?(:month) || date.precision == :year
|
388
|
+
d = d.change(day: days_in_month(date.month, date.year)) if date.unspecified.unspecified?(:day) || date.precision == :month
|
389
|
+
d
|
390
|
+
end
|
391
|
+
end
|
392
|
+
|
393
|
+
def date_range
|
394
|
+
@date_range ||= if text =~ /u/
|
395
|
+
::Date.edtf(text.gsub('u', 'X')) || date
|
396
|
+
else
|
397
|
+
date
|
398
|
+
end
|
399
|
+
end
|
400
|
+
end
|
401
|
+
end
|
data/lib/mods/nom_terminology.rb
CHANGED
@@ -285,6 +285,7 @@ module Mods
|
|
285
285
|
# ORIGIN_INFO --------------------------------------------------------------------------
|
286
286
|
t.origin_info :path => '/m:mods/m:originInfo'
|
287
287
|
t._origin_info :path => '//m:originInfo' do |n|
|
288
|
+
n.as_object :path => '.', :accessor => lambda { |a| Mods::OriginInfo.new(a) }
|
288
289
|
# attributes
|
289
290
|
n.displayLabel :path => '@displayLabel', :accessor => lambda { |a| a.text }
|
290
291
|
Mods::LANG_ATTRIBS.each { |attr_name|
|
@@ -300,8 +301,10 @@ module Mods
|
|
300
301
|
end
|
301
302
|
end
|
302
303
|
n.publisher :path => 'm:publisher'
|
303
|
-
Mods::ORIGIN_INFO_DATE_ELEMENTS.each { |date_el|
|
304
|
+
Mods::OriginInfo::DATE_ELEMENTS.each { |date_el|
|
304
305
|
n.send date_el, :path => "m:#{date_el}" do |d|
|
306
|
+
d.as_object :path => '.', :accessor => lambda { |a| Mods::Date.from_element(a) }
|
307
|
+
|
305
308
|
Mods::DATE_ATTRIBS.each { |attr_name|
|
306
309
|
d.send attr_name, :path => "@#{attr_name}", :accessor => lambda { |a| a.text }
|
307
310
|
}
|
@@ -912,7 +915,7 @@ module Mods
|
|
912
915
|
end
|
913
916
|
end
|
914
917
|
n.publisher :path => 'publisher'
|
915
|
-
Mods::ORIGIN_INFO_DATE_ELEMENTS.each { |date_el|
|
918
|
+
Mods::OriginInfo::DATE_ELEMENTS.each { |date_el|
|
916
919
|
n.send date_el, :path => "#{date_el}" do |d|
|
917
920
|
Mods::DATE_ATTRIBS.each { |attr_name|
|
918
921
|
d.send attr_name, :path => "@#{attr_name}", :accessor => lambda { |a| a.text }
|
@@ -1241,4 +1244,3 @@ module Mods
|
|
1241
1244
|
|
1242
1245
|
end # Record class
|
1243
1246
|
end # Mods module
|
1244
|
-
|
data/lib/mods/origin_info.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Mods
|
2
|
+
class OriginInfo
|
3
|
+
DATE_ELEMENTS = ['dateIssued', 'dateCreated', 'dateCaptured', 'dateValid', 'dateModified', 'copyrightDate', 'dateOther']
|
4
|
+
|
5
|
+
attr_reader :xml
|
6
|
+
|
7
|
+
def initialize(xml)
|
8
|
+
@xml = xml
|
9
|
+
end
|
10
|
+
|
11
|
+
def dates
|
12
|
+
DATE_ELEMENTS.flat_map { |element| xml.public_send(element) }
|
13
|
+
end
|
14
|
+
|
15
|
+
def key_dates
|
16
|
+
dates.select { |x| x.keyDate == 'yes' }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/mods/version.rb
CHANGED
data/mods.gemspec
CHANGED
@@ -15,10 +15,11 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
16
16
|
gem.test_files = gem.files.grep(%r{^spec/})
|
17
17
|
gem.require_paths = ["lib"]
|
18
|
-
|
18
|
+
|
19
19
|
gem.add_dependency 'nokogiri'
|
20
20
|
gem.add_dependency 'nom-xml', '~> 0.6.0'
|
21
21
|
gem.add_dependency 'iso-639'
|
22
|
+
gem.add_dependency 'edtf'
|
22
23
|
|
23
24
|
# Runtime dependencies
|
24
25
|
# gem.add_runtime_dependency 'nokogiri'
|
data/spec/date_spec.rb
ADDED
@@ -0,0 +1,304 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe Mods::Date do
|
4
|
+
subject(:date) { described_class.from_element(term) }
|
5
|
+
let(:term) { Nokogiri::XML.fragment(date_element).first_element_child }
|
6
|
+
|
7
|
+
describe '#to_a' do
|
8
|
+
context 'with EDTF encoded sets' do
|
9
|
+
let(:date_element) { "<dateCreated encoding=\"edtf\">[1667,1668,1670..1672]</dateCreated>" }
|
10
|
+
|
11
|
+
it 'returns the list of years' do
|
12
|
+
expect(date.to_a.map(&:year)).to match_array [1667, 1668, 1670, 1671, 1672]
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
context 'with EDTF encoded ranges' do
|
17
|
+
let(:date_element) { "<dateCreated encoding=\"edtf\">1856/1858</dateCreated>" }
|
18
|
+
|
19
|
+
it 'returns the list of years' do
|
20
|
+
expect(date.to_a.map(&:year)).to match_array [1856, 1857, 1858]
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
context 'with random one-off years' do
|
25
|
+
let(:date_element) { "<dateCreated>1856</dateCreated>" }
|
26
|
+
|
27
|
+
it 'returns the year in an array' do
|
28
|
+
expect(date.to_a.map(&:year)).to match_array [1856]
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe '#text' do
|
34
|
+
let(:date_element) { "<dateCreated>1856</dateCreated>" }
|
35
|
+
|
36
|
+
it 'returns the MODS text' do
|
37
|
+
expect(date.text).to eq '1856'
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe '#type' do
|
42
|
+
let(:date_element) { "<dateCreated type='fictional'>1856</dateCreated>" }
|
43
|
+
|
44
|
+
it 'returns the MODS type attribute' do
|
45
|
+
expect(date.type).to eq 'fictional'
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
describe '#encoding' do
|
50
|
+
let(:date_element) { "<dateCreated encoding='fictional'>1856</dateCreated>" }
|
51
|
+
|
52
|
+
it 'returns the MODS encoding attribute' do
|
53
|
+
expect(date.encoding).to eq 'fictional'
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
describe '#encoding?' do
|
58
|
+
context 'with an encoding' do
|
59
|
+
let(:date_element) { "<dateCreated encoding='fictional'>1856</dateCreated>" }
|
60
|
+
|
61
|
+
it 'returns true' do
|
62
|
+
expect(date.encoding?).to eq true
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'without an encoding' do
|
67
|
+
let(:date_element) { "<dateCreated>1856</dateCreated>" }
|
68
|
+
|
69
|
+
it 'returns false' do
|
70
|
+
expect(date.encoding?).to eq false
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
describe '#point' do
|
76
|
+
let(:date_element) { "<dateCreated point='fictional'>1856</dateCreated>" }
|
77
|
+
|
78
|
+
it 'returns the MODS point attribute' do
|
79
|
+
expect(date.point).to eq 'fictional'
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
describe '#single?' do
|
84
|
+
context 'with a point' do
|
85
|
+
let(:date_element) { "<dateCreated point='fictional'>1856</dateCreated>" }
|
86
|
+
|
87
|
+
it 'returns false' do
|
88
|
+
expect(date.single?).to eq false
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
context 'without a point' do
|
93
|
+
let(:date_element) { "<dateCreated>1856</dateCreated>" }
|
94
|
+
|
95
|
+
it 'returns false' do
|
96
|
+
expect(date.single?).to eq true
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
describe '#start?' do
|
102
|
+
context 'with a point=start attribute' do
|
103
|
+
let(:date_element) { "<dateCreated point='start'>1856</dateCreated>" }
|
104
|
+
|
105
|
+
it 'returns true' do
|
106
|
+
expect(date.start?).to eq true
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
describe '#end?' do
|
112
|
+
context 'with a point=end attribute' do
|
113
|
+
let(:date_element) { "<dateCreated point='end'>1856</dateCreated>" }
|
114
|
+
|
115
|
+
it 'returns true' do
|
116
|
+
expect(date.end?).to eq true
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
describe '#qualifier' do
|
122
|
+
let(:date_element) { "<dateCreated qualifier='fictional'>1856</dateCreated>" }
|
123
|
+
|
124
|
+
it 'returns the MODS qualifier attribute' do
|
125
|
+
expect(date.qualifier).to eq 'fictional'
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
describe '#approximate?' do
|
130
|
+
context 'with a qualifier=approximate attribute' do
|
131
|
+
let(:date_element) { "<dateCreated qualifier='approximate'>1856</dateCreated>" }
|
132
|
+
|
133
|
+
it 'returns true' do
|
134
|
+
expect(date.approximate?).to eq true
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
describe '#inferred?' do
|
140
|
+
context 'with a qualifier=inferred attribute' do
|
141
|
+
let(:date_element) { "<dateCreated qualifier='inferred'>1856</dateCreated>" }
|
142
|
+
|
143
|
+
it 'returns true' do
|
144
|
+
expect(date.inferred?).to eq true
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
describe '#questionable?' do
|
150
|
+
context 'with a qualifier=questionable attribute' do
|
151
|
+
let(:date_element) { "<dateCreated qualifier='questionable'>1856</dateCreated>" }
|
152
|
+
|
153
|
+
it 'returns true' do
|
154
|
+
expect(date.questionable?).to eq true
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
describe 'EDTF encoded dates' do
|
160
|
+
{
|
161
|
+
'1905' => Date.parse('1905-01-01')..Date.parse('1905-12-31'),
|
162
|
+
'190u' => Date.parse('1900-01-01')..Date.parse('1909-12-31'),
|
163
|
+
'190X' => Date.parse('1900-01-01')..Date.parse('1909-12-31'),
|
164
|
+
'19uu' => Date.parse('1900-01-01')..Date.parse('1999-12-31'),
|
165
|
+
'19XX' => Date.parse('1900-01-01')..Date.parse('1999-12-31'),
|
166
|
+
'1856/1876' => Date.parse('1856-01-01')..Date.parse('1876-12-31'),
|
167
|
+
'[1667,1668,1670..1672]' => Date.parse('1667-01-01')..Date.parse('1672-12-31'),
|
168
|
+
'1900-uu' => Date.parse('1900-01-01')..Date.parse('1900-12-31'),
|
169
|
+
'1900-uu-uu' => Date.parse('1900-01-01')..Date.parse('1900-12-31'),
|
170
|
+
'1900-uu-15' => Date.parse('1900-01-15')..Date.parse('1900-12-15'),
|
171
|
+
'1900-06-uu' => Date.parse('1900-06-01')..Date.parse('1900-06-30'),
|
172
|
+
}.each do |data, expected|
|
173
|
+
describe "with #{data}" do
|
174
|
+
let(:date_element) { "<dateCreated encoding=\"edtf\">#{data}</dateCreated>" }
|
175
|
+
|
176
|
+
it "has the range #{expected}" do
|
177
|
+
expect(date).to be_single
|
178
|
+
expect(date.encoding).to eq 'edtf'
|
179
|
+
expect(date.as_range.to_s).to eq expected.to_s
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
describe 'W3cdtf encoded dates' do
|
186
|
+
{
|
187
|
+
'1753' => Date.parse('1753-01-01')..Date.parse('1753-12-31'),
|
188
|
+
'-1753' => Date.parse('-1753-01-01')..Date.parse('-1753-12-31'),
|
189
|
+
'1992-05-06' => Date.parse('1992-05-06')..Date.parse('1992-05-06'),
|
190
|
+
'1992-04' => Date.parse('1992-04-01')..Date.parse('1992-04-30'),
|
191
|
+
'2004-02' => Date.parse('2004-02-01')..Date.parse('2004-02-29')
|
192
|
+
}.each do |data, expected|
|
193
|
+
describe "with #{data}" do
|
194
|
+
let(:date_element) { "<dateCreated encoding=\"w3cdtf\">#{data}</dateCreated>" }
|
195
|
+
|
196
|
+
it "has the range #{expected}" do
|
197
|
+
expect(date).to be_single
|
198
|
+
expect(date.encoding).to eq 'w3cdtf'
|
199
|
+
expect(date.as_range.to_s).to eq expected.to_s
|
200
|
+
end
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
describe 'MARC encoded dates' do
|
206
|
+
{
|
207
|
+
'1234' => Date.parse('1234-01-01')..Date.parse('1234-12-31'),
|
208
|
+
'9999' => nil,
|
209
|
+
'1uuu' => Date.parse('1000-01-01')..Date.parse('1999-12-31')
|
210
|
+
}.each do |data, expected|
|
211
|
+
describe "with #{data}" do
|
212
|
+
let(:date_element) { "<dateCreated encoding=\"marc\">#{data}</dateCreated>" }
|
213
|
+
|
214
|
+
it "has the range #{expected}" do
|
215
|
+
expect(date).to be_single
|
216
|
+
expect(date.encoding).to eq 'marc'
|
217
|
+
expect(date.as_range.to_s).to eq expected.to_s
|
218
|
+
end
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
describe 'ISO8601 encoded dates' do
|
224
|
+
{
|
225
|
+
'20131114161429' => Date.parse('20131114161429')..Date.parse('20131114161429')
|
226
|
+
}.each do |data, expected|
|
227
|
+
describe "with #{data}" do
|
228
|
+
let(:date_element) { "<dateCreated encoding=\"iso8601\">#{data}</dateCreated>" }
|
229
|
+
|
230
|
+
it "has the range #{expected}" do
|
231
|
+
expect(date).to be_single
|
232
|
+
expect(date.encoding).to eq 'iso8601'
|
233
|
+
expect(date.as_range.to_s).to eq expected.to_s
|
234
|
+
end
|
235
|
+
end
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
239
|
+
describe 'MDY encoded dates' do
|
240
|
+
{
|
241
|
+
'11/27/2017' => Date.parse('2017-11-27')..Date.parse('2017-11-27'),
|
242
|
+
'5/27/2017' => Date.parse('2017-05-27')..Date.parse('2017-05-27'),
|
243
|
+
'5/2/2017' => Date.parse('2017-05-02')..Date.parse('2017-05-02'),
|
244
|
+
'12/1/2017' => Date.parse('2017-12-01')..Date.parse('2017-12-01'),
|
245
|
+
'12/1/17' => Date.parse('2017-12-01')..Date.parse('2017-12-01'),
|
246
|
+
'12/1/25' => Date.parse('1925-12-01')..Date.parse('1925-12-01')
|
247
|
+
}.each do |data, expected|
|
248
|
+
describe "with #{data}" do
|
249
|
+
let(:date_element) { "<dateCreated>#{data}</dateCreated>" }
|
250
|
+
|
251
|
+
it "has the range #{expected}" do
|
252
|
+
expect(date).to be_single
|
253
|
+
expect(date.as_range.to_s).to eq expected.to_s
|
254
|
+
end
|
255
|
+
end
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
describe 'Pulling out 4-digit years from unspecified dates' do
|
260
|
+
{
|
261
|
+
'Minguo 19 [1930]' => Date.parse('1930-01-01')..Date.parse('1930-12-31'),
|
262
|
+
'1745 mag. 14' => Date.parse('1745-01-01')..Date.parse('1745-12-31'),
|
263
|
+
'-745' => Date.parse('-745-01-01')..Date.parse('-745-12-31'),
|
264
|
+
'[1923]' => Date.parse('1923-01-01')..Date.parse('1923-12-31'),
|
265
|
+
'1532.' => Date.parse('1532-01-01')..Date.parse('1532-12-31'),
|
266
|
+
'[ca 1834]' => Date.parse('1834-01-01')..Date.parse('1834-12-31'),
|
267
|
+
'xvi' => Date.parse('1500-01-01')..Date.parse('1599-12-31'),
|
268
|
+
'cent. xvi' => Date.parse('1500-01-01')..Date.parse('1599-12-31'),
|
269
|
+
'MDLXXVIII' => Date.parse('1578-01-01')..Date.parse('1578-12-31'),
|
270
|
+
'[19--?]-' => Date.parse('1900-01-01')..Date.parse('1999-12-31'),
|
271
|
+
'19th Century' => Date.parse('1800-01-01')..Date.parse('1899-12-31'),
|
272
|
+
'19th c.' => Date.parse('1800-01-01')..Date.parse('1899-12-31')
|
273
|
+
}.each do |data, expected|
|
274
|
+
describe "with #{data}" do
|
275
|
+
let(:date_element) { "<dateCreated>#{data}</dateCreated>" }
|
276
|
+
|
277
|
+
it "has the range #{expected}" do
|
278
|
+
expect(date).to be_single
|
279
|
+
expect(date.as_range.to_s).to eq expected.to_s
|
280
|
+
end
|
281
|
+
end
|
282
|
+
end
|
283
|
+
end
|
284
|
+
|
285
|
+
describe 'garbage data' do
|
286
|
+
let(:date_element) { "<dateCreated>n.d.</dateCreated>" }
|
287
|
+
|
288
|
+
it 'handles it gracefully' do
|
289
|
+
expect(date.as_range).to be_nil
|
290
|
+
expect(date.to_a).to be_empty
|
291
|
+
expect(date.text).to eq 'n.d.'
|
292
|
+
end
|
293
|
+
|
294
|
+
context 'for dates with encodings declared, but invalid data' do
|
295
|
+
let(:date_element) { "<dateCreated encoding='iso8601'>n.d.</dateCreated>" }
|
296
|
+
|
297
|
+
it 'handles it gracefully' do
|
298
|
+
expect(date.as_range).to be_nil
|
299
|
+
expect(date.to_a).to be_empty
|
300
|
+
expect(date.text).to eq 'n.d.'
|
301
|
+
end
|
302
|
+
end
|
303
|
+
end
|
304
|
+
end
|
data/spec/origin_info_spec.rb
CHANGED
@@ -243,6 +243,15 @@ describe "Mods <originInfo> Element" do
|
|
243
243
|
end
|
244
244
|
end
|
245
245
|
|
246
|
+
describe '#as_object' do
|
247
|
+
describe '#key_dates' do
|
248
|
+
it 'should extract the date with the keyDate attribute' do
|
249
|
+
@mods_rec.from_str("<mods #{@ns_decl}><originInfo><dateCreated>other date</dateCreated><dateCreated keyDate='yes'>key date</dateCreated></originInfo></mods>")
|
250
|
+
expect(@mods_rec.origin_info.as_object.first.key_dates.first.text).to eq 'key date'
|
251
|
+
end
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
246
255
|
context "<xxxDate> child elements" do
|
247
256
|
it "should recognize each element" do
|
248
257
|
Mods::ORIGIN_INFO_DATE_ELEMENTS.each { |elname|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mods
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.0
|
4
|
+
version: 2.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2017-12-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -53,6 +53,20 @@ dependencies:
|
|
53
53
|
- - ">="
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: edtf
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
56
70
|
- !ruby/object:Gem::Dependency
|
57
71
|
name: rake
|
58
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -145,17 +159,20 @@ files:
|
|
145
159
|
- Rakefile
|
146
160
|
- lib/mods.rb
|
147
161
|
- lib/mods/constants.rb
|
162
|
+
- lib/mods/date.rb
|
148
163
|
- lib/mods/marc_country_codes.rb
|
149
164
|
- lib/mods/marc_geo_area_codes.rb
|
150
165
|
- lib/mods/marc_relator_codes.rb
|
151
166
|
- lib/mods/name.rb
|
152
167
|
- lib/mods/nom_terminology.rb
|
168
|
+
- lib/mods/origin_info.rb
|
153
169
|
- lib/mods/reader.rb
|
154
170
|
- lib/mods/record.rb
|
155
171
|
- lib/mods/subject.rb
|
156
172
|
- lib/mods/title_info.rb
|
157
173
|
- lib/mods/version.rb
|
158
174
|
- mods.gemspec
|
175
|
+
- spec/date_spec.rb
|
159
176
|
- spec/fixture_data/shpc1.mods.xml
|
160
177
|
- spec/language_spec.rb
|
161
178
|
- spec/location_spec.rb
|
@@ -190,11 +207,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
190
207
|
version: '0'
|
191
208
|
requirements: []
|
192
209
|
rubyforge_project:
|
193
|
-
rubygems_version: 2.
|
210
|
+
rubygems_version: 2.6.11
|
194
211
|
signing_key:
|
195
212
|
specification_version: 4
|
196
213
|
summary: Parse MODS (Metadata Object Description Schema) records.
|
197
214
|
test_files:
|
215
|
+
- spec/date_spec.rb
|
198
216
|
- spec/fixture_data/shpc1.mods.xml
|
199
217
|
- spec/language_spec.rb
|
200
218
|
- spec/location_spec.rb
|