mods 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +1 -0
- data/lib/mods.rb +4 -0
- data/lib/mods/constants.rb +5 -8
- data/lib/mods/date.rb +401 -0
- data/lib/mods/nom_terminology.rb +5 -3
- data/lib/mods/origin_info.rb +19 -0
- data/lib/mods/version.rb +1 -1
- data/mods.gemspec +2 -1
- data/spec/date_spec.rb +304 -0
- data/spec/origin_info_spec.rb +9 -0
- metadata +21 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c05154eec0a38fc2a0ad117d1c47fb336c116edf
|
4
|
+
data.tar.gz: 9cbbfe083b89d92d0c18355be44136b82693e523
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d2ebefc59ba8cf3f6cdd816d429c45b238181158840aef30b34c806830d3c33de5dab7fcc76be3527a6f40ffe174e2087fc3e43ad17fe236a4d38e06f990c49
|
7
|
+
data.tar.gz: 6ccff614c507ec6f3f541793d0407d90ef90ea8af5d38cb038cfdf560514ec4320e88ee74de60d5647db34f3e5de4b71aec071c9fcd8db7d8610a0ce22543a88
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/lib/mods.rb
CHANGED
@@ -4,13 +4,17 @@ require 'nom/xml'
|
|
4
4
|
module Mods
|
5
5
|
require 'mods/constants'
|
6
6
|
require 'mods/nom_terminology'
|
7
|
+
require 'mods/date'
|
7
8
|
require 'mods/marc_country_codes'
|
8
9
|
require 'mods/marc_geo_area_codes'
|
9
10
|
require 'mods/marc_relator_codes'
|
10
11
|
require 'mods/name'
|
12
|
+
require 'mods/origin_info'
|
11
13
|
require 'mods/reader'
|
12
14
|
require 'mods/record'
|
13
15
|
require 'mods/subject'
|
14
16
|
require 'mods/title_info'
|
15
17
|
require 'mods/version'
|
18
|
+
|
19
|
+
ORIGIN_INFO_DATE_ELEMENTS = Mods::OriginInfo::DATE_ELEMENTS
|
16
20
|
end
|
data/lib/mods/constants.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
module Mods
|
2
2
|
# the version of MODS supported by this gem
|
3
3
|
MODS_VERSION = '3.4'
|
4
|
-
|
4
|
+
|
5
5
|
MODS_NS_V3 = "http://www.loc.gov/mods/v3"
|
6
6
|
MODS_NS = MODS_NS_V3
|
7
7
|
MODS_XSD = "http://www.loc.gov/standards/mods/mods.xsd"
|
8
|
-
|
8
|
+
|
9
9
|
DOC_URL = "http://www.loc.gov/standards/mods/"
|
10
10
|
|
11
11
|
# top level elements that cannot have subelement children
|
@@ -40,7 +40,7 @@ module Mods
|
|
40
40
|
# enumerated attribute values
|
41
41
|
TITLE_INFO_TYPES = ['abbreviated', 'translated', 'alternative', 'uniform']
|
42
42
|
RELATED_ITEM_TYPES = [
|
43
|
-
'preceding', 'succeeding', 'original', 'host', 'constituent', 'series',
|
43
|
+
'preceding', 'succeeding', 'original', 'host', 'constituent', 'series',
|
44
44
|
'otherVersion', 'otherFormat', 'isReferencedBy', 'references', 'reviewOf'
|
45
45
|
]
|
46
46
|
|
@@ -51,10 +51,7 @@ module Mods
|
|
51
51
|
'still image',
|
52
52
|
'moving image',
|
53
53
|
'three dimensional object',
|
54
|
-
'software',
|
54
|
+
'software',
|
55
55
|
'multimedia',
|
56
56
|
'mixed material']
|
57
|
-
|
58
|
-
ORIGIN_INFO_DATE_ELEMENTS = ['dateIssued', 'dateCreated', 'dateCaptured', 'dateValid', 'dateModified', 'copyrightDate', 'dateOther']
|
59
|
-
|
60
|
-
end
|
57
|
+
end
|
data/lib/mods/date.rb
ADDED
@@ -0,0 +1,401 @@
|
|
1
|
+
require 'edtf'
|
2
|
+
|
3
|
+
module Mods
|
4
|
+
class Date
|
5
|
+
attr_reader :xml
|
6
|
+
|
7
|
+
##
|
8
|
+
# Ugly date factory that tries to pick an appropriate parser for the
|
9
|
+
# type of data.
|
10
|
+
#
|
11
|
+
# @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
|
12
|
+
# @return [Mods::Date]
|
13
|
+
def self.from_element(xml)
|
14
|
+
case xml.attr(:encoding)
|
15
|
+
when 'w3cdtf'
|
16
|
+
Mods::Date::W3cdtfFormat.new(xml)
|
17
|
+
when 'iso8601'
|
18
|
+
Mods::Date::Iso8601Format.new(xml)
|
19
|
+
when 'marc'
|
20
|
+
Mods::Date::MarcFormat.new(xml)
|
21
|
+
when 'edtf'
|
22
|
+
Mods::Date::EdtfFormat.new(xml)
|
23
|
+
# when 'temper'
|
24
|
+
# Mods::Date::TemperFormat.new(xml)
|
25
|
+
else
|
26
|
+
date_class = [
|
27
|
+
MMDDYYYYFormat,
|
28
|
+
MMDDYYFormat,
|
29
|
+
EmbeddedYearFormat,
|
30
|
+
RomanNumeralCenturyFormat,
|
31
|
+
RomanNumeralYearFormat,
|
32
|
+
MysteryCenturyFormat,
|
33
|
+
CenturyFormat
|
34
|
+
].select { |klass| klass.supports? xml.text }.first
|
35
|
+
|
36
|
+
(date_class || Mods::Date).new(xml)
|
37
|
+
end
|
38
|
+
rescue
|
39
|
+
Mods::Date.new(xml)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Strict ISO8601-encoded date parser
|
43
|
+
class Iso8601Format < Date
|
44
|
+
def self.parse_date(text)
|
45
|
+
@date = ::Date.parse(cleanup(text))
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# Less strict W3CDTF-encoded date parser
|
50
|
+
class W3cdtfFormat < Date
|
51
|
+
end
|
52
|
+
|
53
|
+
# Strict EDTF parser
|
54
|
+
class EdtfFormat < Date
|
55
|
+
attr_reader :date
|
56
|
+
|
57
|
+
def self.cleanup(text)
|
58
|
+
text
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# MARC-formatted date parser, similar to EDTF, but with special support for
|
63
|
+
# MARC-specific encodings
|
64
|
+
class MarcFormat < EdtfFormat
|
65
|
+
def self.cleanup(text)
|
66
|
+
return nil if text == "9999" || text == "uuuu"
|
67
|
+
|
68
|
+
text.gsub(/^[\[]+/, '').gsub(/[\.\]]+$/, '')
|
69
|
+
end
|
70
|
+
|
71
|
+
private
|
72
|
+
|
73
|
+
def earliest_date
|
74
|
+
if xml.text == '1uuu'
|
75
|
+
::Date.parse('1000-01-01')
|
76
|
+
else
|
77
|
+
super
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def latest_date
|
82
|
+
if xml.text == '1uuu'
|
83
|
+
::Date.parse('1999-12-31')
|
84
|
+
else
|
85
|
+
super
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
class ExtractorDateFormat < Date
|
91
|
+
def self.supports?(text)
|
92
|
+
text.match self::REGEX
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Full text extractor for MM/DD/YYYY-formatted dates
|
97
|
+
class MMDDYYYYFormat < ExtractorDateFormat
|
98
|
+
REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{4})/
|
99
|
+
|
100
|
+
def self.cleanup(text)
|
101
|
+
matches = text.match(self::REGEX)
|
102
|
+
"#{matches[:year].rjust(2, "0")}-#{matches[:month].rjust(2, "0")}-#{matches[:day].rjust(2, "0")}"
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
# Full text extractor for MM/DD/YY-formatted dates
|
107
|
+
class MMDDYYFormat < ExtractorDateFormat
|
108
|
+
REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{2})/
|
109
|
+
|
110
|
+
def self.cleanup(text)
|
111
|
+
matches = text.match(self::REGEX)
|
112
|
+
year = munge_to_yyyy(matches[:year])
|
113
|
+
"#{year}-#{matches[:month].rjust(2, "0")}-#{matches[:day].rjust(2, "0")}"
|
114
|
+
end
|
115
|
+
|
116
|
+
def self.munge_to_yyyy(text)
|
117
|
+
if text.to_i > (::Date.current.year - 2000)
|
118
|
+
"19#{text}"
|
119
|
+
else
|
120
|
+
"20#{text}"
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
# Full-text extractor for dates encoded as Roman numerals
|
126
|
+
class RomanNumeralYearFormat < ExtractorDateFormat
|
127
|
+
REGEX = /^(?<year>[MCDLXVI]+)/
|
128
|
+
|
129
|
+
def self.cleanup(text)
|
130
|
+
matches = text.match(REGEX)
|
131
|
+
roman_to_int(matches[:year].upcase).to_s
|
132
|
+
end
|
133
|
+
|
134
|
+
def self.roman_to_int(value)
|
135
|
+
value = value.dup
|
136
|
+
map = { "M"=>1000, "CM"=>900, "D"=>500, "CD"=>400, "C"=>100, "XC"=>90, "L"=>50, "XL"=>40, "X"=>10, "IX"=>9, "V"=>5, "IV"=>4, "I"=>1 }
|
137
|
+
result = 0
|
138
|
+
map.each do |k,v|
|
139
|
+
while value.index(k) == 0
|
140
|
+
result += v
|
141
|
+
value.slice! k
|
142
|
+
end
|
143
|
+
end
|
144
|
+
result
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
# Full-text extractor for centuries encoded as Roman numerals
|
149
|
+
class RomanNumeralCenturyFormat < RomanNumeralYearFormat
|
150
|
+
REGEX = /(cent. )?(?<century>[xvi]+)/
|
151
|
+
|
152
|
+
def self.cleanup(text)
|
153
|
+
matches = text.match(REGEX)
|
154
|
+
munge_to_yyyy(matches[:century])
|
155
|
+
end
|
156
|
+
|
157
|
+
def self.munge_to_yyyy(text)
|
158
|
+
value = roman_to_int(text.upcase)
|
159
|
+
(value - 1).to_s.rjust(2, "0") + 'XX'
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
|
164
|
+
# Full-text extractor for a flavor of century encoding present in Stanford data
|
165
|
+
# of unknown origin.
|
166
|
+
class MysteryCenturyFormat < ExtractorDateFormat
|
167
|
+
REGEX = /(?<century>\d{2})--/
|
168
|
+
def self.cleanup(text)
|
169
|
+
matches = text.match(REGEX)
|
170
|
+
"#{matches[:century]}XX"
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
# Full-text extractor for dates given as centuries
|
175
|
+
class CenturyFormat < ExtractorDateFormat
|
176
|
+
REGEX = /(?<century>\d{2})th C(entury)?/i
|
177
|
+
|
178
|
+
def self.cleanup(text)
|
179
|
+
matches = text.match(REGEX)
|
180
|
+
"#{matches[:century].to_i - 1}XX"
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
# Full-text extractor that tries hard to pick any year present in the data
|
185
|
+
class EmbeddedYearFormat < ExtractorDateFormat
|
186
|
+
REGEX = /(?<prefix>-)?(?<year>\d{3,4})/
|
187
|
+
|
188
|
+
def self.cleanup(text)
|
189
|
+
matches = text.match(REGEX)
|
190
|
+
"#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
attr_reader :date
|
195
|
+
|
196
|
+
##
|
197
|
+
# Parse a string to a Date or EDTF::Date using rules appropriate to the
|
198
|
+
# given encoding
|
199
|
+
# @param [String] text
|
200
|
+
# @return [Date]
|
201
|
+
def self.parse_date(text)
|
202
|
+
::Date.edtf(cleanup(text))
|
203
|
+
end
|
204
|
+
|
205
|
+
##
|
206
|
+
# Apply any encoding-specific munging or text extraction logic
|
207
|
+
# @param [String] text
|
208
|
+
# @return [String]
|
209
|
+
def self.cleanup(text)
|
210
|
+
text.gsub(/^[\[]+/, '').gsub(/[\.\]]+$/, '')
|
211
|
+
end
|
212
|
+
|
213
|
+
def initialize(xml)
|
214
|
+
@xml = xml
|
215
|
+
@date = self.class.parse_date(xml.text)
|
216
|
+
end
|
217
|
+
|
218
|
+
##
|
219
|
+
# Return a range, with the min point as the earliest possible date and
|
220
|
+
# the max as the latest possible date (useful particularly for ranges and uncertainty)
|
221
|
+
#
|
222
|
+
# @return [Range, nil] nil when the earliest or latest date cannot be determined
|
223
|
+
def as_range
|
224
|
+
return unless earliest_date && latest_date
|
225
|
+
|
226
|
+
earliest_date..latest_date
|
227
|
+
end
|
228
|
+
|
229
|
+
##
|
230
|
+
# Return an array of all years that fall into the range of possible dates
|
231
|
+
# covered by the data. Note that some encodings support disjoint sets of ranges
|
232
|
+
# so this method could provide more accuracy than #as_range (although it could
|
233
|
+
# potentially include a really big list of dates)
|
234
|
+
#
|
235
|
+
# @return [Array]
|
236
|
+
def to_a
|
237
|
+
case date
|
238
|
+
when EDTF::Set
|
239
|
+
date.to_a
|
240
|
+
else
|
241
|
+
as_range.to_a
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
##
|
246
|
+
# The text as encoded in the MODS
|
247
|
+
# @return [String]
|
248
|
+
def text
|
249
|
+
xml.text
|
250
|
+
end
|
251
|
+
|
252
|
+
##
|
253
|
+
# The declared type of date (from the MODS @type attribute)
|
254
|
+
#
|
255
|
+
# @return [String]
|
256
|
+
def type
|
257
|
+
xml.attr(:type)
|
258
|
+
end
|
259
|
+
|
260
|
+
##
|
261
|
+
# The declared encoding of date (from the MODS @encoding attribute)
|
262
|
+
#
|
263
|
+
# @return [String]
|
264
|
+
def encoding
|
265
|
+
xml.attr(:encoding)
|
266
|
+
end
|
267
|
+
|
268
|
+
##
|
269
|
+
# Was an encoding provided?
|
270
|
+
#
|
271
|
+
# @return [Boolean]
|
272
|
+
def encoding?
|
273
|
+
!encoding.nil?
|
274
|
+
end
|
275
|
+
|
276
|
+
##
|
277
|
+
# The declared point of date (from the MODS @point attribute)
|
278
|
+
#
|
279
|
+
# @return [String]
|
280
|
+
def point
|
281
|
+
xml.attr(:point)
|
282
|
+
end
|
283
|
+
|
284
|
+
##
|
285
|
+
# Is this date stand-alone, or part of a MODS-encoded range?
|
286
|
+
#
|
287
|
+
# @return [Boolean]
|
288
|
+
def single?
|
289
|
+
point.nil?
|
290
|
+
end
|
291
|
+
|
292
|
+
##
|
293
|
+
# Is this date the start of a MODS-encoded range?
|
294
|
+
#
|
295
|
+
# @return [Boolean]
|
296
|
+
def start?
|
297
|
+
point == 'start'
|
298
|
+
end
|
299
|
+
|
300
|
+
##
|
301
|
+
# Is this date the end point of a MODS-encoded range?
|
302
|
+
#
|
303
|
+
# @return [Boolean]
|
304
|
+
def end?
|
305
|
+
point == 'end'
|
306
|
+
end
|
307
|
+
|
308
|
+
##
|
309
|
+
# The declared qualifier of date (from the MODS @qualifier attribute)
|
310
|
+
#
|
311
|
+
# @return [String]
|
312
|
+
def qualifier
|
313
|
+
xml.attr(:qualifier)
|
314
|
+
end
|
315
|
+
|
316
|
+
##
|
317
|
+
# Is the date declared as an approximate date?
|
318
|
+
#
|
319
|
+
# @return [Boolean]
|
320
|
+
def approximate?
|
321
|
+
qualifier == 'approximate'
|
322
|
+
end
|
323
|
+
|
324
|
+
##
|
325
|
+
# Is the date declared as an inferred date?
|
326
|
+
#
|
327
|
+
# @return [Boolean]
|
328
|
+
def inferred?
|
329
|
+
qualifier == 'inferred'
|
330
|
+
end
|
331
|
+
|
332
|
+
##
|
333
|
+
# Is the date declared as a questionable date?
|
334
|
+
#
|
335
|
+
# @return [Boolean]
|
336
|
+
def questionable?
|
337
|
+
qualifier == 'questionable'
|
338
|
+
end
|
339
|
+
|
340
|
+
private
|
341
|
+
|
342
|
+
def days_in_month(month, year)
|
343
|
+
if month == 2 && ::Date.gregorian_leap?(year)
|
344
|
+
29
|
345
|
+
else
|
346
|
+
[nil, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31][month]
|
347
|
+
end
|
348
|
+
end
|
349
|
+
|
350
|
+
##
|
351
|
+
# Return the earliest possible date that is encoded in the data, respecting
|
352
|
+
# unspecified or imprecise information.
|
353
|
+
#
|
354
|
+
# @return [::Date]
|
355
|
+
def earliest_date
|
356
|
+
return nil if date.nil?
|
357
|
+
|
358
|
+
case date_range
|
359
|
+
when EDTF::Epoch, EDTF::Interval
|
360
|
+
date_range.min
|
361
|
+
when EDTF::Set
|
362
|
+
date_range.to_a.first
|
363
|
+
else
|
364
|
+
d = date.dup
|
365
|
+
d = d.change(month: 1, day: 1) if date.unspecified.unspecified?(:year) || date.precision == :year
|
366
|
+
d = d.change(month: 1) if date.unspecified.unspecified?(:month) || date.precision == :year
|
367
|
+
d = d.change(day: 1) if date.unspecified.unspecified?(:day) || date.precision == :month
|
368
|
+
d
|
369
|
+
end
|
370
|
+
end
|
371
|
+
|
372
|
+
##
|
373
|
+
# Return the latest possible date that is encoded in the data, respecting
|
374
|
+
# unspecified or imprecise information.
|
375
|
+
#
|
376
|
+
# @return [::Date]
|
377
|
+
def latest_date
|
378
|
+
return nil if date.nil?
|
379
|
+
case date_range
|
380
|
+
when EDTF::Epoch, EDTF::Interval
|
381
|
+
date_range.max
|
382
|
+
when EDTF::Set
|
383
|
+
date_range.to_a.last.change(month: 12, day: 31)
|
384
|
+
else
|
385
|
+
d = date.dup
|
386
|
+
d = d.change(month: 12, day: 31) if date.unspecified.unspecified?(:year) || date.precision == :year
|
387
|
+
d = d.change(month: 12) if date.unspecified.unspecified?(:month) || date.precision == :year
|
388
|
+
d = d.change(day: days_in_month(date.month, date.year)) if date.unspecified.unspecified?(:day) || date.precision == :month
|
389
|
+
d
|
390
|
+
end
|
391
|
+
end
|
392
|
+
|
393
|
+
def date_range
|
394
|
+
@date_range ||= if text =~ /u/
|
395
|
+
::Date.edtf(text.gsub('u', 'X')) || date
|
396
|
+
else
|
397
|
+
date
|
398
|
+
end
|
399
|
+
end
|
400
|
+
end
|
401
|
+
end
|
data/lib/mods/nom_terminology.rb
CHANGED
@@ -285,6 +285,7 @@ module Mods
|
|
285
285
|
# ORIGIN_INFO --------------------------------------------------------------------------
|
286
286
|
t.origin_info :path => '/m:mods/m:originInfo'
|
287
287
|
t._origin_info :path => '//m:originInfo' do |n|
|
288
|
+
n.as_object :path => '.', :accessor => lambda { |a| Mods::OriginInfo.new(a) }
|
288
289
|
# attributes
|
289
290
|
n.displayLabel :path => '@displayLabel', :accessor => lambda { |a| a.text }
|
290
291
|
Mods::LANG_ATTRIBS.each { |attr_name|
|
@@ -300,8 +301,10 @@ module Mods
|
|
300
301
|
end
|
301
302
|
end
|
302
303
|
n.publisher :path => 'm:publisher'
|
303
|
-
Mods::
|
304
|
+
Mods::OriginInfo::DATE_ELEMENTS.each { |date_el|
|
304
305
|
n.send date_el, :path => "m:#{date_el}" do |d|
|
306
|
+
d.as_object :path => '.', :accessor => lambda { |a| Mods::Date.from_element(a) }
|
307
|
+
|
305
308
|
Mods::DATE_ATTRIBS.each { |attr_name|
|
306
309
|
d.send attr_name, :path => "@#{attr_name}", :accessor => lambda { |a| a.text }
|
307
310
|
}
|
@@ -912,7 +915,7 @@ module Mods
|
|
912
915
|
end
|
913
916
|
end
|
914
917
|
n.publisher :path => 'publisher'
|
915
|
-
Mods::
|
918
|
+
Mods::OriginInfo::DATE_ELEMENTS.each { |date_el|
|
916
919
|
n.send date_el, :path => "#{date_el}" do |d|
|
917
920
|
Mods::DATE_ATTRIBS.each { |attr_name|
|
918
921
|
d.send attr_name, :path => "@#{attr_name}", :accessor => lambda { |a| a.text }
|
@@ -1241,4 +1244,3 @@ module Mods
|
|
1241
1244
|
|
1242
1245
|
end # Record class
|
1243
1246
|
end # Mods module
|
1244
|
-
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Mods
|
2
|
+
class OriginInfo
|
3
|
+
DATE_ELEMENTS = ['dateIssued', 'dateCreated', 'dateCaptured', 'dateValid', 'dateModified', 'copyrightDate', 'dateOther']
|
4
|
+
|
5
|
+
attr_reader :xml
|
6
|
+
|
7
|
+
def initialize(xml)
|
8
|
+
@xml = xml
|
9
|
+
end
|
10
|
+
|
11
|
+
def dates
|
12
|
+
DATE_ELEMENTS.flat_map { |element| xml.public_send(element) }
|
13
|
+
end
|
14
|
+
|
15
|
+
def key_dates
|
16
|
+
dates.select { |x| x.keyDate == 'yes' }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/mods/version.rb
CHANGED
data/mods.gemspec
CHANGED
@@ -15,10 +15,11 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
16
16
|
gem.test_files = gem.files.grep(%r{^spec/})
|
17
17
|
gem.require_paths = ["lib"]
|
18
|
-
|
18
|
+
|
19
19
|
gem.add_dependency 'nokogiri'
|
20
20
|
gem.add_dependency 'nom-xml', '~> 0.6.0'
|
21
21
|
gem.add_dependency 'iso-639'
|
22
|
+
gem.add_dependency 'edtf'
|
22
23
|
|
23
24
|
# Runtime dependencies
|
24
25
|
# gem.add_runtime_dependency 'nokogiri'
|
data/spec/date_spec.rb
ADDED
@@ -0,0 +1,304 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe Mods::Date do
|
4
|
+
subject(:date) { described_class.from_element(term) }
|
5
|
+
let(:term) { Nokogiri::XML.fragment(date_element).first_element_child }
|
6
|
+
|
7
|
+
describe '#to_a' do
|
8
|
+
context 'with EDTF encoded sets' do
|
9
|
+
let(:date_element) { "<dateCreated encoding=\"edtf\">[1667,1668,1670..1672]</dateCreated>" }
|
10
|
+
|
11
|
+
it 'returns the list of years' do
|
12
|
+
expect(date.to_a.map(&:year)).to match_array [1667, 1668, 1670, 1671, 1672]
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
context 'with EDTF encoded ranges' do
|
17
|
+
let(:date_element) { "<dateCreated encoding=\"edtf\">1856/1858</dateCreated>" }
|
18
|
+
|
19
|
+
it 'returns the list of years' do
|
20
|
+
expect(date.to_a.map(&:year)).to match_array [1856, 1857, 1858]
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
context 'with random one-off years' do
|
25
|
+
let(:date_element) { "<dateCreated>1856</dateCreated>" }
|
26
|
+
|
27
|
+
it 'returns the year in an array' do
|
28
|
+
expect(date.to_a.map(&:year)).to match_array [1856]
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe '#text' do
|
34
|
+
let(:date_element) { "<dateCreated>1856</dateCreated>" }
|
35
|
+
|
36
|
+
it 'returns the MODS text' do
|
37
|
+
expect(date.text).to eq '1856'
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe '#type' do
|
42
|
+
let(:date_element) { "<dateCreated type='fictional'>1856</dateCreated>" }
|
43
|
+
|
44
|
+
it 'returns the MODS type attribute' do
|
45
|
+
expect(date.type).to eq 'fictional'
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
describe '#encoding' do
|
50
|
+
let(:date_element) { "<dateCreated encoding='fictional'>1856</dateCreated>" }
|
51
|
+
|
52
|
+
it 'returns the MODS encoding attribute' do
|
53
|
+
expect(date.encoding).to eq 'fictional'
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
describe '#encoding?' do
|
58
|
+
context 'with an encoding' do
|
59
|
+
let(:date_element) { "<dateCreated encoding='fictional'>1856</dateCreated>" }
|
60
|
+
|
61
|
+
it 'returns true' do
|
62
|
+
expect(date.encoding?).to eq true
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'without an encoding' do
|
67
|
+
let(:date_element) { "<dateCreated>1856</dateCreated>" }
|
68
|
+
|
69
|
+
it 'returns false' do
|
70
|
+
expect(date.encoding?).to eq false
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
describe '#point' do
|
76
|
+
let(:date_element) { "<dateCreated point='fictional'>1856</dateCreated>" }
|
77
|
+
|
78
|
+
it 'returns the MODS point attribute' do
|
79
|
+
expect(date.point).to eq 'fictional'
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
describe '#single?' do
|
84
|
+
context 'with a point' do
|
85
|
+
let(:date_element) { "<dateCreated point='fictional'>1856</dateCreated>" }
|
86
|
+
|
87
|
+
it 'returns false' do
|
88
|
+
expect(date.single?).to eq false
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
context 'without a point' do
|
93
|
+
let(:date_element) { "<dateCreated>1856</dateCreated>" }
|
94
|
+
|
95
|
+
it 'returns false' do
|
96
|
+
expect(date.single?).to eq true
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
describe '#start?' do
|
102
|
+
context 'with a point=start attribute' do
|
103
|
+
let(:date_element) { "<dateCreated point='start'>1856</dateCreated>" }
|
104
|
+
|
105
|
+
it 'returns true' do
|
106
|
+
expect(date.start?).to eq true
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
describe '#end?' do
|
112
|
+
context 'with a point=end attribute' do
|
113
|
+
let(:date_element) { "<dateCreated point='end'>1856</dateCreated>" }
|
114
|
+
|
115
|
+
it 'returns true' do
|
116
|
+
expect(date.end?).to eq true
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
describe '#qualifier' do
|
122
|
+
let(:date_element) { "<dateCreated qualifier='fictional'>1856</dateCreated>" }
|
123
|
+
|
124
|
+
it 'returns the MODS qualifier attribute' do
|
125
|
+
expect(date.qualifier).to eq 'fictional'
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
describe '#approximate?' do
|
130
|
+
context 'with a qualifier=approximate attribute' do
|
131
|
+
let(:date_element) { "<dateCreated qualifier='approximate'>1856</dateCreated>" }
|
132
|
+
|
133
|
+
it 'returns true' do
|
134
|
+
expect(date.approximate?).to eq true
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
describe '#inferred?' do
|
140
|
+
context 'with a qualifier=inferred attribute' do
|
141
|
+
let(:date_element) { "<dateCreated qualifier='inferred'>1856</dateCreated>" }
|
142
|
+
|
143
|
+
it 'returns true' do
|
144
|
+
expect(date.inferred?).to eq true
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
describe '#questionable?' do
|
150
|
+
context 'with a qualifier=questionable attribute' do
|
151
|
+
let(:date_element) { "<dateCreated qualifier='questionable'>1856</dateCreated>" }
|
152
|
+
|
153
|
+
it 'returns true' do
|
154
|
+
expect(date.questionable?).to eq true
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
describe 'EDTF encoded dates' do
|
160
|
+
{
|
161
|
+
'1905' => Date.parse('1905-01-01')..Date.parse('1905-12-31'),
|
162
|
+
'190u' => Date.parse('1900-01-01')..Date.parse('1909-12-31'),
|
163
|
+
'190X' => Date.parse('1900-01-01')..Date.parse('1909-12-31'),
|
164
|
+
'19uu' => Date.parse('1900-01-01')..Date.parse('1999-12-31'),
|
165
|
+
'19XX' => Date.parse('1900-01-01')..Date.parse('1999-12-31'),
|
166
|
+
'1856/1876' => Date.parse('1856-01-01')..Date.parse('1876-12-31'),
|
167
|
+
'[1667,1668,1670..1672]' => Date.parse('1667-01-01')..Date.parse('1672-12-31'),
|
168
|
+
'1900-uu' => Date.parse('1900-01-01')..Date.parse('1900-12-31'),
|
169
|
+
'1900-uu-uu' => Date.parse('1900-01-01')..Date.parse('1900-12-31'),
|
170
|
+
'1900-uu-15' => Date.parse('1900-01-15')..Date.parse('1900-12-15'),
|
171
|
+
'1900-06-uu' => Date.parse('1900-06-01')..Date.parse('1900-06-30'),
|
172
|
+
}.each do |data, expected|
|
173
|
+
describe "with #{data}" do
|
174
|
+
let(:date_element) { "<dateCreated encoding=\"edtf\">#{data}</dateCreated>" }
|
175
|
+
|
176
|
+
it "has the range #{expected}" do
|
177
|
+
expect(date).to be_single
|
178
|
+
expect(date.encoding).to eq 'edtf'
|
179
|
+
expect(date.as_range.to_s).to eq expected.to_s
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
describe 'W3cdtf encoded dates' do
|
186
|
+
{
|
187
|
+
'1753' => Date.parse('1753-01-01')..Date.parse('1753-12-31'),
|
188
|
+
'-1753' => Date.parse('-1753-01-01')..Date.parse('-1753-12-31'),
|
189
|
+
'1992-05-06' => Date.parse('1992-05-06')..Date.parse('1992-05-06'),
|
190
|
+
'1992-04' => Date.parse('1992-04-01')..Date.parse('1992-04-30'),
|
191
|
+
'2004-02' => Date.parse('2004-02-01')..Date.parse('2004-02-29')
|
192
|
+
}.each do |data, expected|
|
193
|
+
describe "with #{data}" do
|
194
|
+
let(:date_element) { "<dateCreated encoding=\"w3cdtf\">#{data}</dateCreated>" }
|
195
|
+
|
196
|
+
it "has the range #{expected}" do
|
197
|
+
expect(date).to be_single
|
198
|
+
expect(date.encoding).to eq 'w3cdtf'
|
199
|
+
expect(date.as_range.to_s).to eq expected.to_s
|
200
|
+
end
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
describe 'MARC encoded dates' do
|
206
|
+
{
|
207
|
+
'1234' => Date.parse('1234-01-01')..Date.parse('1234-12-31'),
|
208
|
+
'9999' => nil,
|
209
|
+
'1uuu' => Date.parse('1000-01-01')..Date.parse('1999-12-31')
|
210
|
+
}.each do |data, expected|
|
211
|
+
describe "with #{data}" do
|
212
|
+
let(:date_element) { "<dateCreated encoding=\"marc\">#{data}</dateCreated>" }
|
213
|
+
|
214
|
+
it "has the range #{expected}" do
|
215
|
+
expect(date).to be_single
|
216
|
+
expect(date.encoding).to eq 'marc'
|
217
|
+
expect(date.as_range.to_s).to eq expected.to_s
|
218
|
+
end
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
describe 'ISO8601 encoded dates' do
|
224
|
+
{
|
225
|
+
'20131114161429' => Date.parse('20131114161429')..Date.parse('20131114161429')
|
226
|
+
}.each do |data, expected|
|
227
|
+
describe "with #{data}" do
|
228
|
+
let(:date_element) { "<dateCreated encoding=\"iso8601\">#{data}</dateCreated>" }
|
229
|
+
|
230
|
+
it "has the range #{expected}" do
|
231
|
+
expect(date).to be_single
|
232
|
+
expect(date.encoding).to eq 'iso8601'
|
233
|
+
expect(date.as_range.to_s).to eq expected.to_s
|
234
|
+
end
|
235
|
+
end
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
239
|
+
describe 'MDY encoded dates' do
|
240
|
+
{
|
241
|
+
'11/27/2017' => Date.parse('2017-11-27')..Date.parse('2017-11-27'),
|
242
|
+
'5/27/2017' => Date.parse('2017-05-27')..Date.parse('2017-05-27'),
|
243
|
+
'5/2/2017' => Date.parse('2017-05-02')..Date.parse('2017-05-02'),
|
244
|
+
'12/1/2017' => Date.parse('2017-12-01')..Date.parse('2017-12-01'),
|
245
|
+
'12/1/17' => Date.parse('2017-12-01')..Date.parse('2017-12-01'),
|
246
|
+
'12/1/25' => Date.parse('1925-12-01')..Date.parse('1925-12-01')
|
247
|
+
}.each do |data, expected|
|
248
|
+
describe "with #{data}" do
|
249
|
+
let(:date_element) { "<dateCreated>#{data}</dateCreated>" }
|
250
|
+
|
251
|
+
it "has the range #{expected}" do
|
252
|
+
expect(date).to be_single
|
253
|
+
expect(date.as_range.to_s).to eq expected.to_s
|
254
|
+
end
|
255
|
+
end
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
describe 'Pulling out 4-digit years from unspecified dates' do
|
260
|
+
{
|
261
|
+
'Minguo 19 [1930]' => Date.parse('1930-01-01')..Date.parse('1930-12-31'),
|
262
|
+
'1745 mag. 14' => Date.parse('1745-01-01')..Date.parse('1745-12-31'),
|
263
|
+
'-745' => Date.parse('-745-01-01')..Date.parse('-745-12-31'),
|
264
|
+
'[1923]' => Date.parse('1923-01-01')..Date.parse('1923-12-31'),
|
265
|
+
'1532.' => Date.parse('1532-01-01')..Date.parse('1532-12-31'),
|
266
|
+
'[ca 1834]' => Date.parse('1834-01-01')..Date.parse('1834-12-31'),
|
267
|
+
'xvi' => Date.parse('1500-01-01')..Date.parse('1599-12-31'),
|
268
|
+
'cent. xvi' => Date.parse('1500-01-01')..Date.parse('1599-12-31'),
|
269
|
+
'MDLXXVIII' => Date.parse('1578-01-01')..Date.parse('1578-12-31'),
|
270
|
+
'[19--?]-' => Date.parse('1900-01-01')..Date.parse('1999-12-31'),
|
271
|
+
'19th Century' => Date.parse('1800-01-01')..Date.parse('1899-12-31'),
|
272
|
+
'19th c.' => Date.parse('1800-01-01')..Date.parse('1899-12-31')
|
273
|
+
}.each do |data, expected|
|
274
|
+
describe "with #{data}" do
|
275
|
+
let(:date_element) { "<dateCreated>#{data}</dateCreated>" }
|
276
|
+
|
277
|
+
it "has the range #{expected}" do
|
278
|
+
expect(date).to be_single
|
279
|
+
expect(date.as_range.to_s).to eq expected.to_s
|
280
|
+
end
|
281
|
+
end
|
282
|
+
end
|
283
|
+
end
|
284
|
+
|
285
|
+
describe 'garbage data' do
|
286
|
+
let(:date_element) { "<dateCreated>n.d.</dateCreated>" }
|
287
|
+
|
288
|
+
it 'handles it gracefully' do
|
289
|
+
expect(date.as_range).to be_nil
|
290
|
+
expect(date.to_a).to be_empty
|
291
|
+
expect(date.text).to eq 'n.d.'
|
292
|
+
end
|
293
|
+
|
294
|
+
context 'for dates with encodings declared, but invalid data' do
|
295
|
+
let(:date_element) { "<dateCreated encoding='iso8601'>n.d.</dateCreated>" }
|
296
|
+
|
297
|
+
it 'handles it gracefully' do
|
298
|
+
expect(date.as_range).to be_nil
|
299
|
+
expect(date.to_a).to be_empty
|
300
|
+
expect(date.text).to eq 'n.d.'
|
301
|
+
end
|
302
|
+
end
|
303
|
+
end
|
304
|
+
end
|
data/spec/origin_info_spec.rb
CHANGED
@@ -243,6 +243,15 @@ describe "Mods <originInfo> Element" do
|
|
243
243
|
end
|
244
244
|
end
|
245
245
|
|
246
|
+
describe '#as_object' do
|
247
|
+
describe '#key_dates' do
|
248
|
+
it 'should extract the date with the keyDate attribute' do
|
249
|
+
@mods_rec.from_str("<mods #{@ns_decl}><originInfo><dateCreated>other date</dateCreated><dateCreated keyDate='yes'>key date</dateCreated></originInfo></mods>")
|
250
|
+
expect(@mods_rec.origin_info.as_object.first.key_dates.first.text).to eq 'key date'
|
251
|
+
end
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
246
255
|
context "<xxxDate> child elements" do
|
247
256
|
it "should recognize each element" do
|
248
257
|
Mods::ORIGIN_INFO_DATE_ELEMENTS.each { |elname|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mods
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2017-12-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -53,6 +53,20 @@ dependencies:
|
|
53
53
|
- - ">="
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: edtf
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
56
70
|
- !ruby/object:Gem::Dependency
|
57
71
|
name: rake
|
58
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -145,17 +159,20 @@ files:
|
|
145
159
|
- Rakefile
|
146
160
|
- lib/mods.rb
|
147
161
|
- lib/mods/constants.rb
|
162
|
+
- lib/mods/date.rb
|
148
163
|
- lib/mods/marc_country_codes.rb
|
149
164
|
- lib/mods/marc_geo_area_codes.rb
|
150
165
|
- lib/mods/marc_relator_codes.rb
|
151
166
|
- lib/mods/name.rb
|
152
167
|
- lib/mods/nom_terminology.rb
|
168
|
+
- lib/mods/origin_info.rb
|
153
169
|
- lib/mods/reader.rb
|
154
170
|
- lib/mods/record.rb
|
155
171
|
- lib/mods/subject.rb
|
156
172
|
- lib/mods/title_info.rb
|
157
173
|
- lib/mods/version.rb
|
158
174
|
- mods.gemspec
|
175
|
+
- spec/date_spec.rb
|
159
176
|
- spec/fixture_data/shpc1.mods.xml
|
160
177
|
- spec/language_spec.rb
|
161
178
|
- spec/location_spec.rb
|
@@ -190,11 +207,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
190
207
|
version: '0'
|
191
208
|
requirements: []
|
192
209
|
rubyforge_project:
|
193
|
-
rubygems_version: 2.
|
210
|
+
rubygems_version: 2.6.11
|
194
211
|
signing_key:
|
195
212
|
specification_version: 4
|
196
213
|
summary: Parse MODS (Metadata Object Description Schema) records.
|
197
214
|
test_files:
|
215
|
+
- spec/date_spec.rb
|
198
216
|
- spec/fixture_data/shpc1.mods.xml
|
199
217
|
- spec/language_spec.rb
|
200
218
|
- spec/location_spec.rb
|