mods 2.4.1 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +24 -0
- data/.gitignore +1 -0
- data/README.md +0 -1
- data/lib/mods/date.rb +54 -17
- data/lib/mods/marc_country_codes.rb +12 -10
- data/lib/mods/nom_terminology.rb +109 -845
- data/lib/mods/reader.rb +9 -39
- data/lib/mods/record.rb +13 -28
- data/lib/mods/version.rb +1 -1
- data/mods.gemspec +2 -2
- data/spec/fixture_data/hp566jq8781.xml +334 -0
- data/spec/integration/parker_spec.rb +217 -0
- data/spec/{date_spec.rb → lib/date_spec.rb} +9 -1
- data/spec/lib/language_spec.rb +123 -0
- data/spec/lib/location_spec.rb +175 -0
- data/spec/lib/name_spec.rb +368 -0
- data/spec/lib/origin_info_spec.rb +134 -0
- data/spec/lib/part_spec.rb +162 -0
- data/spec/lib/physical_description_spec.rb +72 -0
- data/spec/{reader_spec.rb → lib/reader_spec.rb} +1 -41
- data/spec/lib/record_info_spec.rb +114 -0
- data/spec/lib/record_spec.rb +287 -0
- data/spec/lib/related_item_spec.rb +124 -0
- data/spec/lib/subject_spec.rb +427 -0
- data/spec/lib/title_spec.rb +108 -0
- data/spec/lib/top_level_elmnts_simple_spec.rb +169 -0
- data/spec/spec_helper.rb +86 -5
- data/spec/support/fixtures.rb +9 -0
- metadata +49 -44
- data/.travis.yml +0 -16
- data/spec/language_spec.rb +0 -118
- data/spec/location_spec.rb +0 -295
- data/spec/name_spec.rb +0 -759
- data/spec/origin_info_spec.rb +0 -447
- data/spec/part_spec.rb +0 -471
- data/spec/physical_description_spec.rb +0 -144
- data/spec/record_info_spec.rb +0 -493
- data/spec/record_spec.rb +0 -356
- data/spec/related_item_spec.rb +0 -305
- data/spec/subject_spec.rb +0 -809
- data/spec/title_spec.rb +0 -226
- data/spec/top_level_elmnts_simple_spec.rb +0 -369
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 566f8acde01c8696c1a183588b663934dc50d500999fcc83ba67516620e57ae7
|
4
|
+
data.tar.gz: 87aeca4a314870bcace3fc1b3fe2260b67134becc4ca2a7a22ac3bb5edf73be2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b5d979ab8b4fbcb7e5dce0b5d8117e76f1c95a284cf531fd659a1e78d74af5571af309c9ab3e77b6d105f9e9d3427ba0fa88d82f4feeb5d54c18bc306079332
|
7
|
+
data.tar.gz: 0bea0554c539de56fcfee76380f0478133d9288fba63b1ff6c6a922d3877ab04e12070590c2a10339599cf2615bc75433d23fb546e75ad4f8627b75dd563efb7
|
data/.github/workflows/ruby.yml
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ master ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ master ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
tests:
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
strategy:
|
13
|
+
matrix:
|
14
|
+
ruby: [jruby-9.3.2.0, 2.7, '3.0', 3.1]
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v2
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
ruby-version: ${{ matrix.ruby }}
|
21
|
+
- name: Install dependencies
|
22
|
+
run: bundle install
|
23
|
+
- name: Run tests
|
24
|
+
run: bundle exec rake
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# Mods
|
2
2
|
|
3
3
|
[<img src="https://secure.travis-ci.org/sul-dlss/mods.png?branch=master" alt="Build Status"/>](http://travis-ci.org/sul-dlss/mods) [](https://codeclimate.com/github/sul-dlss/mods/coverage) [<img
|
4
|
-
src="https://gemnasium.com/sul-dlss/mods.png" alt="Dependency Status"/>](https://gemnasium.com/sul-dlss/mods) [<img
|
5
4
|
src="https://badge.fury.io/rb/mods.svg" alt="Gem Version"/>](http://badge.fury.io/rb/mods)
|
6
5
|
|
7
6
|
A Gem to parse MODS (Metadata Object Description Schema) records. More information about MODS can be found at
|
data/lib/mods/date.rb
CHANGED
@@ -11,7 +11,7 @@ module Mods
|
|
11
11
|
# @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
|
12
12
|
# @return [Mods::Date]
|
13
13
|
def self.from_element(xml)
|
14
|
-
case xml.attr(:encoding)
|
14
|
+
case xml.attr(:encoding)&.downcase
|
15
15
|
when 'w3cdtf'
|
16
16
|
Mods::Date::W3cdtfFormat.new(xml)
|
17
17
|
when 'iso8601'
|
@@ -23,12 +23,13 @@ module Mods
|
|
23
23
|
# when 'temper'
|
24
24
|
# Mods::Date::TemperFormat.new(xml)
|
25
25
|
else
|
26
|
-
date_class =
|
26
|
+
date_class = UnparseableDate if xml.text =~ /\p{Hebrew}/ || xml.text =~ /^-/
|
27
27
|
date_class ||= [
|
28
28
|
MMDDYYYYFormat,
|
29
29
|
MMDDYYFormat,
|
30
30
|
YearRangeFormat,
|
31
31
|
DecadeAsYearDashFormat,
|
32
|
+
DecadeStringFormat,
|
32
33
|
EmbeddedBCYearFormat,
|
33
34
|
EmbeddedYearFormat,
|
34
35
|
EmbeddedThreeDigitYearFormat,
|
@@ -49,12 +50,15 @@ module Mods
|
|
49
50
|
# Strict ISO8601-encoded date parser
|
50
51
|
class Iso8601Format < Date
|
51
52
|
def self.parse_date(text)
|
52
|
-
|
53
|
+
::Date.parse(normalize_to_edtf(text))
|
53
54
|
end
|
54
55
|
end
|
55
56
|
|
56
57
|
# Less strict W3CDTF-encoded date parser
|
57
58
|
class W3cdtfFormat < Date
|
59
|
+
def self.normalize_to_edtf(text)
|
60
|
+
super.gsub('-00', '')
|
61
|
+
end
|
58
62
|
end
|
59
63
|
|
60
64
|
# Strict EDTF parser
|
@@ -62,7 +66,16 @@ module Mods
|
|
62
66
|
attr_reader :date
|
63
67
|
|
64
68
|
def self.normalize_to_edtf(text)
|
65
|
-
text
|
69
|
+
return '0000' if text.strip == '0'
|
70
|
+
|
71
|
+
case text
|
72
|
+
when /^\d{1,3}$/
|
73
|
+
text.rjust(4, "0") if text =~ /^\d{1,3}$/
|
74
|
+
when /^-\d{1,3}$/
|
75
|
+
"-#{text.sub(/^-/, '').rjust(4, "0")}"
|
76
|
+
else
|
77
|
+
text
|
78
|
+
end
|
66
79
|
end
|
67
80
|
end
|
68
81
|
|
@@ -100,6 +113,12 @@ module Mods
|
|
100
113
|
end
|
101
114
|
end
|
102
115
|
|
116
|
+
class UnparseableDate < ExtractorDateFormat
|
117
|
+
def self.parse_date(text)
|
118
|
+
nil
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
103
122
|
# Full text extractor for MM/DD/YYYY and MM/DD/YYY-formatted dates
|
104
123
|
class MMDDYYYYFormat < ExtractorDateFormat
|
105
124
|
REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{3,4})/
|
@@ -201,7 +220,17 @@ module Mods
|
|
201
220
|
|
202
221
|
# Full-text extractor for data formatted as YYY-
|
203
222
|
class DecadeAsYearDashFormat < ExtractorDateFormat
|
204
|
-
REGEX = /(?<!\d)(?<year>\d{3})[-
|
223
|
+
REGEX = /(?<!\d)(?<year>\d{3})[-_xu?](?!\d)/
|
224
|
+
|
225
|
+
def self.normalize_to_edtf(text)
|
226
|
+
matches = text.match(REGEX)
|
227
|
+
"#{matches[:year]}X"
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
# Full-text extractor for data formatted as YYY0s
|
232
|
+
class DecadeStringFormat < ExtractorDateFormat
|
233
|
+
REGEX = /(?<!\d)(?<year>\d{3})0s(?!\d)/
|
205
234
|
|
206
235
|
def self.normalize_to_edtf(text)
|
207
236
|
matches = text.match(REGEX)
|
@@ -221,42 +250,42 @@ module Mods
|
|
221
250
|
|
222
251
|
# Full-text extractor that tries hard to pick any year present in the data
|
223
252
|
class EmbeddedYearFormat < ExtractorDateFormat
|
224
|
-
REGEX = /(
|
253
|
+
REGEX = /(?<!\d)(?<year>\d{4})(?!\d)/
|
225
254
|
|
226
255
|
def self.normalize_to_edtf(text)
|
227
256
|
matches = text.match(REGEX)
|
228
|
-
"#{matches[:
|
257
|
+
"#{matches[:year].rjust(4, "0")}"
|
229
258
|
end
|
230
259
|
end
|
231
260
|
|
232
261
|
# Full-text extractor that tries hard to pick any year present in the data
|
233
262
|
class EmbeddedThreeDigitYearFormat < ExtractorDateFormat
|
234
|
-
REGEX = /(
|
263
|
+
REGEX = /(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
|
235
264
|
|
236
265
|
def self.normalize_to_edtf(text)
|
237
266
|
matches = text.match(REGEX)
|
238
|
-
"#{matches[:
|
267
|
+
"#{matches[:year].rjust(4, "0")}"
|
239
268
|
end
|
240
269
|
end
|
241
270
|
|
242
271
|
# Full-text extractor that tries hard to pick any year present in the data
|
243
272
|
class OneOrTwoDigitYearFormat < ExtractorDateFormat
|
244
|
-
REGEX = /^(?<
|
273
|
+
REGEX = /^(?<year>\d{1,2})$/
|
245
274
|
|
246
275
|
def self.normalize_to_edtf(text)
|
247
276
|
matches = text.match(REGEX)
|
248
|
-
"#{matches[:
|
277
|
+
"#{matches[:year].rjust(4, "0")}"
|
249
278
|
end
|
250
279
|
end
|
251
280
|
|
252
281
|
# Full-text extractor that tries hard to pick any year present in the data
|
253
282
|
class EmbeddedYearWithBracketsFormat < ExtractorDateFormat
|
254
283
|
# [YYY]Y Y[YYY] [YY]YY Y[YY]Y YY[YY] YYY[Y] YY[Y]Y Y[Y]YY [Y]YYY
|
255
|
-
REGEX = /(?<
|
284
|
+
REGEX = /(?<year>[\d\[\]]{6})(?!\d)/
|
256
285
|
|
257
286
|
def self.normalize_to_edtf(text)
|
258
287
|
matches = text.match(REGEX)
|
259
|
-
"#{matches[:
|
288
|
+
"#{matches[:year].gsub('[', '').gsub(']', '')}"
|
260
289
|
end
|
261
290
|
end
|
262
291
|
|
@@ -327,7 +356,9 @@ module Mods
|
|
327
356
|
#
|
328
357
|
# @return [String]
|
329
358
|
def type
|
330
|
-
xml.attr(:type)
|
359
|
+
return if xml.attr(:type)&.empty?
|
360
|
+
|
361
|
+
xml.attr(:type)&.downcase
|
331
362
|
end
|
332
363
|
|
333
364
|
##
|
@@ -335,7 +366,9 @@ module Mods
|
|
335
366
|
#
|
336
367
|
# @return [String]
|
337
368
|
def encoding
|
338
|
-
xml.attr(:encoding)
|
369
|
+
return if xml.attr(:encoding)&.empty?
|
370
|
+
|
371
|
+
xml.attr(:encoding)&.downcase
|
339
372
|
end
|
340
373
|
|
341
374
|
##
|
@@ -359,7 +392,9 @@ module Mods
|
|
359
392
|
#
|
360
393
|
# @return [String]
|
361
394
|
def point
|
362
|
-
xml.attr(:point)
|
395
|
+
return if xml.attr(:point)&.empty?
|
396
|
+
|
397
|
+
xml.attr(:point)&.downcase
|
363
398
|
end
|
364
399
|
|
365
400
|
##
|
@@ -391,7 +426,7 @@ module Mods
|
|
391
426
|
#
|
392
427
|
# @return [String]
|
393
428
|
def qualifier
|
394
|
-
xml.attr(:qualifier)
|
429
|
+
xml.attr(:qualifier)&.downcase
|
395
430
|
end
|
396
431
|
|
397
432
|
##
|
@@ -419,6 +454,8 @@ module Mods
|
|
419
454
|
end
|
420
455
|
|
421
456
|
def precision
|
457
|
+
return :unknown unless date_range || date
|
458
|
+
|
422
459
|
if date_range.is_a? EDTF::Century
|
423
460
|
:century
|
424
461
|
elsif date_range.is_a? EDTF::Decade
|
data/lib/mods/marc_country_codes.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
1
|
# Represents the Marc Country Codes mapped to names, from http://www.loc.gov/marc/countries/countries_code.html 2013-01-03
|
4
|
-
#key - Marc Country code
|
5
|
-
#value - Marc Country term
|
2
|
+
# key - Marc Country code
|
3
|
+
# value - Marc Country term
|
4
|
+
# frozen_string_literal: true
|
5
|
+
|
6
6
|
MARC_COUNTRY = {
|
7
7
|
'aa' => "Albania",
|
8
8
|
'abc' => "Alberta",
|
@@ -11,7 +11,6 @@ MARC_COUNTRY = {
|
|
11
11
|
'ae' => "Algeria",
|
12
12
|
'af' => "Afghanistan",
|
13
13
|
'ag' => "Argentina",
|
14
|
-
#'ai' => "Anguilla", # discontinued
|
15
14
|
'ai' => "Armenia (Republic)",
|
16
15
|
'air' => "Armenian S.S.R.", # discontinued
|
17
16
|
'aj' => "Azerbaijan",
|
@@ -40,7 +39,7 @@ MARC_COUNTRY = {
|
|
40
39
|
'bi' => "British Indian Ocean Territory",
|
41
40
|
'bl' => "Brazil",
|
42
41
|
'bm' => "Bermuda Islands",
|
43
|
-
'bn' => "Bosnia and
|
42
|
+
'bn' => "Bosnia and Herzegovina",
|
44
43
|
'bo' => "Bolivia",
|
45
44
|
'bp' => "Solomon Islands",
|
46
45
|
'br' => "Burma",
|
@@ -74,7 +73,7 @@ MARC_COUNTRY = {
|
|
74
73
|
'cs' => "Czechoslovakia", # discontinued
|
75
74
|
'ctu' => "Connecticut",
|
76
75
|
'cu' => "Cuba",
|
77
|
-
'cv' => "
|
76
|
+
'cv' => "Cabo Verde",
|
78
77
|
'cw' => "Cook Islands",
|
79
78
|
'cx' => "Central African Republic",
|
80
79
|
'cy' => "Cyprus",
|
@@ -109,6 +108,7 @@ MARC_COUNTRY = {
|
|
109
108
|
'gb' => "Kiribati",
|
110
109
|
'gd' => "Grenada",
|
111
110
|
'ge' => "Germany (East)", # discontinued
|
111
|
+
'gg' => "Guernsey",
|
112
112
|
'gh' => "Ghana",
|
113
113
|
'gi' => "Gibraltar",
|
114
114
|
'gl' => "Greenland",
|
@@ -137,6 +137,7 @@ MARC_COUNTRY = {
|
|
137
137
|
'ie' => "Ireland",
|
138
138
|
'ii' => "India",
|
139
139
|
'ilu' => "Illinois",
|
140
|
+
'im' => "Isle of Man",
|
140
141
|
'inu' => "Indiana",
|
141
142
|
'io' => "Indonesia",
|
142
143
|
'iq' => "Iraq",
|
@@ -148,6 +149,7 @@ MARC_COUNTRY = {
|
|
148
149
|
'iw' => "Israel-Jordan Demilitarized Zones", # discontinued
|
149
150
|
'iy' => "Iraq-Saudi Arabia Neutral Zone",
|
150
151
|
'ja' => "Japan",
|
152
|
+
'je' => "Jersey",
|
151
153
|
'ji' => "Johnston Atoll",
|
152
154
|
'jm' => "Jamaica",
|
153
155
|
'jn' => "Jan Mayen", # discontinued
|
@@ -284,7 +286,7 @@ MARC_COUNTRY = {
|
|
284
286
|
'snc' => "Saskatchewan",
|
285
287
|
'so' => "Somalia",
|
286
288
|
'sp' => "Spain",
|
287
|
-
'sq' => "
|
289
|
+
'sq' => "Eswatini",
|
288
290
|
'sr' => "Surinam",
|
289
291
|
'ss' => "Western Sahara",
|
290
292
|
'st' => "Saint-Martin",
|
@@ -365,7 +367,7 @@ MARC_COUNTRY = {
|
|
365
367
|
'xk' => "Saint Lucia",
|
366
368
|
'xl' => "Saint Pierre and Miquelon",
|
367
369
|
'xm' => "Saint Vincent and the Grenadines",
|
368
|
-
'xn' => "Macedonia",
|
370
|
+
'xn' => "North Macedonia",
|
369
371
|
'xna' => "New South Wales",
|
370
372
|
'xo' => "Slovakia",
|
371
373
|
'xoa' => "Northern Territory",
|
@@ -384,4 +386,4 @@ MARC_COUNTRY = {
|
|
384
386
|
'ys' => "Yemen (People's Democratic Republic)", # discontinued
|
385
387
|
'yu' => "Serbia and Montenegro", # discontinued
|
386
388
|
'za' => "Zambia"
|
387
|
-
}
|
389
|
+
}.freeze
|