mods 2.4.1 → 3.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +24 -0
- data/.gitignore +1 -0
- data/README.md +0 -1
- data/lib/mods/date.rb +54 -17
- data/lib/mods/marc_country_codes.rb +12 -10
- data/lib/mods/nom_terminology.rb +109 -845
- data/lib/mods/reader.rb +9 -39
- data/lib/mods/record.rb +13 -28
- data/lib/mods/version.rb +1 -1
- data/mods.gemspec +2 -2
- data/spec/fixture_data/hp566jq8781.xml +334 -0
- data/spec/integration/parker_spec.rb +217 -0
- data/spec/{date_spec.rb → lib/date_spec.rb} +9 -1
- data/spec/lib/language_spec.rb +123 -0
- data/spec/lib/location_spec.rb +175 -0
- data/spec/lib/name_spec.rb +368 -0
- data/spec/lib/origin_info_spec.rb +134 -0
- data/spec/lib/part_spec.rb +162 -0
- data/spec/lib/physical_description_spec.rb +72 -0
- data/spec/{reader_spec.rb → lib/reader_spec.rb} +1 -41
- data/spec/lib/record_info_spec.rb +114 -0
- data/spec/lib/record_spec.rb +287 -0
- data/spec/lib/related_item_spec.rb +124 -0
- data/spec/lib/subject_spec.rb +427 -0
- data/spec/lib/title_spec.rb +108 -0
- data/spec/lib/top_level_elmnts_simple_spec.rb +169 -0
- data/spec/spec_helper.rb +86 -5
- data/spec/support/fixtures.rb +9 -0
- metadata +49 -44
- data/.travis.yml +0 -16
- data/spec/language_spec.rb +0 -118
- data/spec/location_spec.rb +0 -295
- data/spec/name_spec.rb +0 -759
- data/spec/origin_info_spec.rb +0 -447
- data/spec/part_spec.rb +0 -471
- data/spec/physical_description_spec.rb +0 -144
- data/spec/record_info_spec.rb +0 -493
- data/spec/record_spec.rb +0 -356
- data/spec/related_item_spec.rb +0 -305
- data/spec/subject_spec.rb +0 -809
- data/spec/title_spec.rb +0 -226
- data/spec/top_level_elmnts_simple_spec.rb +0 -369
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 566f8acde01c8696c1a183588b663934dc50d500999fcc83ba67516620e57ae7
|
4
|
+
data.tar.gz: 87aeca4a314870bcace3fc1b3fe2260b67134becc4ca2a7a22ac3bb5edf73be2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b5d979ab8b4fbcb7e5dce0b5d8117e76f1c95a284cf531fd659a1e78d74af5571af309c9ab3e77b6d105f9e9d3427ba0fa88d82f4feeb5d54c18bc306079332
|
7
|
+
data.tar.gz: 0bea0554c539de56fcfee76380f0478133d9288fba63b1ff6c6a922d3877ab04e12070590c2a10339599cf2615bc75433d23fb546e75ad4f8627b75dd563efb7
|
@@ -0,0 +1,24 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ master ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ master ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
tests:
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
strategy:
|
13
|
+
matrix:
|
14
|
+
ruby: [jruby-9.3.2.0, 2.7, '3.0', 3.1]
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v2
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
ruby-version: ${{ matrix.ruby }}
|
21
|
+
- name: Install dependencies
|
22
|
+
run: bundle install
|
23
|
+
- name: Run tests
|
24
|
+
run: bundle exec rake
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# Mods
|
2
2
|
|
3
3
|
[<img src="https://secure.travis-ci.org/sul-dlss/mods.png?branch=master" alt="Build Status"/>](http://travis-ci.org/sul-dlss/mods) [![Code Climate Test Coverage](https://codeclimate.com/github/sul-dlss/mods/badges/coverage.svg)](https://codeclimate.com/github/sul-dlss/mods/coverage) [<img
|
4
|
-
src="https://gemnasium.com/sul-dlss/mods.png" alt="Dependency Status"/>](https://gemnasium.com/sul-dlss/mods) [<img
|
5
4
|
src="https://badge.fury.io/rb/mods.svg" alt="Gem Version"/>](http://badge.fury.io/rb/mods)
|
6
5
|
|
7
6
|
A Gem to parse MODS (Metadata Object Description Schema) records. More information about MODS can be found at
|
data/lib/mods/date.rb
CHANGED
@@ -11,7 +11,7 @@ module Mods
|
|
11
11
|
# @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
|
12
12
|
# @return [Mods::Date]
|
13
13
|
def self.from_element(xml)
|
14
|
-
case xml.attr(:encoding)
|
14
|
+
case xml.attr(:encoding)&.downcase
|
15
15
|
when 'w3cdtf'
|
16
16
|
Mods::Date::W3cdtfFormat.new(xml)
|
17
17
|
when 'iso8601'
|
@@ -23,12 +23,13 @@ module Mods
|
|
23
23
|
# when 'temper'
|
24
24
|
# Mods::Date::TemperFormat.new(xml)
|
25
25
|
else
|
26
|
-
date_class =
|
26
|
+
date_class = UnparseableDate if xml.text =~ /\p{Hebrew}/ || xml.text =~ /^-/
|
27
27
|
date_class ||= [
|
28
28
|
MMDDYYYYFormat,
|
29
29
|
MMDDYYFormat,
|
30
30
|
YearRangeFormat,
|
31
31
|
DecadeAsYearDashFormat,
|
32
|
+
DecadeStringFormat,
|
32
33
|
EmbeddedBCYearFormat,
|
33
34
|
EmbeddedYearFormat,
|
34
35
|
EmbeddedThreeDigitYearFormat,
|
@@ -49,12 +50,15 @@ module Mods
|
|
49
50
|
# Strict ISO8601-encoded date parser
|
50
51
|
class Iso8601Format < Date
|
51
52
|
def self.parse_date(text)
|
52
|
-
|
53
|
+
::Date.parse(normalize_to_edtf(text))
|
53
54
|
end
|
54
55
|
end
|
55
56
|
|
56
57
|
# Less strict W3CDTF-encoded date parser
|
57
58
|
class W3cdtfFormat < Date
|
59
|
+
def self.normalize_to_edtf(text)
|
60
|
+
super.gsub('-00', '')
|
61
|
+
end
|
58
62
|
end
|
59
63
|
|
60
64
|
# Strict EDTF parser
|
@@ -62,7 +66,16 @@ module Mods
|
|
62
66
|
attr_reader :date
|
63
67
|
|
64
68
|
def self.normalize_to_edtf(text)
|
65
|
-
text
|
69
|
+
return '0000' if text.strip == '0'
|
70
|
+
|
71
|
+
case text
|
72
|
+
when /^\d{1,3}$/
|
73
|
+
text.rjust(4, "0") if text =~ /^\d{1,3}$/
|
74
|
+
when /^-\d{1,3}$/
|
75
|
+
"-#{text.sub(/^-/, '').rjust(4, "0")}"
|
76
|
+
else
|
77
|
+
text
|
78
|
+
end
|
66
79
|
end
|
67
80
|
end
|
68
81
|
|
@@ -100,6 +113,12 @@ module Mods
|
|
100
113
|
end
|
101
114
|
end
|
102
115
|
|
116
|
+
class UnparseableDate < ExtractorDateFormat
|
117
|
+
def self.parse_date(text)
|
118
|
+
nil
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
103
122
|
# Full text extractor for MM/DD/YYYY and MM/DD/YYY-formatted dates
|
104
123
|
class MMDDYYYYFormat < ExtractorDateFormat
|
105
124
|
REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{3,4})/
|
@@ -201,7 +220,17 @@ module Mods
|
|
201
220
|
|
202
221
|
# Full-text extractor for data formatted as YYY-
|
203
222
|
class DecadeAsYearDashFormat < ExtractorDateFormat
|
204
|
-
REGEX = /(?<!\d)(?<year>\d{3})[-
|
223
|
+
REGEX = /(?<!\d)(?<year>\d{3})[-_xu?](?!\d)/
|
224
|
+
|
225
|
+
def self.normalize_to_edtf(text)
|
226
|
+
matches = text.match(REGEX)
|
227
|
+
"#{matches[:year]}X"
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
# Full-text extractor for data formatted as YYY0s
|
232
|
+
class DecadeStringFormat < ExtractorDateFormat
|
233
|
+
REGEX = /(?<!\d)(?<year>\d{3})0s(?!\d)/
|
205
234
|
|
206
235
|
def self.normalize_to_edtf(text)
|
207
236
|
matches = text.match(REGEX)
|
@@ -221,42 +250,42 @@ module Mods
|
|
221
250
|
|
222
251
|
# Full-text extractor that tries hard to pick any year present in the data
|
223
252
|
class EmbeddedYearFormat < ExtractorDateFormat
|
224
|
-
REGEX = /(
|
253
|
+
REGEX = /(?<!\d)(?<year>\d{4})(?!\d)/
|
225
254
|
|
226
255
|
def self.normalize_to_edtf(text)
|
227
256
|
matches = text.match(REGEX)
|
228
|
-
"#{matches[:
|
257
|
+
"#{matches[:year].rjust(4, "0")}"
|
229
258
|
end
|
230
259
|
end
|
231
260
|
|
232
261
|
# Full-text extractor that tries hard to pick any year present in the data
|
233
262
|
class EmbeddedThreeDigitYearFormat < ExtractorDateFormat
|
234
|
-
REGEX = /(
|
263
|
+
REGEX = /(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
|
235
264
|
|
236
265
|
def self.normalize_to_edtf(text)
|
237
266
|
matches = text.match(REGEX)
|
238
|
-
"#{matches[:
|
267
|
+
"#{matches[:year].rjust(4, "0")}"
|
239
268
|
end
|
240
269
|
end
|
241
270
|
|
242
271
|
# Full-text extractor that tries hard to pick any year present in the data
|
243
272
|
class OneOrTwoDigitYearFormat < ExtractorDateFormat
|
244
|
-
REGEX = /^(?<
|
273
|
+
REGEX = /^(?<year>\d{1,2})$/
|
245
274
|
|
246
275
|
def self.normalize_to_edtf(text)
|
247
276
|
matches = text.match(REGEX)
|
248
|
-
"#{matches[:
|
277
|
+
"#{matches[:year].rjust(4, "0")}"
|
249
278
|
end
|
250
279
|
end
|
251
280
|
|
252
281
|
# Full-text extractor that tries hard to pick any year present in the data
|
253
282
|
class EmbeddedYearWithBracketsFormat < ExtractorDateFormat
|
254
283
|
# [YYY]Y Y[YYY] [YY]YY Y[YY]Y YY[YY] YYY[Y] YY[Y]Y Y[Y]YY [Y]YYY
|
255
|
-
REGEX = /(?<
|
284
|
+
REGEX = /(?<year>[\d\[\]]{6})(?!\d)/
|
256
285
|
|
257
286
|
def self.normalize_to_edtf(text)
|
258
287
|
matches = text.match(REGEX)
|
259
|
-
"#{matches[:
|
288
|
+
"#{matches[:year].gsub('[', '').gsub(']', '')}"
|
260
289
|
end
|
261
290
|
end
|
262
291
|
|
@@ -327,7 +356,9 @@ module Mods
|
|
327
356
|
#
|
328
357
|
# @return [String]
|
329
358
|
def type
|
330
|
-
xml.attr(:type)
|
359
|
+
return if xml.attr(:type)&.empty?
|
360
|
+
|
361
|
+
xml.attr(:type)&.downcase
|
331
362
|
end
|
332
363
|
|
333
364
|
##
|
@@ -335,7 +366,9 @@ module Mods
|
|
335
366
|
#
|
336
367
|
# @return [String]
|
337
368
|
def encoding
|
338
|
-
xml.attr(:encoding)
|
369
|
+
return if xml.attr(:encoding)&.empty?
|
370
|
+
|
371
|
+
xml.attr(:encoding)&.downcase
|
339
372
|
end
|
340
373
|
|
341
374
|
##
|
@@ -359,7 +392,9 @@ module Mods
|
|
359
392
|
#
|
360
393
|
# @return [String]
|
361
394
|
def point
|
362
|
-
xml.attr(:point)
|
395
|
+
return if xml.attr(:point)&.empty?
|
396
|
+
|
397
|
+
xml.attr(:point)&.downcase
|
363
398
|
end
|
364
399
|
|
365
400
|
##
|
@@ -391,7 +426,7 @@ module Mods
|
|
391
426
|
#
|
392
427
|
# @return [String]
|
393
428
|
def qualifier
|
394
|
-
xml.attr(:qualifier)
|
429
|
+
xml.attr(:qualifier)&.downcase
|
395
430
|
end
|
396
431
|
|
397
432
|
##
|
@@ -419,6 +454,8 @@ module Mods
|
|
419
454
|
end
|
420
455
|
|
421
456
|
def precision
|
457
|
+
return :unknown unless date_range || date
|
458
|
+
|
422
459
|
if date_range.is_a? EDTF::Century
|
423
460
|
:century
|
424
461
|
elsif date_range.is_a? EDTF::Decade
|
@@ -1,8 +1,8 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
1
|
# Represents the Marc Country Codes mapped to names, from http://www.loc.gov/marc/countries/countries_code.html 2013-01-03
|
4
|
-
#key - Marc Country code
|
5
|
-
#value - Marc Country term
|
2
|
+
# key - Marc Country code
|
3
|
+
# value - Marc Country term
|
4
|
+
# frozen_string_literal: true
|
5
|
+
|
6
6
|
MARC_COUNTRY = {
|
7
7
|
'aa' => "Albania",
|
8
8
|
'abc' => "Alberta",
|
@@ -11,7 +11,6 @@ MARC_COUNTRY = {
|
|
11
11
|
'ae' => "Algeria",
|
12
12
|
'af' => "Afghanistan",
|
13
13
|
'ag' => "Argentina",
|
14
|
-
#'ai' => "Anguilla", # discontinued
|
15
14
|
'ai' => "Armenia (Republic)",
|
16
15
|
'air' => "Armenian S.S.R.", # discontinued
|
17
16
|
'aj' => "Azerbaijan",
|
@@ -40,7 +39,7 @@ MARC_COUNTRY = {
|
|
40
39
|
'bi' => "British Indian Ocean Territory",
|
41
40
|
'bl' => "Brazil",
|
42
41
|
'bm' => "Bermuda Islands",
|
43
|
-
'bn' => "Bosnia and
|
42
|
+
'bn' => "Bosnia and Herzegovina",
|
44
43
|
'bo' => "Bolivia",
|
45
44
|
'bp' => "Solomon Islands",
|
46
45
|
'br' => "Burma",
|
@@ -74,7 +73,7 @@ MARC_COUNTRY = {
|
|
74
73
|
'cs' => "Czechoslovakia", # discontinued
|
75
74
|
'ctu' => "Connecticut",
|
76
75
|
'cu' => "Cuba",
|
77
|
-
'cv' => "
|
76
|
+
'cv' => "Cabo Verde",
|
78
77
|
'cw' => "Cook Islands",
|
79
78
|
'cx' => "Central African Republic",
|
80
79
|
'cy' => "Cyprus",
|
@@ -109,6 +108,7 @@ MARC_COUNTRY = {
|
|
109
108
|
'gb' => "Kiribati",
|
110
109
|
'gd' => "Grenada",
|
111
110
|
'ge' => "Germany (East)", # discontinued
|
111
|
+
'gg' => "Guernsey",
|
112
112
|
'gh' => "Ghana",
|
113
113
|
'gi' => "Gibraltar",
|
114
114
|
'gl' => "Greenland",
|
@@ -137,6 +137,7 @@ MARC_COUNTRY = {
|
|
137
137
|
'ie' => "Ireland",
|
138
138
|
'ii' => "India",
|
139
139
|
'ilu' => "Illinois",
|
140
|
+
'im' => "Isle of Man",
|
140
141
|
'inu' => "Indiana",
|
141
142
|
'io' => "Indonesia",
|
142
143
|
'iq' => "Iraq",
|
@@ -148,6 +149,7 @@ MARC_COUNTRY = {
|
|
148
149
|
'iw' => "Israel-Jordan Demilitarized Zones", # discontinued
|
149
150
|
'iy' => "Iraq-Saudi Arabia Neutral Zone",
|
150
151
|
'ja' => "Japan",
|
152
|
+
'je' => "Jersey",
|
151
153
|
'ji' => "Johnston Atoll",
|
152
154
|
'jm' => "Jamaica",
|
153
155
|
'jn' => "Jan Mayen", # discontinued
|
@@ -284,7 +286,7 @@ MARC_COUNTRY = {
|
|
284
286
|
'snc' => "Saskatchewan",
|
285
287
|
'so' => "Somalia",
|
286
288
|
'sp' => "Spain",
|
287
|
-
'sq' => "
|
289
|
+
'sq' => "Eswatini",
|
288
290
|
'sr' => "Surinam",
|
289
291
|
'ss' => "Western Sahara",
|
290
292
|
'st' => "Saint-Martin",
|
@@ -365,7 +367,7 @@ MARC_COUNTRY = {
|
|
365
367
|
'xk' => "Saint Lucia",
|
366
368
|
'xl' => "Saint Pierre and Miquelon",
|
367
369
|
'xm' => "Saint Vincent and the Grenadines",
|
368
|
-
'xn' => "Macedonia",
|
370
|
+
'xn' => "North Macedonia",
|
369
371
|
'xna' => "New South Wales",
|
370
372
|
'xo' => "Slovakia",
|
371
373
|
'xoa' => "Northern Territory",
|
@@ -384,4 +386,4 @@ MARC_COUNTRY = {
|
|
384
386
|
'ys' => "Yemen (People's Democratic Republic)", # discontinued
|
385
387
|
'yu' => "Serbia and Montenegro", # discontinued
|
386
388
|
'za' => "Zambia"
|
387
|
-
}
|
389
|
+
}.freeze
|