mods 2.4.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ruby.yml +24 -0
- data/.gitignore +1 -0
- data/Gemfile +0 -4
- data/README.md +1 -3
- data/lib/mods/date.rb +51 -17
- data/lib/mods/marc_country_codes.rb +12 -10
- data/lib/mods/nom_terminology.rb +110 -849
- data/lib/mods/reader.rb +9 -39
- data/lib/mods/record.rb +13 -28
- data/lib/mods/version.rb +1 -1
- data/mods.gemspec +3 -3
- data/spec/fixture_data/hp566jq8781.xml +334 -0
- data/spec/integration/parker_spec.rb +217 -0
- data/spec/{date_spec.rb → lib/date_spec.rb} +8 -1
- data/spec/lib/language_spec.rb +123 -0
- data/spec/lib/location_spec.rb +175 -0
- data/spec/lib/name_spec.rb +368 -0
- data/spec/lib/origin_info_spec.rb +134 -0
- data/spec/lib/part_spec.rb +162 -0
- data/spec/lib/physical_description_spec.rb +72 -0
- data/spec/{reader_spec.rb → lib/reader_spec.rb} +1 -41
- data/spec/lib/record_info_spec.rb +114 -0
- data/spec/lib/record_spec.rb +287 -0
- data/spec/lib/related_item_spec.rb +124 -0
- data/spec/lib/subject_spec.rb +427 -0
- data/spec/lib/title_spec.rb +108 -0
- data/spec/lib/top_level_elmnts_simple_spec.rb +169 -0
- data/spec/spec_helper.rb +87 -6
- data/spec/support/fixtures.rb +9 -0
- metadata +61 -43
- data/.coveralls.yml +0 -1
- data/.travis.yml +0 -6
- data/spec/language_spec.rb +0 -118
- data/spec/location_spec.rb +0 -295
- data/spec/name_spec.rb +0 -759
- data/spec/origin_info_spec.rb +0 -447
- data/spec/part_spec.rb +0 -471
- data/spec/physical_description_spec.rb +0 -144
- data/spec/record_info_spec.rb +0 -493
- data/spec/record_spec.rb +0 -356
- data/spec/related_item_spec.rb +0 -305
- data/spec/subject_spec.rb +0 -809
- data/spec/title_spec.rb +0 -226
- data/spec/top_level_elmnts_simple_spec.rb +0 -369
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 30381fee9ce7d47827010ce02902834f10118daeb381bdfa65bcc5adf778b65f
|
4
|
+
data.tar.gz: 65b9fac4ebee7589010039dbba9cbd8b204a94987443ec2c68e36a7509f39685
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9ecec0384d8c4d931cace14b32ddc9e2b5161ed6ef9f61af27fb89d9993643db790f98966f8ddea419008bd03588cbc6aaf4dfee25b404c5a392deae1b3e78e
|
7
|
+
data.tar.gz: 79b733073abbf0ee6dc8da9835c86e44be67f1f298a4e09f3d6380772ff51713cd63c121daf2bb0ae0751ab9ffb3954003eb24ecffde1a7e89ed3c542cead994
|
data/.github/workflows/ruby.yml
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ master ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ master ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
tests:
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
strategy:
|
13
|
+
matrix:
|
14
|
+
ruby: [jruby-9.3.2.0, 2.7, '3.0', 3.1]
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v2
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
ruby-version: ${{ matrix.ruby }}
|
21
|
+
- name: Install dependencies
|
22
|
+
run: bundle install
|
23
|
+
- name: Run tests
|
24
|
+
run: bundle exec rake
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
@@ -3,10 +3,6 @@ source 'https://rubygems.org'
|
|
3
3
|
# See mods.gemspec for this gem's dependencies
|
4
4
|
gemspec
|
5
5
|
|
6
|
-
group :test do
|
7
|
-
gem 'coveralls', require: false
|
8
|
-
end
|
9
|
-
|
10
6
|
# Pin to activesupport 4.x for older versions of ruby
|
11
7
|
gem 'activesupport', '~> 4.2' if RUBY_VERSION < '2.2.2'
|
12
8
|
gem 'byebug', platform: :mri
|
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# Mods
|
2
2
|
|
3
|
-
[<img src="https://secure.travis-ci.org/sul-dlss/mods.png?branch=master" alt="Build Status"/>](http://travis-ci.org/sul-dlss/mods) [<img
|
4
|
-
src="https://coveralls.io/repos/sul-dlss/mods/badge.png" alt="Coverage Status"/>](https://coveralls.io/r/sul-dlss/mods) [<img
|
5
|
-
src="https://gemnasium.com/sul-dlss/mods.png" alt="Dependency Status"/>](https://gemnasium.com/sul-dlss/mods) [<img
|
3
|
+
[<img src="https://secure.travis-ci.org/sul-dlss/mods.png?branch=master" alt="Build Status"/>](http://travis-ci.org/sul-dlss/mods) [](https://codeclimate.com/github/sul-dlss/mods/coverage) [<img
|
6
4
|
src="https://badge.fury.io/rb/mods.svg" alt="Gem Version"/>](http://badge.fury.io/rb/mods)
|
7
5
|
|
8
6
|
A Gem to parse MODS (Metadata Object Description Schema) records. More information about MODS can be found at
|
data/lib/mods/date.rb
CHANGED
@@ -11,7 +11,7 @@ module Mods
|
|
11
11
|
# @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
|
12
12
|
# @return [Mods::Date]
|
13
13
|
def self.from_element(xml)
|
14
|
-
case xml.attr(:encoding)
|
14
|
+
case xml.attr(:encoding)&.downcase
|
15
15
|
when 'w3cdtf'
|
16
16
|
Mods::Date::W3cdtfFormat.new(xml)
|
17
17
|
when 'iso8601'
|
@@ -23,12 +23,13 @@ module Mods
|
|
23
23
|
# when 'temper'
|
24
24
|
# Mods::Date::TemperFormat.new(xml)
|
25
25
|
else
|
26
|
-
date_class =
|
26
|
+
date_class = UnparseableDate if xml.text =~ /\p{Hebrew}/ || xml.text =~ /^-/
|
27
27
|
date_class ||= [
|
28
28
|
MMDDYYYYFormat,
|
29
29
|
MMDDYYFormat,
|
30
30
|
YearRangeFormat,
|
31
31
|
DecadeAsYearDashFormat,
|
32
|
+
DecadeStringFormat,
|
32
33
|
EmbeddedBCYearFormat,
|
33
34
|
EmbeddedYearFormat,
|
34
35
|
EmbeddedThreeDigitYearFormat,
|
@@ -49,7 +50,7 @@ module Mods
|
|
49
50
|
# Strict ISO8601-encoded date parser
|
50
51
|
class Iso8601Format < Date
|
51
52
|
def self.parse_date(text)
|
52
|
-
|
53
|
+
::Date.parse(normalize_to_edtf(text))
|
53
54
|
end
|
54
55
|
end
|
55
56
|
|
@@ -62,7 +63,16 @@ module Mods
|
|
62
63
|
attr_reader :date
|
63
64
|
|
64
65
|
def self.normalize_to_edtf(text)
|
65
|
-
text
|
66
|
+
return '0000' if text.strip == '0'
|
67
|
+
|
68
|
+
case text
|
69
|
+
when /^\d{1,3}$/
|
70
|
+
text.rjust(4, "0") if text =~ /^\d{1,3}$/
|
71
|
+
when /^-\d{1,3}$/
|
72
|
+
"-#{text.sub(/^-/, '').rjust(4, "0")}"
|
73
|
+
else
|
74
|
+
text
|
75
|
+
end
|
66
76
|
end
|
67
77
|
end
|
68
78
|
|
@@ -100,6 +110,12 @@ module Mods
|
|
100
110
|
end
|
101
111
|
end
|
102
112
|
|
113
|
+
class UnparseableDate < ExtractorDateFormat
|
114
|
+
def self.parse_date(text)
|
115
|
+
nil
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
103
119
|
# Full text extractor for MM/DD/YYYY and MM/DD/YYY-formatted dates
|
104
120
|
class MMDDYYYYFormat < ExtractorDateFormat
|
105
121
|
REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{3,4})/
|
@@ -201,7 +217,17 @@ module Mods
|
|
201
217
|
|
202
218
|
# Full-text extractor for data formatted as YYY-
|
203
219
|
class DecadeAsYearDashFormat < ExtractorDateFormat
|
204
|
-
REGEX = /(?<!\d)(?<year>\d{3})[-
|
220
|
+
REGEX = /(?<!\d)(?<year>\d{3})[-_xu?](?!\d)/
|
221
|
+
|
222
|
+
def self.normalize_to_edtf(text)
|
223
|
+
matches = text.match(REGEX)
|
224
|
+
"#{matches[:year]}X"
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
# Full-text extractor for data formatted as YYY0s
|
229
|
+
class DecadeStringFormat < ExtractorDateFormat
|
230
|
+
REGEX = /(?<!\d)(?<year>\d{3})0s(?!\d)/
|
205
231
|
|
206
232
|
def self.normalize_to_edtf(text)
|
207
233
|
matches = text.match(REGEX)
|
@@ -221,42 +247,42 @@ module Mods
|
|
221
247
|
|
222
248
|
# Full-text extractor that tries hard to pick any year present in the data
|
223
249
|
class EmbeddedYearFormat < ExtractorDateFormat
|
224
|
-
REGEX = /(
|
250
|
+
REGEX = /(?<!\d)(?<year>\d{4})(?!\d)/
|
225
251
|
|
226
252
|
def self.normalize_to_edtf(text)
|
227
253
|
matches = text.match(REGEX)
|
228
|
-
"#{matches[:
|
254
|
+
"#{matches[:year].rjust(4, "0")}"
|
229
255
|
end
|
230
256
|
end
|
231
257
|
|
232
258
|
# Full-text extractor that tries hard to pick any year present in the data
|
233
259
|
class EmbeddedThreeDigitYearFormat < ExtractorDateFormat
|
234
|
-
REGEX = /(
|
260
|
+
REGEX = /(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
|
235
261
|
|
236
262
|
def self.normalize_to_edtf(text)
|
237
263
|
matches = text.match(REGEX)
|
238
|
-
"#{matches[:
|
264
|
+
"#{matches[:year].rjust(4, "0")}"
|
239
265
|
end
|
240
266
|
end
|
241
267
|
|
242
268
|
# Full-text extractor that tries hard to pick any year present in the data
|
243
269
|
class OneOrTwoDigitYearFormat < ExtractorDateFormat
|
244
|
-
REGEX = /^(?<
|
270
|
+
REGEX = /^(?<year>\d{1,2})$/
|
245
271
|
|
246
272
|
def self.normalize_to_edtf(text)
|
247
273
|
matches = text.match(REGEX)
|
248
|
-
"#{matches[:
|
274
|
+
"#{matches[:year].rjust(4, "0")}"
|
249
275
|
end
|
250
276
|
end
|
251
277
|
|
252
278
|
# Full-text extractor that tries hard to pick any year present in the data
|
253
279
|
class EmbeddedYearWithBracketsFormat < ExtractorDateFormat
|
254
280
|
# [YYY]Y Y[YYY] [YY]YY Y[YY]Y YY[YY] YYY[Y] YY[Y]Y Y[Y]YY [Y]YYY
|
255
|
-
REGEX = /(?<
|
281
|
+
REGEX = /(?<year>[\d\[\]]{6})(?!\d)/
|
256
282
|
|
257
283
|
def self.normalize_to_edtf(text)
|
258
284
|
matches = text.match(REGEX)
|
259
|
-
"#{matches[:
|
285
|
+
"#{matches[:year].gsub('[', '').gsub(']', '')}"
|
260
286
|
end
|
261
287
|
end
|
262
288
|
|
@@ -327,7 +353,9 @@ module Mods
|
|
327
353
|
#
|
328
354
|
# @return [String]
|
329
355
|
def type
|
330
|
-
xml.attr(:type)
|
356
|
+
return if xml.attr(:type)&.empty?
|
357
|
+
|
358
|
+
xml.attr(:type)&.downcase
|
331
359
|
end
|
332
360
|
|
333
361
|
##
|
@@ -335,7 +363,9 @@ module Mods
|
|
335
363
|
#
|
336
364
|
# @return [String]
|
337
365
|
def encoding
|
338
|
-
xml.attr(:encoding)
|
366
|
+
return if xml.attr(:encoding)&.empty?
|
367
|
+
|
368
|
+
xml.attr(:encoding)&.downcase
|
339
369
|
end
|
340
370
|
|
341
371
|
##
|
@@ -359,7 +389,9 @@ module Mods
|
|
359
389
|
#
|
360
390
|
# @return [String]
|
361
391
|
def point
|
362
|
-
xml.attr(:point)
|
392
|
+
return if xml.attr(:point)&.empty?
|
393
|
+
|
394
|
+
xml.attr(:point)&.downcase
|
363
395
|
end
|
364
396
|
|
365
397
|
##
|
@@ -391,7 +423,7 @@ module Mods
|
|
391
423
|
#
|
392
424
|
# @return [String]
|
393
425
|
def qualifier
|
394
|
-
xml.attr(:qualifier)
|
426
|
+
xml.attr(:qualifier)&.downcase
|
395
427
|
end
|
396
428
|
|
397
429
|
##
|
@@ -419,6 +451,8 @@ module Mods
|
|
419
451
|
end
|
420
452
|
|
421
453
|
def precision
|
454
|
+
return :unknown unless date_range || date
|
455
|
+
|
422
456
|
if date_range.is_a? EDTF::Century
|
423
457
|
:century
|
424
458
|
elsif date_range.is_a? EDTF::Decade
|
data/lib/mods/marc_country_codes.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
1
|
# Represents the Marc Country Codes mapped to names, from http://www.loc.gov/marc/countries/countries_code.html 2013-01-03
|
4
|
-
#key - Marc Country code
|
5
|
-
#value - Marc Country term
|
2
|
+
# key - Marc Country code
|
3
|
+
# value - Marc Country term
|
4
|
+
# frozen_string_literal: true
|
5
|
+
|
6
6
|
MARC_COUNTRY = {
|
7
7
|
'aa' => "Albania",
|
8
8
|
'abc' => "Alberta",
|
@@ -11,7 +11,6 @@ MARC_COUNTRY = {
|
|
11
11
|
'ae' => "Algeria",
|
12
12
|
'af' => "Afghanistan",
|
13
13
|
'ag' => "Argentina",
|
14
|
-
#'ai' => "Anguilla", # discontinued
|
15
14
|
'ai' => "Armenia (Republic)",
|
16
15
|
'air' => "Armenian S.S.R.", # discontinued
|
17
16
|
'aj' => "Azerbaijan",
|
@@ -40,7 +39,7 @@ MARC_COUNTRY = {
|
|
40
39
|
'bi' => "British Indian Ocean Territory",
|
41
40
|
'bl' => "Brazil",
|
42
41
|
'bm' => "Bermuda Islands",
|
43
|
-
'bn' => "Bosnia and
|
42
|
+
'bn' => "Bosnia and Herzegovina",
|
44
43
|
'bo' => "Bolivia",
|
45
44
|
'bp' => "Solomon Islands",
|
46
45
|
'br' => "Burma",
|
@@ -74,7 +73,7 @@ MARC_COUNTRY = {
|
|
74
73
|
'cs' => "Czechoslovakia", # discontinued
|
75
74
|
'ctu' => "Connecticut",
|
76
75
|
'cu' => "Cuba",
|
77
|
-
'cv' => "
|
76
|
+
'cv' => "Cabo Verde",
|
78
77
|
'cw' => "Cook Islands",
|
79
78
|
'cx' => "Central African Republic",
|
80
79
|
'cy' => "Cyprus",
|
@@ -109,6 +108,7 @@ MARC_COUNTRY = {
|
|
109
108
|
'gb' => "Kiribati",
|
110
109
|
'gd' => "Grenada",
|
111
110
|
'ge' => "Germany (East)", # discontinued
|
111
|
+
'gg' => "Guernsey",
|
112
112
|
'gh' => "Ghana",
|
113
113
|
'gi' => "Gibraltar",
|
114
114
|
'gl' => "Greenland",
|
@@ -137,6 +137,7 @@ MARC_COUNTRY = {
|
|
137
137
|
'ie' => "Ireland",
|
138
138
|
'ii' => "India",
|
139
139
|
'ilu' => "Illinois",
|
140
|
+
'im' => "Isle of Man",
|
140
141
|
'inu' => "Indiana",
|
141
142
|
'io' => "Indonesia",
|
142
143
|
'iq' => "Iraq",
|
@@ -148,6 +149,7 @@ MARC_COUNTRY = {
|
|
148
149
|
'iw' => "Israel-Jordan Demilitarized Zones", # discontinued
|
149
150
|
'iy' => "Iraq-Saudi Arabia Neutral Zone",
|
150
151
|
'ja' => "Japan",
|
152
|
+
'je' => "Jersey",
|
151
153
|
'ji' => "Johnston Atoll",
|
152
154
|
'jm' => "Jamaica",
|
153
155
|
'jn' => "Jan Mayen", # discontinued
|
@@ -284,7 +286,7 @@ MARC_COUNTRY = {
|
|
284
286
|
'snc' => "Saskatchewan",
|
285
287
|
'so' => "Somalia",
|
286
288
|
'sp' => "Spain",
|
287
|
-
'sq' => "
|
289
|
+
'sq' => "Eswatini",
|
288
290
|
'sr' => "Surinam",
|
289
291
|
'ss' => "Western Sahara",
|
290
292
|
'st' => "Saint-Martin",
|
@@ -365,7 +367,7 @@ MARC_COUNTRY = {
|
|
365
367
|
'xk' => "Saint Lucia",
|
366
368
|
'xl' => "Saint Pierre and Miquelon",
|
367
369
|
'xm' => "Saint Vincent and the Grenadines",
|
368
|
-
'xn' => "Macedonia",
|
370
|
+
'xn' => "North Macedonia",
|
369
371
|
'xna' => "New South Wales",
|
370
372
|
'xo' => "Slovakia",
|
371
373
|
'xoa' => "Northern Territory",
|
@@ -384,4 +386,4 @@ MARC_COUNTRY = {
|
|
384
386
|
'ys' => "Yemen (People's Democratic Republic)", # discontinued
|
385
387
|
'yu' => "Serbia and Montenegro", # discontinued
|
386
388
|
'za' => "Zambia"
|
387
|
-
}
|
389
|
+
}.freeze
|