mods 2.4.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/ruby.yml +24 -0
- data/.gitignore +1 -0
- data/Gemfile +0 -4
- data/README.md +1 -3
- data/lib/mods/date.rb +51 -17
- data/lib/mods/marc_country_codes.rb +12 -10
- data/lib/mods/nom_terminology.rb +110 -849
- data/lib/mods/reader.rb +9 -39
- data/lib/mods/record.rb +13 -28
- data/lib/mods/version.rb +1 -1
- data/mods.gemspec +3 -3
- data/spec/fixture_data/hp566jq8781.xml +334 -0
- data/spec/integration/parker_spec.rb +217 -0
- data/spec/{date_spec.rb → lib/date_spec.rb} +8 -1
- data/spec/lib/language_spec.rb +123 -0
- data/spec/lib/location_spec.rb +175 -0
- data/spec/lib/name_spec.rb +368 -0
- data/spec/lib/origin_info_spec.rb +134 -0
- data/spec/lib/part_spec.rb +162 -0
- data/spec/lib/physical_description_spec.rb +72 -0
- data/spec/{reader_spec.rb → lib/reader_spec.rb} +1 -41
- data/spec/lib/record_info_spec.rb +114 -0
- data/spec/lib/record_spec.rb +287 -0
- data/spec/lib/related_item_spec.rb +124 -0
- data/spec/lib/subject_spec.rb +427 -0
- data/spec/lib/title_spec.rb +108 -0
- data/spec/lib/top_level_elmnts_simple_spec.rb +169 -0
- data/spec/spec_helper.rb +87 -6
- data/spec/support/fixtures.rb +9 -0
- metadata +61 -43
- data/.coveralls.yml +0 -1
- data/.travis.yml +0 -6
- data/spec/language_spec.rb +0 -118
- data/spec/location_spec.rb +0 -295
- data/spec/name_spec.rb +0 -759
- data/spec/origin_info_spec.rb +0 -447
- data/spec/part_spec.rb +0 -471
- data/spec/physical_description_spec.rb +0 -144
- data/spec/record_info_spec.rb +0 -493
- data/spec/record_spec.rb +0 -356
- data/spec/related_item_spec.rb +0 -305
- data/spec/subject_spec.rb +0 -809
- data/spec/title_spec.rb +0 -226
- data/spec/top_level_elmnts_simple_spec.rb +0 -369
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 30381fee9ce7d47827010ce02902834f10118daeb381bdfa65bcc5adf778b65f
|
4
|
+
data.tar.gz: 65b9fac4ebee7589010039dbba9cbd8b204a94987443ec2c68e36a7509f39685
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9ecec0384d8c4d931cace14b32ddc9e2b5161ed6ef9f61af27fb89d9993643db790f98966f8ddea419008bd03588cbc6aaf4dfee25b404c5a392deae1b3e78e
|
7
|
+
data.tar.gz: 79b733073abbf0ee6dc8da9835c86e44be67f1f298a4e09f3d6380772ff51713cd63c121daf2bb0ae0751ab9ffb3954003eb24ecffde1a7e89ed3c542cead994
|
@@ -0,0 +1,24 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ master ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ master ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
tests:
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
strategy:
|
13
|
+
matrix:
|
14
|
+
ruby: [jruby-9.3.2.0, 2.7, '3.0', 3.1]
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v2
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
ruby-version: ${{ matrix.ruby }}
|
21
|
+
- name: Install dependencies
|
22
|
+
run: bundle install
|
23
|
+
- name: Run tests
|
24
|
+
run: bundle exec rake
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
@@ -3,10 +3,6 @@ source 'https://rubygems.org'
|
|
3
3
|
# See mods.gemspec for this gem's dependencies
|
4
4
|
gemspec
|
5
5
|
|
6
|
-
group :test do
|
7
|
-
gem 'coveralls', require: false
|
8
|
-
end
|
9
|
-
|
10
6
|
# Pin to activesupport 4.x for older versions of ruby
|
11
7
|
gem 'activesupport', '~> 4.2' if RUBY_VERSION < '2.2.2'
|
12
8
|
gem 'byebug', platform: :mri
|
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# Mods
|
2
2
|
|
3
|
-
[<img src="https://secure.travis-ci.org/sul-dlss/mods.png?branch=master" alt="Build Status"/>](http://travis-ci.org/sul-dlss/mods) [<img
|
4
|
-
src="https://coveralls.io/repos/sul-dlss/mods/badge.png" alt="Coverage Status"/>](https://coveralls.io/r/sul-dlss/mods) [<img
|
5
|
-
src="https://gemnasium.com/sul-dlss/mods.png" alt="Dependency Status"/>](https://gemnasium.com/sul-dlss/mods) [<img
|
3
|
+
[<img src="https://secure.travis-ci.org/sul-dlss/mods.png?branch=master" alt="Build Status"/>](http://travis-ci.org/sul-dlss/mods) [![Code Climate Test Coverage](https://codeclimate.com/github/sul-dlss/mods/badges/coverage.svg)](https://codeclimate.com/github/sul-dlss/mods/coverage) [<img
|
6
4
|
src="https://badge.fury.io/rb/mods.svg" alt="Gem Version"/>](http://badge.fury.io/rb/mods)
|
7
5
|
|
8
6
|
A Gem to parse MODS (Metadata Object Description Schema) records. More information about MODS can be found at
|
data/lib/mods/date.rb
CHANGED
@@ -11,7 +11,7 @@ module Mods
|
|
11
11
|
# @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
|
12
12
|
# @return [Mods::Date]
|
13
13
|
def self.from_element(xml)
|
14
|
-
case xml.attr(:encoding)
|
14
|
+
case xml.attr(:encoding)&.downcase
|
15
15
|
when 'w3cdtf'
|
16
16
|
Mods::Date::W3cdtfFormat.new(xml)
|
17
17
|
when 'iso8601'
|
@@ -23,12 +23,13 @@ module Mods
|
|
23
23
|
# when 'temper'
|
24
24
|
# Mods::Date::TemperFormat.new(xml)
|
25
25
|
else
|
26
|
-
date_class =
|
26
|
+
date_class = UnparseableDate if xml.text =~ /\p{Hebrew}/ || xml.text =~ /^-/
|
27
27
|
date_class ||= [
|
28
28
|
MMDDYYYYFormat,
|
29
29
|
MMDDYYFormat,
|
30
30
|
YearRangeFormat,
|
31
31
|
DecadeAsYearDashFormat,
|
32
|
+
DecadeStringFormat,
|
32
33
|
EmbeddedBCYearFormat,
|
33
34
|
EmbeddedYearFormat,
|
34
35
|
EmbeddedThreeDigitYearFormat,
|
@@ -49,7 +50,7 @@ module Mods
|
|
49
50
|
# Strict ISO8601-encoded date parser
|
50
51
|
class Iso8601Format < Date
|
51
52
|
def self.parse_date(text)
|
52
|
-
|
53
|
+
::Date.parse(normalize_to_edtf(text))
|
53
54
|
end
|
54
55
|
end
|
55
56
|
|
@@ -62,7 +63,16 @@ module Mods
|
|
62
63
|
attr_reader :date
|
63
64
|
|
64
65
|
def self.normalize_to_edtf(text)
|
65
|
-
text
|
66
|
+
return '0000' if text.strip == '0'
|
67
|
+
|
68
|
+
case text
|
69
|
+
when /^\d{1,3}$/
|
70
|
+
text.rjust(4, "0") if text =~ /^\d{1,3}$/
|
71
|
+
when /^-\d{1,3}$/
|
72
|
+
"-#{text.sub(/^-/, '').rjust(4, "0")}"
|
73
|
+
else
|
74
|
+
text
|
75
|
+
end
|
66
76
|
end
|
67
77
|
end
|
68
78
|
|
@@ -100,6 +110,12 @@ module Mods
|
|
100
110
|
end
|
101
111
|
end
|
102
112
|
|
113
|
+
class UnparseableDate < ExtractorDateFormat
|
114
|
+
def self.parse_date(text)
|
115
|
+
nil
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
103
119
|
# Full text extractor for MM/DD/YYYY and MM/DD/YYY-formatted dates
|
104
120
|
class MMDDYYYYFormat < ExtractorDateFormat
|
105
121
|
REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{3,4})/
|
@@ -201,7 +217,17 @@ module Mods
|
|
201
217
|
|
202
218
|
# Full-text extractor for data formatted as YYY-
|
203
219
|
class DecadeAsYearDashFormat < ExtractorDateFormat
|
204
|
-
REGEX = /(?<!\d)(?<year>\d{3})[-
|
220
|
+
REGEX = /(?<!\d)(?<year>\d{3})[-_xu?](?!\d)/
|
221
|
+
|
222
|
+
def self.normalize_to_edtf(text)
|
223
|
+
matches = text.match(REGEX)
|
224
|
+
"#{matches[:year]}X"
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
# Full-text extractor for data formatted as YYY0s
|
229
|
+
class DecadeStringFormat < ExtractorDateFormat
|
230
|
+
REGEX = /(?<!\d)(?<year>\d{3})0s(?!\d)/
|
205
231
|
|
206
232
|
def self.normalize_to_edtf(text)
|
207
233
|
matches = text.match(REGEX)
|
@@ -221,42 +247,42 @@ module Mods
|
|
221
247
|
|
222
248
|
# Full-text extractor that tries hard to pick any year present in the data
|
223
249
|
class EmbeddedYearFormat < ExtractorDateFormat
|
224
|
-
REGEX = /(
|
250
|
+
REGEX = /(?<!\d)(?<year>\d{4})(?!\d)/
|
225
251
|
|
226
252
|
def self.normalize_to_edtf(text)
|
227
253
|
matches = text.match(REGEX)
|
228
|
-
"#{matches[:
|
254
|
+
"#{matches[:year].rjust(4, "0")}"
|
229
255
|
end
|
230
256
|
end
|
231
257
|
|
232
258
|
# Full-text extractor that tries hard to pick any year present in the data
|
233
259
|
class EmbeddedThreeDigitYearFormat < ExtractorDateFormat
|
234
|
-
REGEX = /(
|
260
|
+
REGEX = /(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
|
235
261
|
|
236
262
|
def self.normalize_to_edtf(text)
|
237
263
|
matches = text.match(REGEX)
|
238
|
-
"#{matches[:
|
264
|
+
"#{matches[:year].rjust(4, "0")}"
|
239
265
|
end
|
240
266
|
end
|
241
267
|
|
242
268
|
# Full-text extractor that tries hard to pick any year present in the data
|
243
269
|
class OneOrTwoDigitYearFormat < ExtractorDateFormat
|
244
|
-
REGEX = /^(?<
|
270
|
+
REGEX = /^(?<year>\d{1,2})$/
|
245
271
|
|
246
272
|
def self.normalize_to_edtf(text)
|
247
273
|
matches = text.match(REGEX)
|
248
|
-
"#{matches[:
|
274
|
+
"#{matches[:year].rjust(4, "0")}"
|
249
275
|
end
|
250
276
|
end
|
251
277
|
|
252
278
|
# Full-text extractor that tries hard to pick any year present in the data
|
253
279
|
class EmbeddedYearWithBracketsFormat < ExtractorDateFormat
|
254
280
|
# [YYY]Y Y[YYY] [YY]YY Y[YY]Y YY[YY] YYY[Y] YY[Y]Y Y[Y]YY [Y]YYY
|
255
|
-
REGEX = /(?<
|
281
|
+
REGEX = /(?<year>[\d\[\]]{6})(?!\d)/
|
256
282
|
|
257
283
|
def self.normalize_to_edtf(text)
|
258
284
|
matches = text.match(REGEX)
|
259
|
-
"#{matches[:
|
285
|
+
"#{matches[:year].gsub('[', '').gsub(']', '')}"
|
260
286
|
end
|
261
287
|
end
|
262
288
|
|
@@ -327,7 +353,9 @@ module Mods
|
|
327
353
|
#
|
328
354
|
# @return [String]
|
329
355
|
def type
|
330
|
-
xml.attr(:type)
|
356
|
+
return if xml.attr(:type)&.empty?
|
357
|
+
|
358
|
+
xml.attr(:type)&.downcase
|
331
359
|
end
|
332
360
|
|
333
361
|
##
|
@@ -335,7 +363,9 @@ module Mods
|
|
335
363
|
#
|
336
364
|
# @return [String]
|
337
365
|
def encoding
|
338
|
-
xml.attr(:encoding)
|
366
|
+
return if xml.attr(:encoding)&.empty?
|
367
|
+
|
368
|
+
xml.attr(:encoding)&.downcase
|
339
369
|
end
|
340
370
|
|
341
371
|
##
|
@@ -359,7 +389,9 @@ module Mods
|
|
359
389
|
#
|
360
390
|
# @return [String]
|
361
391
|
def point
|
362
|
-
xml.attr(:point)
|
392
|
+
return if xml.attr(:point)&.empty?
|
393
|
+
|
394
|
+
xml.attr(:point)&.downcase
|
363
395
|
end
|
364
396
|
|
365
397
|
##
|
@@ -391,7 +423,7 @@ module Mods
|
|
391
423
|
#
|
392
424
|
# @return [String]
|
393
425
|
def qualifier
|
394
|
-
xml.attr(:qualifier)
|
426
|
+
xml.attr(:qualifier)&.downcase
|
395
427
|
end
|
396
428
|
|
397
429
|
##
|
@@ -419,6 +451,8 @@ module Mods
|
|
419
451
|
end
|
420
452
|
|
421
453
|
def precision
|
454
|
+
return :unknown unless date_range || date
|
455
|
+
|
422
456
|
if date_range.is_a? EDTF::Century
|
423
457
|
:century
|
424
458
|
elsif date_range.is_a? EDTF::Decade
|
@@ -1,8 +1,8 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
1
|
# Represents the Marc Country Codes mapped to names, from http://www.loc.gov/marc/countries/countries_code.html 2013-01-03
|
4
|
-
#key - Marc Country code
|
5
|
-
#value - Marc Country term
|
2
|
+
# key - Marc Country code
|
3
|
+
# value - Marc Country term
|
4
|
+
# frozen_string_literal: true
|
5
|
+
|
6
6
|
MARC_COUNTRY = {
|
7
7
|
'aa' => "Albania",
|
8
8
|
'abc' => "Alberta",
|
@@ -11,7 +11,6 @@ MARC_COUNTRY = {
|
|
11
11
|
'ae' => "Algeria",
|
12
12
|
'af' => "Afghanistan",
|
13
13
|
'ag' => "Argentina",
|
14
|
-
#'ai' => "Anguilla", # discontinued
|
15
14
|
'ai' => "Armenia (Republic)",
|
16
15
|
'air' => "Armenian S.S.R.", # discontinued
|
17
16
|
'aj' => "Azerbaijan",
|
@@ -40,7 +39,7 @@ MARC_COUNTRY = {
|
|
40
39
|
'bi' => "British Indian Ocean Territory",
|
41
40
|
'bl' => "Brazil",
|
42
41
|
'bm' => "Bermuda Islands",
|
43
|
-
'bn' => "Bosnia and
|
42
|
+
'bn' => "Bosnia and Herzegovina",
|
44
43
|
'bo' => "Bolivia",
|
45
44
|
'bp' => "Solomon Islands",
|
46
45
|
'br' => "Burma",
|
@@ -74,7 +73,7 @@ MARC_COUNTRY = {
|
|
74
73
|
'cs' => "Czechoslovakia", # discontinued
|
75
74
|
'ctu' => "Connecticut",
|
76
75
|
'cu' => "Cuba",
|
77
|
-
'cv' => "
|
76
|
+
'cv' => "Cabo Verde",
|
78
77
|
'cw' => "Cook Islands",
|
79
78
|
'cx' => "Central African Republic",
|
80
79
|
'cy' => "Cyprus",
|
@@ -109,6 +108,7 @@ MARC_COUNTRY = {
|
|
109
108
|
'gb' => "Kiribati",
|
110
109
|
'gd' => "Grenada",
|
111
110
|
'ge' => "Germany (East)", # discontinued
|
111
|
+
'gg' => "Guernsey",
|
112
112
|
'gh' => "Ghana",
|
113
113
|
'gi' => "Gibraltar",
|
114
114
|
'gl' => "Greenland",
|
@@ -137,6 +137,7 @@ MARC_COUNTRY = {
|
|
137
137
|
'ie' => "Ireland",
|
138
138
|
'ii' => "India",
|
139
139
|
'ilu' => "Illinois",
|
140
|
+
'im' => "Isle of Man",
|
140
141
|
'inu' => "Indiana",
|
141
142
|
'io' => "Indonesia",
|
142
143
|
'iq' => "Iraq",
|
@@ -148,6 +149,7 @@ MARC_COUNTRY = {
|
|
148
149
|
'iw' => "Israel-Jordan Demilitarized Zones", # discontinued
|
149
150
|
'iy' => "Iraq-Saudi Arabia Neutral Zone",
|
150
151
|
'ja' => "Japan",
|
152
|
+
'je' => "Jersey",
|
151
153
|
'ji' => "Johnston Atoll",
|
152
154
|
'jm' => "Jamaica",
|
153
155
|
'jn' => "Jan Mayen", # discontinued
|
@@ -284,7 +286,7 @@ MARC_COUNTRY = {
|
|
284
286
|
'snc' => "Saskatchewan",
|
285
287
|
'so' => "Somalia",
|
286
288
|
'sp' => "Spain",
|
287
|
-
'sq' => "
|
289
|
+
'sq' => "Eswatini",
|
288
290
|
'sr' => "Surinam",
|
289
291
|
'ss' => "Western Sahara",
|
290
292
|
'st' => "Saint-Martin",
|
@@ -365,7 +367,7 @@ MARC_COUNTRY = {
|
|
365
367
|
'xk' => "Saint Lucia",
|
366
368
|
'xl' => "Saint Pierre and Miquelon",
|
367
369
|
'xm' => "Saint Vincent and the Grenadines",
|
368
|
-
'xn' => "Macedonia",
|
370
|
+
'xn' => "North Macedonia",
|
369
371
|
'xna' => "New South Wales",
|
370
372
|
'xo' => "Slovakia",
|
371
373
|
'xoa' => "Northern Territory",
|
@@ -384,4 +386,4 @@ MARC_COUNTRY = {
|
|
384
386
|
'ys' => "Yemen (People's Democratic Republic)", # discontinued
|
385
387
|
'yu' => "Serbia and Montenegro", # discontinued
|
386
388
|
'za' => "Zambia"
|
387
|
-
}
|
389
|
+
}.freeze
|