mods 2.4.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ruby.yml +24 -0
  3. data/.gitignore +1 -0
  4. data/Gemfile +0 -4
  5. data/README.md +1 -3
  6. data/lib/mods/date.rb +51 -17
  7. data/lib/mods/marc_country_codes.rb +12 -10
  8. data/lib/mods/nom_terminology.rb +110 -849
  9. data/lib/mods/reader.rb +9 -39
  10. data/lib/mods/record.rb +13 -28
  11. data/lib/mods/version.rb +1 -1
  12. data/mods.gemspec +3 -3
  13. data/spec/fixture_data/hp566jq8781.xml +334 -0
  14. data/spec/integration/parker_spec.rb +217 -0
  15. data/spec/{date_spec.rb → lib/date_spec.rb} +8 -1
  16. data/spec/lib/language_spec.rb +123 -0
  17. data/spec/lib/location_spec.rb +175 -0
  18. data/spec/lib/name_spec.rb +368 -0
  19. data/spec/lib/origin_info_spec.rb +134 -0
  20. data/spec/lib/part_spec.rb +162 -0
  21. data/spec/lib/physical_description_spec.rb +72 -0
  22. data/spec/{reader_spec.rb → lib/reader_spec.rb} +1 -41
  23. data/spec/lib/record_info_spec.rb +114 -0
  24. data/spec/lib/record_spec.rb +287 -0
  25. data/spec/lib/related_item_spec.rb +124 -0
  26. data/spec/lib/subject_spec.rb +427 -0
  27. data/spec/lib/title_spec.rb +108 -0
  28. data/spec/lib/top_level_elmnts_simple_spec.rb +169 -0
  29. data/spec/spec_helper.rb +87 -6
  30. data/spec/support/fixtures.rb +9 -0
  31. metadata +61 -43
  32. data/.coveralls.yml +0 -1
  33. data/.travis.yml +0 -6
  34. data/spec/language_spec.rb +0 -118
  35. data/spec/location_spec.rb +0 -295
  36. data/spec/name_spec.rb +0 -759
  37. data/spec/origin_info_spec.rb +0 -447
  38. data/spec/part_spec.rb +0 -471
  39. data/spec/physical_description_spec.rb +0 -144
  40. data/spec/record_info_spec.rb +0 -493
  41. data/spec/record_spec.rb +0 -356
  42. data/spec/related_item_spec.rb +0 -305
  43. data/spec/subject_spec.rb +0 -809
  44. data/spec/title_spec.rb +0 -226
  45. data/spec/top_level_elmnts_simple_spec.rb +0 -369
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 93d01c5f52e3486f8ec3405b3f30355dd4f2fc31
4
- data.tar.gz: 55ef282f94d86ef30908e1a8bc865c388132f15d
2
+ SHA256:
3
+ metadata.gz: 30381fee9ce7d47827010ce02902834f10118daeb381bdfa65bcc5adf778b65f
4
+ data.tar.gz: 65b9fac4ebee7589010039dbba9cbd8b204a94987443ec2c68e36a7509f39685
5
5
  SHA512:
6
- metadata.gz: 20aaf32888be2c3511a8faf35ff5e985495c3e705277a50c0a3fba03d541bf340bbb0972becba9e32da530179bd8d97de894defcb31c6cd60825a6777bcf4692
7
- data.tar.gz: 402b4bd7f93f0b35471a241bee110acc1e34574c913862599a7fde5fe5ed49d48d9e06d7adced7d2c1258f3bf41e92e7dfa5cb35a69350477696c7470d79af5f
6
+ metadata.gz: e9ecec0384d8c4d931cace14b32ddc9e2b5161ed6ef9f61af27fb89d9993643db790f98966f8ddea419008bd03588cbc6aaf4dfee25b404c5a392deae1b3e78e
7
+ data.tar.gz: 79b733073abbf0ee6dc8da9835c86e44be67f1f298a4e09f3d6380772ff51713cd63c121daf2bb0ae0751ab9ffb3954003eb24ecffde1a7e89ed3c542cead994
data/.github/workflows/ruby.yml ADDED
@@ -0,0 +1,24 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ pull_request:
7
+ branches: [ master ]
8
+
9
+ jobs:
10
+ tests:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ ruby: [jruby-9.3.2.0, 2.7, '3.0', 3.1]
15
+ steps:
16
+ - uses: actions/checkout@v2
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ ruby-version: ${{ matrix.ruby }}
21
+ - name: Install dependencies
22
+ run: bundle install
23
+ - name: Run tests
24
+ run: bundle exec rake
data/.gitignore CHANGED
@@ -22,3 +22,4 @@ tmtags
22
22
  .idea/*
23
23
  to_delete
24
24
  .byebug_history
25
+ /spec/examples.txt
data/Gemfile CHANGED
@@ -3,10 +3,6 @@ source 'https://rubygems.org'
3
3
  # See mods.gemspec for this gem's dependencies
4
4
  gemspec
5
5
 
6
- group :test do
7
- gem 'coveralls', require: false
8
- end
9
-
10
6
  # Pin to activesupport 4.x for older versions of ruby
11
7
  gem 'activesupport', '~> 4.2' if RUBY_VERSION < '2.2.2'
12
8
  gem 'byebug', platform: :mri
data/README.md CHANGED
@@ -1,8 +1,6 @@
1
1
  # Mods
2
2
 
3
- [<img src="https://secure.travis-ci.org/sul-dlss/mods.png?branch=master" alt="Build Status"/>](http://travis-ci.org/sul-dlss/mods) [<img
4
- src="https://coveralls.io/repos/sul-dlss/mods/badge.png" alt="Coverage Status"/>](https://coveralls.io/r/sul-dlss/mods) [<img
5
- src="https://gemnasium.com/sul-dlss/mods.png" alt="Dependency Status"/>](https://gemnasium.com/sul-dlss/mods) [<img
3
+ [<img src="https://secure.travis-ci.org/sul-dlss/mods.png?branch=master" alt="Build Status"/>](http://travis-ci.org/sul-dlss/mods) [![Code Climate Test Coverage](https://codeclimate.com/github/sul-dlss/mods/badges/coverage.svg)](https://codeclimate.com/github/sul-dlss/mods/coverage) [<img
6
4
  src="https://badge.fury.io/rb/mods.svg" alt="Gem Version"/>](http://badge.fury.io/rb/mods)
7
5
 
8
6
  A Gem to parse MODS (Metadata Object Description Schema) records. More information about MODS can be found at
data/lib/mods/date.rb CHANGED
@@ -11,7 +11,7 @@ module Mods
11
11
  # @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
12
12
  # @return [Mods::Date]
13
13
  def self.from_element(xml)
14
- case xml.attr(:encoding)
14
+ case xml.attr(:encoding)&.downcase
15
15
  when 'w3cdtf'
16
16
  Mods::Date::W3cdtfFormat.new(xml)
17
17
  when 'iso8601'
@@ -23,12 +23,13 @@ module Mods
23
23
  # when 'temper'
24
24
  # Mods::Date::TemperFormat.new(xml)
25
25
  else
26
- date_class = Mods::Date if xml.text =~ /\p{Hebrew}/
26
+ date_class = UnparseableDate if xml.text =~ /\p{Hebrew}/ || xml.text =~ /^-/
27
27
  date_class ||= [
28
28
  MMDDYYYYFormat,
29
29
  MMDDYYFormat,
30
30
  YearRangeFormat,
31
31
  DecadeAsYearDashFormat,
32
+ DecadeStringFormat,
32
33
  EmbeddedBCYearFormat,
33
34
  EmbeddedYearFormat,
34
35
  EmbeddedThreeDigitYearFormat,
@@ -49,7 +50,7 @@ module Mods
49
50
  # Strict ISO8601-encoded date parser
50
51
  class Iso8601Format < Date
51
52
  def self.parse_date(text)
52
- @date = ::Date.parse(normalize_to_edtf(text))
53
+ ::Date.parse(normalize_to_edtf(text))
53
54
  end
54
55
  end
55
56
 
@@ -62,7 +63,16 @@ module Mods
62
63
  attr_reader :date
63
64
 
64
65
  def self.normalize_to_edtf(text)
65
- text
66
+ return '0000' if text.strip == '0'
67
+
68
+ case text
69
+ when /^\d{1,3}$/
70
+ text.rjust(4, "0") if text =~ /^\d{1,3}$/
71
+ when /^-\d{1,3}$/
72
+ "-#{text.sub(/^-/, '').rjust(4, "0")}"
73
+ else
74
+ text
75
+ end
66
76
  end
67
77
  end
68
78
 
@@ -100,6 +110,12 @@ module Mods
100
110
  end
101
111
  end
102
112
 
113
+ class UnparseableDate < ExtractorDateFormat
114
+ def self.parse_date(text)
115
+ nil
116
+ end
117
+ end
118
+
103
119
  # Full text extractor for MM/DD/YYYY and MM/DD/YYY-formatted dates
104
120
  class MMDDYYYYFormat < ExtractorDateFormat
105
121
  REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{3,4})/
@@ -201,7 +217,17 @@ module Mods
201
217
 
202
218
  # Full-text extractor for data formatted as YYY-
203
219
  class DecadeAsYearDashFormat < ExtractorDateFormat
204
- REGEX = /(?<!\d)(?<year>\d{3})[-_x?](?!\d)/
220
+ REGEX = /(?<!\d)(?<year>\d{3})[-_xu?](?!\d)/
221
+
222
+ def self.normalize_to_edtf(text)
223
+ matches = text.match(REGEX)
224
+ "#{matches[:year]}X"
225
+ end
226
+ end
227
+
228
+ # Full-text extractor for data formatted as YYY0s
229
+ class DecadeStringFormat < ExtractorDateFormat
230
+ REGEX = /(?<!\d)(?<year>\d{3})0s(?!\d)/
205
231
 
206
232
  def self.normalize_to_edtf(text)
207
233
  matches = text.match(REGEX)
@@ -221,42 +247,42 @@ module Mods
221
247
 
222
248
  # Full-text extractor that tries hard to pick any year present in the data
223
249
  class EmbeddedYearFormat < ExtractorDateFormat
224
- REGEX = /(?<prefix>-)?(?<!\d)(?<year>\d{4})(?!\d)/
250
+ REGEX = /(?<!\d)(?<year>\d{4})(?!\d)/
225
251
 
226
252
  def self.normalize_to_edtf(text)
227
253
  matches = text.match(REGEX)
228
- "#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
254
+ "#{matches[:year].rjust(4, "0")}"
229
255
  end
230
256
  end
231
257
 
232
258
  # Full-text extractor that tries hard to pick any year present in the data
233
259
  class EmbeddedThreeDigitYearFormat < ExtractorDateFormat
234
- REGEX = /(?<prefix>-)?(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
260
+ REGEX = /(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
235
261
 
236
262
  def self.normalize_to_edtf(text)
237
263
  matches = text.match(REGEX)
238
- "#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
264
+ "#{matches[:year].rjust(4, "0")}"
239
265
  end
240
266
  end
241
267
 
242
268
  # Full-text extractor that tries hard to pick any year present in the data
243
269
  class OneOrTwoDigitYearFormat < ExtractorDateFormat
244
- REGEX = /^(?<prefix>-)?(?<year>\d{1,2})$/
270
+ REGEX = /^(?<year>\d{1,2})$/
245
271
 
246
272
  def self.normalize_to_edtf(text)
247
273
  matches = text.match(REGEX)
248
- "#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
274
+ "#{matches[:year].rjust(4, "0")}"
249
275
  end
250
276
  end
251
277
 
252
278
  # Full-text extractor that tries hard to pick any year present in the data
253
279
  class EmbeddedYearWithBracketsFormat < ExtractorDateFormat
254
280
  # [YYY]Y Y[YYY] [YY]YY Y[YY]Y YY[YY] YYY[Y] YY[Y]Y Y[Y]YY [Y]YYY
255
- REGEX = /(?<prefix>-)?(?<year>[\d\[\]]{6})(?!\d)/
281
+ REGEX = /(?<year>[\d\[\]]{6})(?!\d)/
256
282
 
257
283
  def self.normalize_to_edtf(text)
258
284
  matches = text.match(REGEX)
259
- "#{matches[:prefix]}#{matches[:year].gsub('[', '').gsub(']', '')}"
285
+ "#{matches[:year].gsub('[', '').gsub(']', '')}"
260
286
  end
261
287
  end
262
288
 
@@ -327,7 +353,9 @@ module Mods
327
353
  #
328
354
  # @return [String]
329
355
  def type
330
- xml.attr(:type)
356
+ return if xml.attr(:type)&.empty?
357
+
358
+ xml.attr(:type)&.downcase
331
359
  end
332
360
 
333
361
  ##
@@ -335,7 +363,9 @@ module Mods
335
363
  #
336
364
  # @return [String]
337
365
  def encoding
338
- xml.attr(:encoding)
366
+ return if xml.attr(:encoding)&.empty?
367
+
368
+ xml.attr(:encoding)&.downcase
339
369
  end
340
370
 
341
371
  ##
@@ -359,7 +389,9 @@ module Mods
359
389
  #
360
390
  # @return [String]
361
391
  def point
362
- xml.attr(:point)
392
+ return if xml.attr(:point)&.empty?
393
+
394
+ xml.attr(:point)&.downcase
363
395
  end
364
396
 
365
397
  ##
@@ -391,7 +423,7 @@ module Mods
391
423
  #
392
424
  # @return [String]
393
425
  def qualifier
394
- xml.attr(:qualifier)
426
+ xml.attr(:qualifier)&.downcase
395
427
  end
396
428
 
397
429
  ##
@@ -419,6 +451,8 @@ module Mods
419
451
  end
420
452
 
421
453
  def precision
454
+ return :unknown unless date_range || date
455
+
422
456
  if date_range.is_a? EDTF::Century
423
457
  :century
424
458
  elsif date_range.is_a? EDTF::Decade
data/lib/mods/marc_country_codes.rb CHANGED
@@ -1,8 +1,8 @@
1
- # encoding: UTF-8
2
-
3
1
  # Represents the Marc Country Codes mapped to names, from http://www.loc.gov/marc/countries/countries_code.html 2013-01-03
4
- #key - Marc Country code
5
- #value - Marc Country term
2
+ # key - Marc Country code
3
+ # value - Marc Country term
4
+ # frozen_string_literal: true
5
+
6
6
  MARC_COUNTRY = {
7
7
  'aa' => "Albania",
8
8
  'abc' => "Alberta",
@@ -11,7 +11,6 @@ MARC_COUNTRY = {
11
11
  'ae' => "Algeria",
12
12
  'af' => "Afghanistan",
13
13
  'ag' => "Argentina",
14
- #'ai' => "Anguilla", # discontinued
15
14
  'ai' => "Armenia (Republic)",
16
15
  'air' => "Armenian S.S.R.", # discontinued
17
16
  'aj' => "Azerbaijan",
@@ -40,7 +39,7 @@ MARC_COUNTRY = {
40
39
  'bi' => "British Indian Ocean Territory",
41
40
  'bl' => "Brazil",
42
41
  'bm' => "Bermuda Islands",
43
- 'bn' => "Bosnia and Hercegovina",
42
+ 'bn' => "Bosnia and Herzegovina",
44
43
  'bo' => "Bolivia",
45
44
  'bp' => "Solomon Islands",
46
45
  'br' => "Burma",
@@ -74,7 +73,7 @@ MARC_COUNTRY = {
74
73
  'cs' => "Czechoslovakia", # discontinued
75
74
  'ctu' => "Connecticut",
76
75
  'cu' => "Cuba",
77
- 'cv' => "Cape Verde",
76
+ 'cv' => "Cabo Verde",
78
77
  'cw' => "Cook Islands",
79
78
  'cx' => "Central African Republic",
80
79
  'cy' => "Cyprus",
@@ -109,6 +108,7 @@ MARC_COUNTRY = {
109
108
  'gb' => "Kiribati",
110
109
  'gd' => "Grenada",
111
110
  'ge' => "Germany (East)", # discontinued
111
+ 'gg' => "Guernsey",
112
112
  'gh' => "Ghana",
113
113
  'gi' => "Gibraltar",
114
114
  'gl' => "Greenland",
@@ -137,6 +137,7 @@ MARC_COUNTRY = {
137
137
  'ie' => "Ireland",
138
138
  'ii' => "India",
139
139
  'ilu' => "Illinois",
140
+ 'im' => "Isle of Man",
140
141
  'inu' => "Indiana",
141
142
  'io' => "Indonesia",
142
143
  'iq' => "Iraq",
@@ -148,6 +149,7 @@ MARC_COUNTRY = {
148
149
  'iw' => "Israel-Jordan Demilitarized Zones", # discontinued
149
150
  'iy' => "Iraq-Saudi Arabia Neutral Zone",
150
151
  'ja' => "Japan",
152
+ 'je' => "Jersey",
151
153
  'ji' => "Johnston Atoll",
152
154
  'jm' => "Jamaica",
153
155
  'jn' => "Jan Mayen", # discontinued
@@ -284,7 +286,7 @@ MARC_COUNTRY = {
284
286
  'snc' => "Saskatchewan",
285
287
  'so' => "Somalia",
286
288
  'sp' => "Spain",
287
- 'sq' => "Swaziland",
289
+ 'sq' => "Eswatini",
288
290
  'sr' => "Surinam",
289
291
  'ss' => "Western Sahara",
290
292
  'st' => "Saint-Martin",
@@ -365,7 +367,7 @@ MARC_COUNTRY = {
365
367
  'xk' => "Saint Lucia",
366
368
  'xl' => "Saint Pierre and Miquelon",
367
369
  'xm' => "Saint Vincent and the Grenadines",
368
- 'xn' => "Macedonia",
370
+ 'xn' => "North Macedonia",
369
371
  'xna' => "New South Wales",
370
372
  'xo' => "Slovakia",
371
373
  'xoa' => "Northern Territory",
@@ -384,4 +386,4 @@ MARC_COUNTRY = {
384
386
  'ys' => "Yemen (People's Democratic Republic)", # discontinued
385
387
  'yu' => "Serbia and Montenegro", # discontinued
386
388
  'za' => "Zambia"
387
- }
389
+ }.freeze