mods 2.4.1 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +24 -0
  3. data/.gitignore +1 -0
  4. data/README.md +0 -1
  5. data/lib/mods/date.rb +54 -17
  6. data/lib/mods/marc_country_codes.rb +12 -10
  7. data/lib/mods/nom_terminology.rb +109 -845
  8. data/lib/mods/reader.rb +9 -39
  9. data/lib/mods/record.rb +13 -28
  10. data/lib/mods/version.rb +1 -1
  11. data/mods.gemspec +2 -2
  12. data/spec/fixture_data/hp566jq8781.xml +334 -0
  13. data/spec/integration/parker_spec.rb +217 -0
  14. data/spec/{date_spec.rb → lib/date_spec.rb} +9 -1
  15. data/spec/lib/language_spec.rb +123 -0
  16. data/spec/lib/location_spec.rb +175 -0
  17. data/spec/lib/name_spec.rb +368 -0
  18. data/spec/lib/origin_info_spec.rb +134 -0
  19. data/spec/lib/part_spec.rb +162 -0
  20. data/spec/lib/physical_description_spec.rb +72 -0
  21. data/spec/{reader_spec.rb → lib/reader_spec.rb} +1 -41
  22. data/spec/lib/record_info_spec.rb +114 -0
  23. data/spec/lib/record_spec.rb +287 -0
  24. data/spec/lib/related_item_spec.rb +124 -0
  25. data/spec/lib/subject_spec.rb +427 -0
  26. data/spec/lib/title_spec.rb +108 -0
  27. data/spec/lib/top_level_elmnts_simple_spec.rb +169 -0
  28. data/spec/spec_helper.rb +86 -5
  29. data/spec/support/fixtures.rb +9 -0
  30. metadata +49 -44
  31. data/.travis.yml +0 -16
  32. data/spec/language_spec.rb +0 -118
  33. data/spec/location_spec.rb +0 -295
  34. data/spec/name_spec.rb +0 -759
  35. data/spec/origin_info_spec.rb +0 -447
  36. data/spec/part_spec.rb +0 -471
  37. data/spec/physical_description_spec.rb +0 -144
  38. data/spec/record_info_spec.rb +0 -493
  39. data/spec/record_spec.rb +0 -356
  40. data/spec/related_item_spec.rb +0 -305
  41. data/spec/subject_spec.rb +0 -809
  42. data/spec/title_spec.rb +0 -226
  43. data/spec/top_level_elmnts_simple_spec.rb +0 -369
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ee9e9f682562bec8eeee3698cb1dc20fedee759172e0f6ef84a39ab01679550
4
- data.tar.gz: 67a3d7a70e895de43831a40b30195bd8fe2ffb27315c74ec28e805416169512d
3
+ metadata.gz: 566f8acde01c8696c1a183588b663934dc50d500999fcc83ba67516620e57ae7
4
+ data.tar.gz: 87aeca4a314870bcace3fc1b3fe2260b67134becc4ca2a7a22ac3bb5edf73be2
5
5
  SHA512:
6
- metadata.gz: 90ba88b4a9354acfa9714711cd20a932473fafc62445ee40ea33725b4a03927b5f499cff78fab004a7ed307741424ef9aa19dd65e4b821f416e31a95bb116f83
7
- data.tar.gz: 6e343ecf819c236774468511abb31be4a80aa2cfa8725231736f352c34c51cae0e8fe8fd9cd93d527c97fda52160fe1fe65ae0d204fbcff8fab54e5bad4379ec
6
+ metadata.gz: 0b5d979ab8b4fbcb7e5dce0b5d8117e76f1c95a284cf531fd659a1e78d74af5571af309c9ab3e77b6d105f9e9d3427ba0fa88d82f4feeb5d54c18bc306079332
7
+ data.tar.gz: 0bea0554c539de56fcfee76380f0478133d9288fba63b1ff6c6a922d3877ab04e12070590c2a10339599cf2615bc75433d23fb546e75ad4f8627b75dd563efb7
data/.github/workflows/ruby.yml ADDED
@@ -0,0 +1,24 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ pull_request:
7
+ branches: [ master ]
8
+
9
+ jobs:
10
+ tests:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ ruby: [jruby-9.3.2.0, 2.7, '3.0', 3.1]
15
+ steps:
16
+ - uses: actions/checkout@v2
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ ruby-version: ${{ matrix.ruby }}
21
+ - name: Install dependencies
22
+ run: bundle install
23
+ - name: Run tests
24
+ run: bundle exec rake
data/.gitignore CHANGED
@@ -22,3 +22,4 @@ tmtags
22
22
  .idea/*
23
23
  to_delete
24
24
  .byebug_history
25
+ /spec/examples.txt
data/README.md CHANGED
@@ -1,7 +1,6 @@
1
1
  # Mods
2
2
 
3
3
  [<img src="https://secure.travis-ci.org/sul-dlss/mods.png?branch=master" alt="Build Status"/>](http://travis-ci.org/sul-dlss/mods) [![Code Climate Test Coverage](https://codeclimate.com/github/sul-dlss/mods/badges/coverage.svg)](https://codeclimate.com/github/sul-dlss/mods/coverage) [<img
4
- src="https://gemnasium.com/sul-dlss/mods.png" alt="Dependency Status"/>](https://gemnasium.com/sul-dlss/mods) [<img
5
4
  src="https://badge.fury.io/rb/mods.svg" alt="Gem Version"/>](http://badge.fury.io/rb/mods)
6
5
 
7
6
  A Gem to parse MODS (Metadata Object Description Schema) records. More information about MODS can be found at
data/lib/mods/date.rb CHANGED
@@ -11,7 +11,7 @@ module Mods
11
11
  # @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
12
12
  # @return [Mods::Date]
13
13
  def self.from_element(xml)
14
- case xml.attr(:encoding)
14
+ case xml.attr(:encoding)&.downcase
15
15
  when 'w3cdtf'
16
16
  Mods::Date::W3cdtfFormat.new(xml)
17
17
  when 'iso8601'
@@ -23,12 +23,13 @@ module Mods
23
23
  # when 'temper'
24
24
  # Mods::Date::TemperFormat.new(xml)
25
25
  else
26
- date_class = Mods::Date if xml.text =~ /\p{Hebrew}/
26
+ date_class = UnparseableDate if xml.text =~ /\p{Hebrew}/ || xml.text =~ /^-/
27
27
  date_class ||= [
28
28
  MMDDYYYYFormat,
29
29
  MMDDYYFormat,
30
30
  YearRangeFormat,
31
31
  DecadeAsYearDashFormat,
32
+ DecadeStringFormat,
32
33
  EmbeddedBCYearFormat,
33
34
  EmbeddedYearFormat,
34
35
  EmbeddedThreeDigitYearFormat,
@@ -49,12 +50,15 @@ module Mods
49
50
  # Strict ISO8601-encoded date parser
50
51
  class Iso8601Format < Date
51
52
  def self.parse_date(text)
52
- @date = ::Date.parse(normalize_to_edtf(text))
53
+ ::Date.parse(normalize_to_edtf(text))
53
54
  end
54
55
  end
55
56
 
56
57
  # Less strict W3CDTF-encoded date parser
57
58
  class W3cdtfFormat < Date
59
+ def self.normalize_to_edtf(text)
60
+ super.gsub('-00', '')
61
+ end
58
62
  end
59
63
 
60
64
  # Strict EDTF parser
@@ -62,7 +66,16 @@ module Mods
62
66
  attr_reader :date
63
67
 
64
68
  def self.normalize_to_edtf(text)
65
- text
69
+ return '0000' if text.strip == '0'
70
+
71
+ case text
72
+ when /^\d{1,3}$/
73
+ text.rjust(4, "0") if text =~ /^\d{1,3}$/
74
+ when /^-\d{1,3}$/
75
+ "-#{text.sub(/^-/, '').rjust(4, "0")}"
76
+ else
77
+ text
78
+ end
66
79
  end
67
80
  end
68
81
 
@@ -100,6 +113,12 @@ module Mods
100
113
  end
101
114
  end
102
115
 
116
+ class UnparseableDate < ExtractorDateFormat
117
+ def self.parse_date(text)
118
+ nil
119
+ end
120
+ end
121
+
103
122
  # Full text extractor for MM/DD/YYYY and MM/DD/YYY-formatted dates
104
123
  class MMDDYYYYFormat < ExtractorDateFormat
105
124
  REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{3,4})/
@@ -201,7 +220,17 @@ module Mods
201
220
 
202
221
  # Full-text extractor for data formatted as YYY-
203
222
  class DecadeAsYearDashFormat < ExtractorDateFormat
204
- REGEX = /(?<!\d)(?<year>\d{3})[-_x?](?!\d)/
223
+ REGEX = /(?<!\d)(?<year>\d{3})[-_xu?](?!\d)/
224
+
225
+ def self.normalize_to_edtf(text)
226
+ matches = text.match(REGEX)
227
+ "#{matches[:year]}X"
228
+ end
229
+ end
230
+
231
+ # Full-text extractor for data formatted as YYY0s
232
+ class DecadeStringFormat < ExtractorDateFormat
233
+ REGEX = /(?<!\d)(?<year>\d{3})0s(?!\d)/
205
234
 
206
235
  def self.normalize_to_edtf(text)
207
236
  matches = text.match(REGEX)
@@ -221,42 +250,42 @@ module Mods
221
250
 
222
251
  # Full-text extractor that tries hard to pick any year present in the data
223
252
  class EmbeddedYearFormat < ExtractorDateFormat
224
- REGEX = /(?<prefix>-)?(?<!\d)(?<year>\d{4})(?!\d)/
253
+ REGEX = /(?<!\d)(?<year>\d{4})(?!\d)/
225
254
 
226
255
  def self.normalize_to_edtf(text)
227
256
  matches = text.match(REGEX)
228
- "#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
257
+ "#{matches[:year].rjust(4, "0")}"
229
258
  end
230
259
  end
231
260
 
232
261
  # Full-text extractor that tries hard to pick any year present in the data
233
262
  class EmbeddedThreeDigitYearFormat < ExtractorDateFormat
234
- REGEX = /(?<prefix>-)?(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
263
+ REGEX = /(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
235
264
 
236
265
  def self.normalize_to_edtf(text)
237
266
  matches = text.match(REGEX)
238
- "#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
267
+ "#{matches[:year].rjust(4, "0")}"
239
268
  end
240
269
  end
241
270
 
242
271
  # Full-text extractor that tries hard to pick any year present in the data
243
272
  class OneOrTwoDigitYearFormat < ExtractorDateFormat
244
- REGEX = /^(?<prefix>-)?(?<year>\d{1,2})$/
273
+ REGEX = /^(?<year>\d{1,2})$/
245
274
 
246
275
  def self.normalize_to_edtf(text)
247
276
  matches = text.match(REGEX)
248
- "#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
277
+ "#{matches[:year].rjust(4, "0")}"
249
278
  end
250
279
  end
251
280
 
252
281
  # Full-text extractor that tries hard to pick any year present in the data
253
282
  class EmbeddedYearWithBracketsFormat < ExtractorDateFormat
254
283
  # [YYY]Y Y[YYY] [YY]YY Y[YY]Y YY[YY] YYY[Y] YY[Y]Y Y[Y]YY [Y]YYY
255
- REGEX = /(?<prefix>-)?(?<year>[\d\[\]]{6})(?!\d)/
284
+ REGEX = /(?<year>[\d\[\]]{6})(?!\d)/
256
285
 
257
286
  def self.normalize_to_edtf(text)
258
287
  matches = text.match(REGEX)
259
- "#{matches[:prefix]}#{matches[:year].gsub('[', '').gsub(']', '')}"
288
+ "#{matches[:year].gsub('[', '').gsub(']', '')}"
260
289
  end
261
290
  end
262
291
 
@@ -327,7 +356,9 @@ module Mods
327
356
  #
328
357
  # @return [String]
329
358
  def type
330
- xml.attr(:type)
359
+ return if xml.attr(:type)&.empty?
360
+
361
+ xml.attr(:type)&.downcase
331
362
  end
332
363
 
333
364
  ##
@@ -335,7 +366,9 @@ module Mods
335
366
  #
336
367
  # @return [String]
337
368
  def encoding
338
- xml.attr(:encoding)
369
+ return if xml.attr(:encoding)&.empty?
370
+
371
+ xml.attr(:encoding)&.downcase
339
372
  end
340
373
 
341
374
  ##
@@ -359,7 +392,9 @@ module Mods
359
392
  #
360
393
  # @return [String]
361
394
  def point
362
- xml.attr(:point)
395
+ return if xml.attr(:point)&.empty?
396
+
397
+ xml.attr(:point)&.downcase
363
398
  end
364
399
 
365
400
  ##
@@ -391,7 +426,7 @@ module Mods
391
426
  #
392
427
  # @return [String]
393
428
  def qualifier
394
- xml.attr(:qualifier)
429
+ xml.attr(:qualifier)&.downcase
395
430
  end
396
431
 
397
432
  ##
@@ -419,6 +454,8 @@ module Mods
419
454
  end
420
455
 
421
456
  def precision
457
+ return :unknown unless date_range || date
458
+
422
459
  if date_range.is_a? EDTF::Century
423
460
  :century
424
461
  elsif date_range.is_a? EDTF::Decade
data/lib/mods/marc_country_codes.rb CHANGED
@@ -1,8 +1,8 @@
1
- # encoding: UTF-8
2
-
3
1
  # Represents the Marc Country Codes mapped to names, from http://www.loc.gov/marc/countries/countries_code.html 2013-01-03
4
- #key - Marc Country code
5
- #value - Marc Country term
2
+ # key - Marc Country code
3
+ # value - Marc Country term
4
+ # frozen_string_literal: true
5
+
6
6
  MARC_COUNTRY = {
7
7
  'aa' => "Albania",
8
8
  'abc' => "Alberta",
@@ -11,7 +11,6 @@ MARC_COUNTRY = {
11
11
  'ae' => "Algeria",
12
12
  'af' => "Afghanistan",
13
13
  'ag' => "Argentina",
14
- #'ai' => "Anguilla", # discontinued
15
14
  'ai' => "Armenia (Republic)",
16
15
  'air' => "Armenian S.S.R.", # discontinued
17
16
  'aj' => "Azerbaijan",
@@ -40,7 +39,7 @@ MARC_COUNTRY = {
40
39
  'bi' => "British Indian Ocean Territory",
41
40
  'bl' => "Brazil",
42
41
  'bm' => "Bermuda Islands",
43
- 'bn' => "Bosnia and Hercegovina",
42
+ 'bn' => "Bosnia and Herzegovina",
44
43
  'bo' => "Bolivia",
45
44
  'bp' => "Solomon Islands",
46
45
  'br' => "Burma",
@@ -74,7 +73,7 @@ MARC_COUNTRY = {
74
73
  'cs' => "Czechoslovakia", # discontinued
75
74
  'ctu' => "Connecticut",
76
75
  'cu' => "Cuba",
77
- 'cv' => "Cape Verde",
76
+ 'cv' => "Cabo Verde",
78
77
  'cw' => "Cook Islands",
79
78
  'cx' => "Central African Republic",
80
79
  'cy' => "Cyprus",
@@ -109,6 +108,7 @@ MARC_COUNTRY = {
109
108
  'gb' => "Kiribati",
110
109
  'gd' => "Grenada",
111
110
  'ge' => "Germany (East)", # discontinued
111
+ 'gg' => "Guernsey",
112
112
  'gh' => "Ghana",
113
113
  'gi' => "Gibraltar",
114
114
  'gl' => "Greenland",
@@ -137,6 +137,7 @@ MARC_COUNTRY = {
137
137
  'ie' => "Ireland",
138
138
  'ii' => "India",
139
139
  'ilu' => "Illinois",
140
+ 'im' => "Isle of Man",
140
141
  'inu' => "Indiana",
141
142
  'io' => "Indonesia",
142
143
  'iq' => "Iraq",
@@ -148,6 +149,7 @@ MARC_COUNTRY = {
148
149
  'iw' => "Israel-Jordan Demilitarized Zones", # discontinued
149
150
  'iy' => "Iraq-Saudi Arabia Neutral Zone",
150
151
  'ja' => "Japan",
152
+ 'je' => "Jersey",
151
153
  'ji' => "Johnston Atoll",
152
154
  'jm' => "Jamaica",
153
155
  'jn' => "Jan Mayen", # discontinued
@@ -284,7 +286,7 @@ MARC_COUNTRY = {
284
286
  'snc' => "Saskatchewan",
285
287
  'so' => "Somalia",
286
288
  'sp' => "Spain",
287
- 'sq' => "Swaziland",
289
+ 'sq' => "Eswatini",
288
290
  'sr' => "Surinam",
289
291
  'ss' => "Western Sahara",
290
292
  'st' => "Saint-Martin",
@@ -365,7 +367,7 @@ MARC_COUNTRY = {
365
367
  'xk' => "Saint Lucia",
366
368
  'xl' => "Saint Pierre and Miquelon",
367
369
  'xm' => "Saint Vincent and the Grenadines",
368
- 'xn' => "Macedonia",
370
+ 'xn' => "North Macedonia",
369
371
  'xna' => "New South Wales",
370
372
  'xo' => "Slovakia",
371
373
  'xoa' => "Northern Territory",
@@ -384,4 +386,4 @@ MARC_COUNTRY = {
384
386
  'ys' => "Yemen (People's Democratic Republic)", # discontinued
385
387
  'yu' => "Serbia and Montenegro", # discontinued
386
388
  'za' => "Zambia"
387
- }
389
+ }.freeze