mods 2.4.1 → 3.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +24 -0
  3. data/.gitignore +1 -0
  4. data/README.md +0 -1
  5. data/lib/mods/date.rb +54 -17
  6. data/lib/mods/marc_country_codes.rb +12 -10
  7. data/lib/mods/nom_terminology.rb +109 -845
  8. data/lib/mods/reader.rb +9 -39
  9. data/lib/mods/record.rb +13 -28
  10. data/lib/mods/version.rb +1 -1
  11. data/mods.gemspec +2 -2
  12. data/spec/fixture_data/hp566jq8781.xml +334 -0
  13. data/spec/integration/parker_spec.rb +217 -0
  14. data/spec/{date_spec.rb → lib/date_spec.rb} +9 -1
  15. data/spec/lib/language_spec.rb +123 -0
  16. data/spec/lib/location_spec.rb +175 -0
  17. data/spec/lib/name_spec.rb +368 -0
  18. data/spec/lib/origin_info_spec.rb +134 -0
  19. data/spec/lib/part_spec.rb +162 -0
  20. data/spec/lib/physical_description_spec.rb +72 -0
  21. data/spec/{reader_spec.rb → lib/reader_spec.rb} +1 -41
  22. data/spec/lib/record_info_spec.rb +114 -0
  23. data/spec/lib/record_spec.rb +287 -0
  24. data/spec/lib/related_item_spec.rb +124 -0
  25. data/spec/lib/subject_spec.rb +427 -0
  26. data/spec/lib/title_spec.rb +108 -0
  27. data/spec/lib/top_level_elmnts_simple_spec.rb +169 -0
  28. data/spec/spec_helper.rb +86 -5
  29. data/spec/support/fixtures.rb +9 -0
  30. metadata +49 -44
  31. data/.travis.yml +0 -16
  32. data/spec/language_spec.rb +0 -118
  33. data/spec/location_spec.rb +0 -295
  34. data/spec/name_spec.rb +0 -759
  35. data/spec/origin_info_spec.rb +0 -447
  36. data/spec/part_spec.rb +0 -471
  37. data/spec/physical_description_spec.rb +0 -144
  38. data/spec/record_info_spec.rb +0 -493
  39. data/spec/record_spec.rb +0 -356
  40. data/spec/related_item_spec.rb +0 -305
  41. data/spec/subject_spec.rb +0 -809
  42. data/spec/title_spec.rb +0 -226
  43. data/spec/top_level_elmnts_simple_spec.rb +0 -369
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ee9e9f682562bec8eeee3698cb1dc20fedee759172e0f6ef84a39ab01679550
4
- data.tar.gz: 67a3d7a70e895de43831a40b30195bd8fe2ffb27315c74ec28e805416169512d
3
+ metadata.gz: 566f8acde01c8696c1a183588b663934dc50d500999fcc83ba67516620e57ae7
4
+ data.tar.gz: 87aeca4a314870bcace3fc1b3fe2260b67134becc4ca2a7a22ac3bb5edf73be2
5
5
  SHA512:
6
- metadata.gz: 90ba88b4a9354acfa9714711cd20a932473fafc62445ee40ea33725b4a03927b5f499cff78fab004a7ed307741424ef9aa19dd65e4b821f416e31a95bb116f83
7
- data.tar.gz: 6e343ecf819c236774468511abb31be4a80aa2cfa8725231736f352c34c51cae0e8fe8fd9cd93d527c97fda52160fe1fe65ae0d204fbcff8fab54e5bad4379ec
6
+ metadata.gz: 0b5d979ab8b4fbcb7e5dce0b5d8117e76f1c95a284cf531fd659a1e78d74af5571af309c9ab3e77b6d105f9e9d3427ba0fa88d82f4feeb5d54c18bc306079332
7
+ data.tar.gz: 0bea0554c539de56fcfee76380f0478133d9288fba63b1ff6c6a922d3877ab04e12070590c2a10339599cf2615bc75433d23fb546e75ad4f8627b75dd563efb7
data/.github/workflows/ruby.yml ADDED
@@ -0,0 +1,24 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ pull_request:
7
+ branches: [ master ]
8
+
9
+ jobs:
10
+ tests:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ ruby: [jruby-9.3.2.0, 2.7, '3.0', 3.1]
15
+ steps:
16
+ - uses: actions/checkout@v2
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ ruby-version: ${{ matrix.ruby }}
21
+ - name: Install dependencies
22
+ run: bundle install
23
+ - name: Run tests
24
+ run: bundle exec rake
data/.gitignore CHANGED
@@ -22,3 +22,4 @@ tmtags
22
22
  .idea/*
23
23
  to_delete
24
24
  .byebug_history
25
+ /spec/examples.txt
data/README.md CHANGED
@@ -1,7 +1,6 @@
1
1
  # Mods
2
2
 
3
3
  [<img src="https://secure.travis-ci.org/sul-dlss/mods.png?branch=master" alt="Build Status"/>](http://travis-ci.org/sul-dlss/mods) [![Code Climate Test Coverage](https://codeclimate.com/github/sul-dlss/mods/badges/coverage.svg)](https://codeclimate.com/github/sul-dlss/mods/coverage) [<img
4
- src="https://gemnasium.com/sul-dlss/mods.png" alt="Dependency Status"/>](https://gemnasium.com/sul-dlss/mods) [<img
5
4
  src="https://badge.fury.io/rb/mods.svg" alt="Gem Version"/>](http://badge.fury.io/rb/mods)
6
5
 
7
6
  A Gem to parse MODS (Metadata Object Description Schema) records. More information about MODS can be found at
data/lib/mods/date.rb CHANGED
@@ -11,7 +11,7 @@ module Mods
11
11
  # @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
12
12
  # @return [Mods::Date]
13
13
  def self.from_element(xml)
14
- case xml.attr(:encoding)
14
+ case xml.attr(:encoding)&.downcase
15
15
  when 'w3cdtf'
16
16
  Mods::Date::W3cdtfFormat.new(xml)
17
17
  when 'iso8601'
@@ -23,12 +23,13 @@ module Mods
23
23
  # when 'temper'
24
24
  # Mods::Date::TemperFormat.new(xml)
25
25
  else
26
- date_class = Mods::Date if xml.text =~ /\p{Hebrew}/
26
+ date_class = UnparseableDate if xml.text =~ /\p{Hebrew}/ || xml.text =~ /^-/
27
27
  date_class ||= [
28
28
  MMDDYYYYFormat,
29
29
  MMDDYYFormat,
30
30
  YearRangeFormat,
31
31
  DecadeAsYearDashFormat,
32
+ DecadeStringFormat,
32
33
  EmbeddedBCYearFormat,
33
34
  EmbeddedYearFormat,
34
35
  EmbeddedThreeDigitYearFormat,
@@ -49,12 +50,15 @@ module Mods
49
50
  # Strict ISO8601-encoded date parser
50
51
  class Iso8601Format < Date
51
52
  def self.parse_date(text)
52
- @date = ::Date.parse(normalize_to_edtf(text))
53
+ ::Date.parse(normalize_to_edtf(text))
53
54
  end
54
55
  end
55
56
 
56
57
  # Less strict W3CDTF-encoded date parser
57
58
  class W3cdtfFormat < Date
59
+ def self.normalize_to_edtf(text)
60
+ super.gsub('-00', '')
61
+ end
58
62
  end
59
63
 
60
64
  # Strict EDTF parser
@@ -62,7 +66,16 @@ module Mods
62
66
  attr_reader :date
63
67
 
64
68
  def self.normalize_to_edtf(text)
65
- text
69
+ return '0000' if text.strip == '0'
70
+
71
+ case text
72
+ when /^\d{1,3}$/
73
+ text.rjust(4, "0") if text =~ /^\d{1,3}$/
74
+ when /^-\d{1,3}$/
75
+ "-#{text.sub(/^-/, '').rjust(4, "0")}"
76
+ else
77
+ text
78
+ end
66
79
  end
67
80
  end
68
81
 
@@ -100,6 +113,12 @@ module Mods
100
113
  end
101
114
  end
102
115
 
116
+ class UnparseableDate < ExtractorDateFormat
117
+ def self.parse_date(text)
118
+ nil
119
+ end
120
+ end
121
+
103
122
  # Full text extractor for MM/DD/YYYY and MM/DD/YYY-formatted dates
104
123
  class MMDDYYYYFormat < ExtractorDateFormat
105
124
  REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{3,4})/
@@ -201,7 +220,17 @@ module Mods
201
220
 
202
221
  # Full-text extractor for data formatted as YYY-
203
222
  class DecadeAsYearDashFormat < ExtractorDateFormat
204
- REGEX = /(?<!\d)(?<year>\d{3})[-_x?](?!\d)/
223
+ REGEX = /(?<!\d)(?<year>\d{3})[-_xu?](?!\d)/
224
+
225
+ def self.normalize_to_edtf(text)
226
+ matches = text.match(REGEX)
227
+ "#{matches[:year]}X"
228
+ end
229
+ end
230
+
231
+ # Full-text extractor for data formatted as YYY0s
232
+ class DecadeStringFormat < ExtractorDateFormat
233
+ REGEX = /(?<!\d)(?<year>\d{3})0s(?!\d)/
205
234
 
206
235
  def self.normalize_to_edtf(text)
207
236
  matches = text.match(REGEX)
@@ -221,42 +250,42 @@ module Mods
221
250
 
222
251
  # Full-text extractor that tries hard to pick any year present in the data
223
252
  class EmbeddedYearFormat < ExtractorDateFormat
224
- REGEX = /(?<prefix>-)?(?<!\d)(?<year>\d{4})(?!\d)/
253
+ REGEX = /(?<!\d)(?<year>\d{4})(?!\d)/
225
254
 
226
255
  def self.normalize_to_edtf(text)
227
256
  matches = text.match(REGEX)
228
- "#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
257
+ "#{matches[:year].rjust(4, "0")}"
229
258
  end
230
259
  end
231
260
 
232
261
  # Full-text extractor that tries hard to pick any year present in the data
233
262
  class EmbeddedThreeDigitYearFormat < ExtractorDateFormat
234
- REGEX = /(?<prefix>-)?(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
263
+ REGEX = /(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
235
264
 
236
265
  def self.normalize_to_edtf(text)
237
266
  matches = text.match(REGEX)
238
- "#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
267
+ "#{matches[:year].rjust(4, "0")}"
239
268
  end
240
269
  end
241
270
 
242
271
  # Full-text extractor that tries hard to pick any year present in the data
243
272
  class OneOrTwoDigitYearFormat < ExtractorDateFormat
244
- REGEX = /^(?<prefix>-)?(?<year>\d{1,2})$/
273
+ REGEX = /^(?<year>\d{1,2})$/
245
274
 
246
275
  def self.normalize_to_edtf(text)
247
276
  matches = text.match(REGEX)
248
- "#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
277
+ "#{matches[:year].rjust(4, "0")}"
249
278
  end
250
279
  end
251
280
 
252
281
  # Full-text extractor that tries hard to pick any year present in the data
253
282
  class EmbeddedYearWithBracketsFormat < ExtractorDateFormat
254
283
  # [YYY]Y Y[YYY] [YY]YY Y[YY]Y YY[YY] YYY[Y] YY[Y]Y Y[Y]YY [Y]YYY
255
- REGEX = /(?<prefix>-)?(?<year>[\d\[\]]{6})(?!\d)/
284
+ REGEX = /(?<year>[\d\[\]]{6})(?!\d)/
256
285
 
257
286
  def self.normalize_to_edtf(text)
258
287
  matches = text.match(REGEX)
259
- "#{matches[:prefix]}#{matches[:year].gsub('[', '').gsub(']', '')}"
288
+ "#{matches[:year].gsub('[', '').gsub(']', '')}"
260
289
  end
261
290
  end
262
291
 
@@ -327,7 +356,9 @@ module Mods
327
356
  #
328
357
  # @return [String]
329
358
  def type
330
- xml.attr(:type)
359
+ return if xml.attr(:type)&.empty?
360
+
361
+ xml.attr(:type)&.downcase
331
362
  end
332
363
 
333
364
  ##
@@ -335,7 +366,9 @@ module Mods
335
366
  #
336
367
  # @return [String]
337
368
  def encoding
338
- xml.attr(:encoding)
369
+ return if xml.attr(:encoding)&.empty?
370
+
371
+ xml.attr(:encoding)&.downcase
339
372
  end
340
373
 
341
374
  ##
@@ -359,7 +392,9 @@ module Mods
359
392
  #
360
393
  # @return [String]
361
394
  def point
362
- xml.attr(:point)
395
+ return if xml.attr(:point)&.empty?
396
+
397
+ xml.attr(:point)&.downcase
363
398
  end
364
399
 
365
400
  ##
@@ -391,7 +426,7 @@ module Mods
391
426
  #
392
427
  # @return [String]
393
428
  def qualifier
394
- xml.attr(:qualifier)
429
+ xml.attr(:qualifier)&.downcase
395
430
  end
396
431
 
397
432
  ##
@@ -419,6 +454,8 @@ module Mods
419
454
  end
420
455
 
421
456
  def precision
457
+ return :unknown unless date_range || date
458
+
422
459
  if date_range.is_a? EDTF::Century
423
460
  :century
424
461
  elsif date_range.is_a? EDTF::Decade
data/lib/mods/marc_country_codes.rb CHANGED
@@ -1,8 +1,8 @@
1
- # encoding: UTF-8
2
-
3
1
  # Represents the Marc Country Codes mapped to names, from http://www.loc.gov/marc/countries/countries_code.html 2013-01-03
4
- #key - Marc Country code
5
- #value - Marc Country term
2
+ # key - Marc Country code
3
+ # value - Marc Country term
4
+ # frozen_string_literal: true
5
+
6
6
  MARC_COUNTRY = {
7
7
  'aa' => "Albania",
8
8
  'abc' => "Alberta",
@@ -11,7 +11,6 @@ MARC_COUNTRY = {
11
11
  'ae' => "Algeria",
12
12
  'af' => "Afghanistan",
13
13
  'ag' => "Argentina",
14
- #'ai' => "Anguilla", # discontinued
15
14
  'ai' => "Armenia (Republic)",
16
15
  'air' => "Armenian S.S.R.", # discontinued
17
16
  'aj' => "Azerbaijan",
@@ -40,7 +39,7 @@ MARC_COUNTRY = {
40
39
  'bi' => "British Indian Ocean Territory",
41
40
  'bl' => "Brazil",
42
41
  'bm' => "Bermuda Islands",
43
- 'bn' => "Bosnia and Hercegovina",
42
+ 'bn' => "Bosnia and Herzegovina",
44
43
  'bo' => "Bolivia",
45
44
  'bp' => "Solomon Islands",
46
45
  'br' => "Burma",
@@ -74,7 +73,7 @@ MARC_COUNTRY = {
74
73
  'cs' => "Czechoslovakia", # discontinued
75
74
  'ctu' => "Connecticut",
76
75
  'cu' => "Cuba",
77
- 'cv' => "Cape Verde",
76
+ 'cv' => "Cabo Verde",
78
77
  'cw' => "Cook Islands",
79
78
  'cx' => "Central African Republic",
80
79
  'cy' => "Cyprus",
@@ -109,6 +108,7 @@ MARC_COUNTRY = {
109
108
  'gb' => "Kiribati",
110
109
  'gd' => "Grenada",
111
110
  'ge' => "Germany (East)", # discontinued
111
+ 'gg' => "Guernsey",
112
112
  'gh' => "Ghana",
113
113
  'gi' => "Gibraltar",
114
114
  'gl' => "Greenland",
@@ -137,6 +137,7 @@ MARC_COUNTRY = {
137
137
  'ie' => "Ireland",
138
138
  'ii' => "India",
139
139
  'ilu' => "Illinois",
140
+ 'im' => "Isle of Man",
140
141
  'inu' => "Indiana",
141
142
  'io' => "Indonesia",
142
143
  'iq' => "Iraq",
@@ -148,6 +149,7 @@ MARC_COUNTRY = {
148
149
  'iw' => "Israel-Jordan Demilitarized Zones", # discontinued
149
150
  'iy' => "Iraq-Saudi Arabia Neutral Zone",
150
151
  'ja' => "Japan",
152
+ 'je' => "Jersey",
151
153
  'ji' => "Johnston Atoll",
152
154
  'jm' => "Jamaica",
153
155
  'jn' => "Jan Mayen", # discontinued
@@ -284,7 +286,7 @@ MARC_COUNTRY = {
284
286
  'snc' => "Saskatchewan",
285
287
  'so' => "Somalia",
286
288
  'sp' => "Spain",
287
- 'sq' => "Swaziland",
289
+ 'sq' => "Eswatini",
288
290
  'sr' => "Surinam",
289
291
  'ss' => "Western Sahara",
290
292
  'st' => "Saint-Martin",
@@ -365,7 +367,7 @@ MARC_COUNTRY = {
365
367
  'xk' => "Saint Lucia",
366
368
  'xl' => "Saint Pierre and Miquelon",
367
369
  'xm' => "Saint Vincent and the Grenadines",
368
- 'xn' => "Macedonia",
370
+ 'xn' => "North Macedonia",
369
371
  'xna' => "New South Wales",
370
372
  'xo' => "Slovakia",
371
373
  'xoa' => "Northern Territory",
@@ -384,4 +386,4 @@ MARC_COUNTRY = {
384
386
  'ys' => "Yemen (People's Democratic Republic)", # discontinued
385
387
  'yu' => "Serbia and Montenegro", # discontinued
386
388
  'za' => "Zambia"
387
- }
389
+ }.freeze