mods 2.4.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ruby.yml +24 -0
  3. data/.gitignore +1 -0
  4. data/Gemfile +0 -4
  5. data/README.md +1 -3
  6. data/lib/mods/date.rb +51 -17
  7. data/lib/mods/marc_country_codes.rb +12 -10
  8. data/lib/mods/nom_terminology.rb +110 -849
  9. data/lib/mods/reader.rb +9 -39
  10. data/lib/mods/record.rb +13 -28
  11. data/lib/mods/version.rb +1 -1
  12. data/mods.gemspec +3 -3
  13. data/spec/fixture_data/hp566jq8781.xml +334 -0
  14. data/spec/integration/parker_spec.rb +217 -0
  15. data/spec/{date_spec.rb → lib/date_spec.rb} +8 -1
  16. data/spec/lib/language_spec.rb +123 -0
  17. data/spec/lib/location_spec.rb +175 -0
  18. data/spec/lib/name_spec.rb +368 -0
  19. data/spec/lib/origin_info_spec.rb +134 -0
  20. data/spec/lib/part_spec.rb +162 -0
  21. data/spec/lib/physical_description_spec.rb +72 -0
  22. data/spec/{reader_spec.rb → lib/reader_spec.rb} +1 -41
  23. data/spec/lib/record_info_spec.rb +114 -0
  24. data/spec/lib/record_spec.rb +287 -0
  25. data/spec/lib/related_item_spec.rb +124 -0
  26. data/spec/lib/subject_spec.rb +427 -0
  27. data/spec/lib/title_spec.rb +108 -0
  28. data/spec/lib/top_level_elmnts_simple_spec.rb +169 -0
  29. data/spec/spec_helper.rb +87 -6
  30. data/spec/support/fixtures.rb +9 -0
  31. metadata +61 -43
  32. data/.coveralls.yml +0 -1
  33. data/.travis.yml +0 -6
  34. data/spec/language_spec.rb +0 -118
  35. data/spec/location_spec.rb +0 -295
  36. data/spec/name_spec.rb +0 -759
  37. data/spec/origin_info_spec.rb +0 -447
  38. data/spec/part_spec.rb +0 -471
  39. data/spec/physical_description_spec.rb +0 -144
  40. data/spec/record_info_spec.rb +0 -493
  41. data/spec/record_spec.rb +0 -356
  42. data/spec/related_item_spec.rb +0 -305
  43. data/spec/subject_spec.rb +0 -809
  44. data/spec/title_spec.rb +0 -226
  45. data/spec/top_level_elmnts_simple_spec.rb +0 -369
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 93d01c5f52e3486f8ec3405b3f30355dd4f2fc31
4
- data.tar.gz: 55ef282f94d86ef30908e1a8bc865c388132f15d
2
+ SHA256:
3
+ metadata.gz: 30381fee9ce7d47827010ce02902834f10118daeb381bdfa65bcc5adf778b65f
4
+ data.tar.gz: 65b9fac4ebee7589010039dbba9cbd8b204a94987443ec2c68e36a7509f39685
5
5
  SHA512:
6
- metadata.gz: 20aaf32888be2c3511a8faf35ff5e985495c3e705277a50c0a3fba03d541bf340bbb0972becba9e32da530179bd8d97de894defcb31c6cd60825a6777bcf4692
7
- data.tar.gz: 402b4bd7f93f0b35471a241bee110acc1e34574c913862599a7fde5fe5ed49d48d9e06d7adced7d2c1258f3bf41e92e7dfa5cb35a69350477696c7470d79af5f
6
+ metadata.gz: e9ecec0384d8c4d931cace14b32ddc9e2b5161ed6ef9f61af27fb89d9993643db790f98966f8ddea419008bd03588cbc6aaf4dfee25b404c5a392deae1b3e78e
7
+ data.tar.gz: 79b733073abbf0ee6dc8da9835c86e44be67f1f298a4e09f3d6380772ff51713cd63c121daf2bb0ae0751ab9ffb3954003eb24ecffde1a7e89ed3c542cead994
data/.github/workflows/ruby.yml ADDED
@@ -0,0 +1,24 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ pull_request:
7
+ branches: [ master ]
8
+
9
+ jobs:
10
+ tests:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ ruby: [jruby-9.3.2.0, 2.7, '3.0', 3.1]
15
+ steps:
16
+ - uses: actions/checkout@v2
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ ruby-version: ${{ matrix.ruby }}
21
+ - name: Install dependencies
22
+ run: bundle install
23
+ - name: Run tests
24
+ run: bundle exec rake
data/.gitignore CHANGED
@@ -22,3 +22,4 @@ tmtags
22
22
  .idea/*
23
23
  to_delete
24
24
  .byebug_history
25
+ /spec/examples.txt
data/Gemfile CHANGED
@@ -3,10 +3,6 @@ source 'https://rubygems.org'
3
3
  # See mods.gemspec for this gem's dependencies
4
4
  gemspec
5
5
 
6
- group :test do
7
- gem 'coveralls', require: false
8
- end
9
-
10
6
  # Pin to activesupport 4.x for older versions of ruby
11
7
  gem 'activesupport', '~> 4.2' if RUBY_VERSION < '2.2.2'
12
8
  gem 'byebug', platform: :mri
data/README.md CHANGED
@@ -1,8 +1,6 @@
1
1
  # Mods
2
2
 
3
- [<img src="https://secure.travis-ci.org/sul-dlss/mods.png?branch=master" alt="Build Status"/>](http://travis-ci.org/sul-dlss/mods) [<img
4
- src="https://coveralls.io/repos/sul-dlss/mods/badge.png" alt="Coverage Status"/>](https://coveralls.io/r/sul-dlss/mods) [<img
5
- src="https://gemnasium.com/sul-dlss/mods.png" alt="Dependency Status"/>](https://gemnasium.com/sul-dlss/mods) [<img
3
+ [<img src="https://secure.travis-ci.org/sul-dlss/mods.png?branch=master" alt="Build Status"/>](http://travis-ci.org/sul-dlss/mods) [![Code Climate Test Coverage](https://codeclimate.com/github/sul-dlss/mods/badges/coverage.svg)](https://codeclimate.com/github/sul-dlss/mods/coverage) [<img
6
4
  src="https://badge.fury.io/rb/mods.svg" alt="Gem Version"/>](http://badge.fury.io/rb/mods)
7
5
 
8
6
  A Gem to parse MODS (Metadata Object Description Schema) records. More information about MODS can be found at
data/lib/mods/date.rb CHANGED
@@ -11,7 +11,7 @@ module Mods
11
11
  # @param [Nokogiri::XML::Element] xml A date-flavored MODS field from the XML
12
12
  # @return [Mods::Date]
13
13
  def self.from_element(xml)
14
- case xml.attr(:encoding)
14
+ case xml.attr(:encoding)&.downcase
15
15
  when 'w3cdtf'
16
16
  Mods::Date::W3cdtfFormat.new(xml)
17
17
  when 'iso8601'
@@ -23,12 +23,13 @@ module Mods
23
23
  # when 'temper'
24
24
  # Mods::Date::TemperFormat.new(xml)
25
25
  else
26
- date_class = Mods::Date if xml.text =~ /\p{Hebrew}/
26
+ date_class = UnparseableDate if xml.text =~ /\p{Hebrew}/ || xml.text =~ /^-/
27
27
  date_class ||= [
28
28
  MMDDYYYYFormat,
29
29
  MMDDYYFormat,
30
30
  YearRangeFormat,
31
31
  DecadeAsYearDashFormat,
32
+ DecadeStringFormat,
32
33
  EmbeddedBCYearFormat,
33
34
  EmbeddedYearFormat,
34
35
  EmbeddedThreeDigitYearFormat,
@@ -49,7 +50,7 @@ module Mods
49
50
  # Strict ISO8601-encoded date parser
50
51
  class Iso8601Format < Date
51
52
  def self.parse_date(text)
52
- @date = ::Date.parse(normalize_to_edtf(text))
53
+ ::Date.parse(normalize_to_edtf(text))
53
54
  end
54
55
  end
55
56
 
@@ -62,7 +63,16 @@ module Mods
62
63
  attr_reader :date
63
64
 
64
65
  def self.normalize_to_edtf(text)
65
- text
66
+ return '0000' if text.strip == '0'
67
+
68
+ case text
69
+ when /^\d{1,3}$/
70
+ text.rjust(4, "0") if text =~ /^\d{1,3}$/
71
+ when /^-\d{1,3}$/
72
+ "-#{text.sub(/^-/, '').rjust(4, "0")}"
73
+ else
74
+ text
75
+ end
66
76
  end
67
77
  end
68
78
 
@@ -100,6 +110,12 @@ module Mods
100
110
  end
101
111
  end
102
112
 
113
+ class UnparseableDate < ExtractorDateFormat
114
+ def self.parse_date(text)
115
+ nil
116
+ end
117
+ end
118
+
103
119
  # Full text extractor for MM/DD/YYYY and MM/DD/YYY-formatted dates
104
120
  class MMDDYYYYFormat < ExtractorDateFormat
105
121
  REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{3,4})/
@@ -201,7 +217,17 @@ module Mods
201
217
 
202
218
  # Full-text extractor for data formatted as YYY-
203
219
  class DecadeAsYearDashFormat < ExtractorDateFormat
204
- REGEX = /(?<!\d)(?<year>\d{3})[-_x?](?!\d)/
220
+ REGEX = /(?<!\d)(?<year>\d{3})[-_xu?](?!\d)/
221
+
222
+ def self.normalize_to_edtf(text)
223
+ matches = text.match(REGEX)
224
+ "#{matches[:year]}X"
225
+ end
226
+ end
227
+
228
+ # Full-text extractor for data formatted as YYY0s
229
+ class DecadeStringFormat < ExtractorDateFormat
230
+ REGEX = /(?<!\d)(?<year>\d{3})0s(?!\d)/
205
231
 
206
232
  def self.normalize_to_edtf(text)
207
233
  matches = text.match(REGEX)
@@ -221,42 +247,42 @@ module Mods
221
247
 
222
248
  # Full-text extractor that tries hard to pick any year present in the data
223
249
  class EmbeddedYearFormat < ExtractorDateFormat
224
- REGEX = /(?<prefix>-)?(?<!\d)(?<year>\d{4})(?!\d)/
250
+ REGEX = /(?<!\d)(?<year>\d{4})(?!\d)/
225
251
 
226
252
  def self.normalize_to_edtf(text)
227
253
  matches = text.match(REGEX)
228
- "#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
254
+ "#{matches[:year].rjust(4, "0")}"
229
255
  end
230
256
  end
231
257
 
232
258
  # Full-text extractor that tries hard to pick any year present in the data
233
259
  class EmbeddedThreeDigitYearFormat < ExtractorDateFormat
234
- REGEX = /(?<prefix>-)?(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
260
+ REGEX = /(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/
235
261
 
236
262
  def self.normalize_to_edtf(text)
237
263
  matches = text.match(REGEX)
238
- "#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
264
+ "#{matches[:year].rjust(4, "0")}"
239
265
  end
240
266
  end
241
267
 
242
268
  # Full-text extractor that tries hard to pick any year present in the data
243
269
  class OneOrTwoDigitYearFormat < ExtractorDateFormat
244
- REGEX = /^(?<prefix>-)?(?<year>\d{1,2})$/
270
+ REGEX = /^(?<year>\d{1,2})$/
245
271
 
246
272
  def self.normalize_to_edtf(text)
247
273
  matches = text.match(REGEX)
248
- "#{matches[:prefix]}#{matches[:year].rjust(4, "0")}"
274
+ "#{matches[:year].rjust(4, "0")}"
249
275
  end
250
276
  end
251
277
 
252
278
  # Full-text extractor that tries hard to pick any year present in the data
253
279
  class EmbeddedYearWithBracketsFormat < ExtractorDateFormat
254
280
  # [YYY]Y Y[YYY] [YY]YY Y[YY]Y YY[YY] YYY[Y] YY[Y]Y Y[Y]YY [Y]YYY
255
- REGEX = /(?<prefix>-)?(?<year>[\d\[\]]{6})(?!\d)/
281
+ REGEX = /(?<year>[\d\[\]]{6})(?!\d)/
256
282
 
257
283
  def self.normalize_to_edtf(text)
258
284
  matches = text.match(REGEX)
259
- "#{matches[:prefix]}#{matches[:year].gsub('[', '').gsub(']', '')}"
285
+ "#{matches[:year].gsub('[', '').gsub(']', '')}"
260
286
  end
261
287
  end
262
288
 
@@ -327,7 +353,9 @@ module Mods
327
353
  #
328
354
  # @return [String]
329
355
  def type
330
- xml.attr(:type)
356
+ return if xml.attr(:type)&.empty?
357
+
358
+ xml.attr(:type)&.downcase
331
359
  end
332
360
 
333
361
  ##
@@ -335,7 +363,9 @@ module Mods
335
363
  #
336
364
  # @return [String]
337
365
  def encoding
338
- xml.attr(:encoding)
366
+ return if xml.attr(:encoding)&.empty?
367
+
368
+ xml.attr(:encoding)&.downcase
339
369
  end
340
370
 
341
371
  ##
@@ -359,7 +389,9 @@ module Mods
359
389
  #
360
390
  # @return [String]
361
391
  def point
362
- xml.attr(:point)
392
+ return if xml.attr(:point)&.empty?
393
+
394
+ xml.attr(:point)&.downcase
363
395
  end
364
396
 
365
397
  ##
@@ -391,7 +423,7 @@ module Mods
391
423
  #
392
424
  # @return [String]
393
425
  def qualifier
394
- xml.attr(:qualifier)
426
+ xml.attr(:qualifier)&.downcase
395
427
  end
396
428
 
397
429
  ##
@@ -419,6 +451,8 @@ module Mods
419
451
  end
420
452
 
421
453
  def precision
454
+ return :unknown unless date_range || date
455
+
422
456
  if date_range.is_a? EDTF::Century
423
457
  :century
424
458
  elsif date_range.is_a? EDTF::Decade
data/lib/mods/marc_country_codes.rb CHANGED
@@ -1,8 +1,8 @@
1
- # encoding: UTF-8
2
-
3
1
  # Represents the Marc Country Codes mapped to names, from http://www.loc.gov/marc/countries/countries_code.html 2013-01-03
4
- #key - Marc Country code
5
- #value - Marc Country term
2
+ # key - Marc Country code
3
+ # value - Marc Country term
4
+ # frozen_string_literal: true
5
+
6
6
  MARC_COUNTRY = {
7
7
  'aa' => "Albania",
8
8
  'abc' => "Alberta",
@@ -11,7 +11,6 @@ MARC_COUNTRY = {
11
11
  'ae' => "Algeria",
12
12
  'af' => "Afghanistan",
13
13
  'ag' => "Argentina",
14
- #'ai' => "Anguilla", # discontinued
15
14
  'ai' => "Armenia (Republic)",
16
15
  'air' => "Armenian S.S.R.", # discontinued
17
16
  'aj' => "Azerbaijan",
@@ -40,7 +39,7 @@ MARC_COUNTRY = {
40
39
  'bi' => "British Indian Ocean Territory",
41
40
  'bl' => "Brazil",
42
41
  'bm' => "Bermuda Islands",
43
- 'bn' => "Bosnia and Hercegovina",
42
+ 'bn' => "Bosnia and Herzegovina",
44
43
  'bo' => "Bolivia",
45
44
  'bp' => "Solomon Islands",
46
45
  'br' => "Burma",
@@ -74,7 +73,7 @@ MARC_COUNTRY = {
74
73
  'cs' => "Czechoslovakia", # discontinued
75
74
  'ctu' => "Connecticut",
76
75
  'cu' => "Cuba",
77
- 'cv' => "Cape Verde",
76
+ 'cv' => "Cabo Verde",
78
77
  'cw' => "Cook Islands",
79
78
  'cx' => "Central African Republic",
80
79
  'cy' => "Cyprus",
@@ -109,6 +108,7 @@ MARC_COUNTRY = {
109
108
  'gb' => "Kiribati",
110
109
  'gd' => "Grenada",
111
110
  'ge' => "Germany (East)", # discontinued
111
+ 'gg' => "Guernsey",
112
112
  'gh' => "Ghana",
113
113
  'gi' => "Gibraltar",
114
114
  'gl' => "Greenland",
@@ -137,6 +137,7 @@ MARC_COUNTRY = {
137
137
  'ie' => "Ireland",
138
138
  'ii' => "India",
139
139
  'ilu' => "Illinois",
140
+ 'im' => "Isle of Man",
140
141
  'inu' => "Indiana",
141
142
  'io' => "Indonesia",
142
143
  'iq' => "Iraq",
@@ -148,6 +149,7 @@ MARC_COUNTRY = {
148
149
  'iw' => "Israel-Jordan Demilitarized Zones", # discontinued
149
150
  'iy' => "Iraq-Saudi Arabia Neutral Zone",
150
151
  'ja' => "Japan",
152
+ 'je' => "Jersey",
151
153
  'ji' => "Johnston Atoll",
152
154
  'jm' => "Jamaica",
153
155
  'jn' => "Jan Mayen", # discontinued
@@ -284,7 +286,7 @@ MARC_COUNTRY = {
284
286
  'snc' => "Saskatchewan",
285
287
  'so' => "Somalia",
286
288
  'sp' => "Spain",
287
- 'sq' => "Swaziland",
289
+ 'sq' => "Eswatini",
288
290
  'sr' => "Surinam",
289
291
  'ss' => "Western Sahara",
290
292
  'st' => "Saint-Martin",
@@ -365,7 +367,7 @@ MARC_COUNTRY = {
365
367
  'xk' => "Saint Lucia",
366
368
  'xl' => "Saint Pierre and Miquelon",
367
369
  'xm' => "Saint Vincent and the Grenadines",
368
- 'xn' => "Macedonia",
370
+ 'xn' => "North Macedonia",
369
371
  'xna' => "New South Wales",
370
372
  'xo' => "Slovakia",
371
373
  'xoa' => "Northern Territory",
@@ -384,4 +386,4 @@ MARC_COUNTRY = {
384
386
  'ys' => "Yemen (People's Democratic Republic)", # discontinued
385
387
  'yu' => "Serbia and Montenegro", # discontinued
386
388
  'za' => "Zambia"
387
- }
389
+ }.freeze