stanford-mods 1.1.5 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.hound.yml +2 -0
- data/.rubocop.yml +3 -0
- data/.rubocop_todo.yml +12 -0
- data/.travis.yml +12 -13
- data/Gemfile +5 -2
- data/Rakefile +11 -7
- data/config/mappings_hash.rb +1 -1
- data/lib/stanford-mods/name.rb +84 -0
- data/lib/stanford-mods/physical_location.rb +83 -0
- data/lib/stanford-mods/searchworks.rb +1 -2
- data/lib/stanford-mods/version.rb +1 -1
- data/lib/stanford-mods.rb +2 -37
- data/spec/name_spec.rb +385 -206
- data/spec/physical_location_spec.rb +254 -0
- data/spec/searchworks_title_spec.rb +2 -2
- data/spec/spec_helper.rb +2 -2
- metadata +9 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 025c7efb3e29b5c56eaf05af7f58eb364e13c8f7
|
4
|
+
data.tar.gz: 4d66ca23ed45283c8374fa50166f694f63c31bb8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4c1b8aaf08deb76e6de29153cd492def619aa884b8841e7c68254ea076da2948d91ab81123f81ee565f1cb41f7ba85ee2a4dff4bec1a4b2f10f72cd77775a44
|
7
|
+
data.tar.gz: 8b90601ce966f4ac16cac8dd78c255b692eff2bfdaef8dcd889bdc257acdc594f47925baddd705b2d969cc646c3df4295198edefba1f1bd0c6bfe3011232be6f
|
data/.hound.yml
ADDED
data/.rubocop.yml
ADDED
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2015-11-17 14:05:33 -0800 using RuboCop version 0.35.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
Lint/UselessAssignment:
|
11
|
+
Exclude:
|
12
|
+
- 'config/mappings_hash.rb'
|
data/.travis.yml
CHANGED
@@ -1,18 +1,17 @@
|
|
1
1
|
language: ruby
|
2
|
-
script: rake
|
2
|
+
script: rake
|
3
3
|
rvm:
|
4
|
+
- ruby-head
|
5
|
+
- 2.2.3 # spotlight
|
4
6
|
- 2.2.0
|
5
7
|
- 2.1.5
|
6
8
|
- 2.0.0
|
7
|
-
- 1.9.3
|
8
|
-
|
9
|
-
- jruby-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
- bess@stanford.edu
|
17
|
-
#before_install:
|
18
|
-
# gem update --system 1.8.24
|
9
|
+
- 1.9.3 # argo, FRDA
|
10
|
+
# we used to use jruby for merged DOR + MARC records, but no more ...
|
11
|
+
- jruby-head
|
12
|
+
matrix:
|
13
|
+
allow_failures:
|
14
|
+
- rvm: jruby-head
|
15
|
+
|
16
|
+
notifications: false
|
17
|
+
sudo: false
|
data/Gemfile
CHANGED
@@ -3,9 +3,12 @@ source 'https://rubygems.org'
|
|
3
3
|
# See stanford-mods.gemspec for this gem's dependencies
|
4
4
|
gemspec
|
5
5
|
|
6
|
-
|
6
|
+
group :test, :development do
|
7
|
+
gem 'rubocop', require: false
|
8
|
+
gem 'rubocop-rspec', require: false
|
9
|
+
end
|
7
10
|
|
8
11
|
group :test do
|
9
12
|
gem 'coveralls', require: false
|
13
|
+
# gem 'pry-byebug', require: false
|
10
14
|
end
|
11
|
-
|
data/Rakefile
CHANGED
@@ -2,10 +2,6 @@
|
|
2
2
|
require "bundler/gem_tasks"
|
3
3
|
require 'bundler'
|
4
4
|
|
5
|
-
require 'rspec/core/rake_task'
|
6
|
-
require 'yard'
|
7
|
-
require 'yard/rake/yardoc_task'
|
8
|
-
|
9
5
|
begin
|
10
6
|
Bundler.setup(:default, :development)
|
11
7
|
rescue Bundler::BundlerError => e
|
@@ -16,23 +12,31 @@ end
|
|
16
12
|
|
17
13
|
task :default => :ci
|
18
14
|
|
19
|
-
desc "run continuous integration suite (tests, coverage,
|
20
|
-
task :ci => [:rspec, :
|
15
|
+
desc "run continuous integration suite (tests, coverage, rubocop lint)"
|
16
|
+
task :ci => [:rspec, :rubocop]
|
21
17
|
|
22
18
|
task :spec => :rspec
|
23
19
|
|
20
|
+
require 'rspec/core/rake_task'
|
24
21
|
RSpec::Core::RakeTask.new(:rspec) do |spec|
|
25
22
|
spec.rspec_opts = ["-c", "--tty", "-f progress", "-r ./spec/spec_helper.rb"]
|
26
23
|
end
|
27
24
|
|
25
|
+
require 'rubocop/rake_task'
|
26
|
+
RuboCop::RakeTask.new(:rubocop) do |task|
|
27
|
+
task.options = ['-l'] # run lint cops only
|
28
|
+
end
|
29
|
+
|
28
30
|
# Use yard to build docs
|
31
|
+
require 'yard'
|
32
|
+
require 'yard/rake/yardoc_task'
|
29
33
|
begin
|
30
34
|
project_root = File.expand_path(File.dirname(__FILE__))
|
31
35
|
doc_dest_dir = File.join(project_root, 'doc')
|
32
36
|
|
33
37
|
YARD::Rake::YardocTask.new(:doc) do |yt|
|
34
38
|
yt.files = Dir.glob(File.join(project_root, 'lib', '**', '*.rb')) +
|
35
|
-
|
39
|
+
[File.join(project_root, 'README.md')]
|
36
40
|
yt.options = ['--output-dir', doc_dest_dir, '--readme', 'README.md', '--title', 'Stanford-Mods Documentation']
|
37
41
|
end
|
38
42
|
rescue LoadError
|
data/config/mappings_hash.rb
CHANGED
@@ -17,7 +17,7 @@ name_to_xpath = {
|
|
17
17
|
|
18
18
|
# titleInfo
|
19
19
|
'title' => '//mods/titleInfo[not(@type="alternative")]', # .first, extract_title_from_title_info(node)
|
20
|
-
'subtitle' => '//mods/titleInfo/subTitle' #.first.text
|
20
|
+
'subtitle' => '//mods/titleInfo/subTitle', #.first.text
|
21
21
|
'full_title' => '//mods/titleInfo', # .first, extract_full_title_from_title_info(node)
|
22
22
|
'title_variant' => '//mods/titleInfo[@type="alternative"]', # .first, extract_title_from_title_info(node)
|
23
23
|
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'logger'
|
3
|
+
require 'mods'
|
4
|
+
|
5
|
+
# NON-SearchWorks specific wranglings of MODS <name> metadata as a mixin to the Stanford::Mods::Record object
|
6
|
+
module Stanford
|
7
|
+
module Mods
|
8
|
+
|
9
|
+
class Record < ::Mods::Record
|
10
|
+
|
11
|
+
# the first encountered <mods><name> element with marcrelator flavor role of 'Creator' or 'Author'.
|
12
|
+
# if no marcrelator 'Creator' or 'Author', the first name without a role.
|
13
|
+
# if no name without a role, then nil
|
14
|
+
# @return [String] a name in the display_value_w_date form
|
15
|
+
# see Mods::Record.name in nom_terminology for details on the display_value algorithm
|
16
|
+
def main_author_w_date
  # Scan every plain name in document order. The first name carrying a
  # marcrelator role whose value includes 'Creator' or 'Author' wins; the
  # first name with no role at all is remembered as the fallback.
  credited = nil
  fallback = nil
  @mods_ng_xml.plain_name.each do |name|
    fallback ||= name if name.role.size == 0
    name.role.each do |role|
      next unless role.authority.include?('marcrelator')
      next unless role.value.include?('Creator') || role.value.include?('Author')
      # ||= keeps the first display value found and ignores later matches
      credited ||= name.display_value_w_date
    end
  end
  return credited if credited
  # no Creator/Author: use the first role-less name, or nil when there is none
  fallback ? fallback.display_value_w_date : nil
end # main_author_w_date
|
35
|
+
|
36
|
+
# all names, in display form, except the main_author
|
37
|
+
# names will be the display_value_w_date form
|
38
|
+
# see Mods::Record.name in nom_terminology for details on the display_value algorithm
|
39
|
+
def additional_authors_w_dates
  # Collect the display value of every plain name, then drop each entry that
  # equals the main author (Array#delete removes all equal elements).
  display_values = @mods_ng_xml.plain_name.map { |name| name.display_value_w_date }
  display_values.delete(main_author_w_date)
  display_values
end
|
47
|
+
|
48
|
+
COLLECTOR_ROLE_URI = 'http://id.loc.gov/vocabulary/relators/col'
|
49
|
+
|
50
|
+
# @return Array of Strings, each containing the computed display value of a personal name
|
51
|
+
# except for the collector role (see mods gem nom_terminology for display value algorithm)
|
52
|
+
# FIXME: this is broken if there are multiple role codes and some of them are not marcrelator
|
53
|
+
def non_collector_person_authors
  # Returns the display values (with dates) of personal names that do NOT hold
  # a Collector role, or nil when there are none.
  #
  # Each role is examined on its own, the same way collectors_w_dates does it.
  # Testing the whole role set at once mixed the authority of one role with the
  # value of another and only looked at the first valueURI, so names with
  # several roles were misclassified.
  result = []
  @mods_ng_xml.personal_name.each do |n|
    is_collector = n.role.any? do |r|
      (r.authority.include?('marcrelator') && r.value.include?('Collector')) ||
        r.roleTerm.valueURI.first == COLLECTOR_ROLE_URI
    end
    result << n.display_value_w_date unless is_collector
  end
  result unless result.empty?
end
|
64
|
+
|
65
|
+
# @return Array of Strings, each containing the computed display value of
|
66
|
+
# a personal name with the role of Collector (see mods gem nom_terminology for display value algorithm)
|
67
|
+
def collectors_w_dates
  # Returns the display values (with dates) of personal names holding a
  # Collector role, or nil when there are none. A role counts as Collector when
  # it is a marcrelator role whose value includes 'Collector', or when its first
  # roleTerm valueURI equals COLLECTOR_ROLE_URI.
  result = []
  @mods_ng_xml.personal_name.each do |n|
    # any? stops at the first qualifying role, so a name with more than one
    # collector role is listed once instead of once per role; a name without
    # roles simply yields false.
    is_collector = n.role.any? do |r|
      (r.authority.include?('marcrelator') && r.value.include?('Collector')) ||
        r.roleTerm.valueURI.first == COLLECTOR_ROLE_URI
    end
    result << n.display_value_w_date if is_collector
  end
  result unless result.empty?
end
|
81
|
+
|
82
|
+
end # class Record
|
83
|
+
end # Module Mods
|
84
|
+
end # Module Stanford
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'mods'
|
3
|
+
|
4
|
+
# Parsing MODS //location/physicalLocation for series, box, and folder for Special Collections
|
5
|
+
# This is not used by Searchworks, otherwise it would have been in the searchworks.rb file
|
6
|
+
module Stanford
|
7
|
+
module Mods
|
8
|
+
class Record < ::Mods::Record
|
9
|
+
# return box number (note: single valued and might be something like 35A)
|
10
|
+
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
11
|
+
# so use _location to get the data from either one of them
|
12
|
+
# TODO: should it be hierarchical series/box/folder?
|
13
|
+
def box
  # Returns the first box designation found in any physicalLocation text, or
  # nil when none mentions a box.
  # _location.physicalLocation should find top level and relatedItem
  box_num = @mods_ng_xml._location.physicalLocation.map do |node|
    # note that this will also find Flatbox or Flat-box
    # The value runs up to a comma, pipe, parenthesis or the word "Folder".
    # The lookahead stops at the word itself; a character class containing
    # "(Folder)" would instead reject the individual letters f, o, l, d, e, r
    # and cut values such as "35D" short.
    match_data = node.text.match(/Box ?:? ?((?:(?!Folder)[^,|()])+)/i)
    # a MatchData is always truthy, so no ActiveSupport present? is needed
    match_data[1].strip if match_data
  end.compact

  # There should only be one box
  box_num.first
end
|
25
|
+
|
26
|
+
# returns folder number (note: single valued)
|
27
|
+
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
28
|
+
# so use _location to get the data from either one of them
|
29
|
+
# TODO: should it be hierarchical series/box/folder?
|
30
|
+
def folder
  # Returns the first folder designation found in any physicalLocation text,
  # or nil when none mentions a folder.
  # _location.physicalLocation should find top level and relatedItem
  folder_num = @mods_ng_xml._location.physicalLocation.map do |node|
    val = node.text

    pattern = if val.include?('|')
                # we assume the data is pipe-delimited, and may contain commas within values
                /Folder ?:? ?([^|]+)/
              else
                # the data should be comma-delimited, and may not contain commas within values
                /Folder ?:? ?([^,]+)/
              end

    match_data = val.match(pattern)
    # a MatchData is always truthy, so no ActiveSupport present? is needed
    match_data[1].strip if match_data
  end.compact

  # There should be one folder
  folder_num.first
end
|
49
|
+
|
50
|
+
# return entire contents of physicalLocation (note: single valued)
|
51
|
+
# but only if it has series, accession, box or folder data
|
52
|
+
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
53
|
+
# so use _location to get the data from either one of them
|
54
|
+
# TODO: should it be hierarchical series/box/folder?
|
55
|
+
def location
  # Returns the full text of the first physicalLocation that mentions Series,
  # Accession, Folder or Box (case-insensitive), or nil when none does.
  # _location.physicalLocation should find top level and relatedItem
  loc = @mods_ng_xml._location.physicalLocation.map do |node|
    text = node.text
    # A plain alternation is enough: only whether a keyword occurs matters, so
    # the surrounding .* and the capture groups added nothing.
    text if text =~ /Series|Accession|Folder|Box/i
  end.compact

  # There should only be one location
  loc.first
end
|
64
|
+
|
65
|
+
# return series/accession 'number' (note: single valued)
|
66
|
+
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
67
|
+
# so use _location to get the data from either one of them
|
68
|
+
# TODO: should it be hierarchical series/box/folder?
|
69
|
+
def series
  # Returns the first series/accession designation found in any
  # physicalLocation text, or nil when none is present.
  # _location.physicalLocation should find top level and relatedItem
  series_num = @mods_ng_xml._location.physicalLocation.map do |node|
    # feigenbaum uses 'Accession'
    match_data = node.text.match(/(?:(?:Series)|(?:Accession)):? ([^,|]+)/i)
    # a MatchData is always truthy, so no ActiveSupport present? is needed
    match_data[1].strip if match_data
  end.compact

  # There should be only one series
  series_num.first
end
|
81
|
+
end # class Record
|
82
|
+
end # Module Mods
|
83
|
+
end # Module Stanford
|
@@ -26,7 +26,7 @@ module Stanford
|
|
26
26
|
result << SEARCHWORKS_LANGUAGES[v.strip]
|
27
27
|
end
|
28
28
|
end
|
29
|
-
rescue
|
29
|
+
rescue
|
30
30
|
# TODO: this should be written to a logger
|
31
31
|
p "Couldn't find english name for #{ct.text}"
|
32
32
|
end
|
@@ -422,7 +422,6 @@ module Stanford
|
|
422
422
|
end
|
423
423
|
dates = pub_dates
|
424
424
|
if dates
|
425
|
-
year = []
|
426
425
|
pruned_dates = []
|
427
426
|
dates.each do |f_date|
|
428
427
|
#remove ? and []
|
data/lib/stanford-mods.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'stanford-mods/version'
|
2
2
|
require 'mods'
|
3
|
+
require 'stanford-mods/name'
|
3
4
|
require 'stanford-mods/searchworks'
|
5
|
+
require 'stanford-mods/physical_location'
|
4
6
|
|
5
7
|
# Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
|
6
8
|
module Stanford
|
@@ -8,43 +10,6 @@ module Stanford
|
|
8
10
|
|
9
11
|
class Record < ::Mods::Record
|
10
12
|
|
11
|
-
# the first encountered <mods><name> element with marcrelator flavor role of 'Creator' or 'Author'.
|
12
|
-
# if no marcrelator 'Creator' or 'Author', the first name without a role.
|
13
|
-
# if no name without a role, then nil
|
14
|
-
# @return [String] a name in the display_value_w_date form
|
15
|
-
# see Mods::Record.name in nom_terminology for details on the display_value algorithm
|
16
|
-
def main_author_w_date
|
17
|
-
result = nil
|
18
|
-
first_wo_role = nil
|
19
|
-
@mods_ng_xml.plain_name.each { |n|
|
20
|
-
if n.role.size == 0
|
21
|
-
first_wo_role ||= n
|
22
|
-
end
|
23
|
-
n.role.each { |r|
|
24
|
-
if r.authority.include?('marcrelator') &&
|
25
|
-
(r.value.include?('Creator') || r.value.include?('Author'))
|
26
|
-
result ||= n.display_value_w_date
|
27
|
-
end
|
28
|
-
}
|
29
|
-
}
|
30
|
-
if !result && first_wo_role
|
31
|
-
result = first_wo_role.display_value_w_date
|
32
|
-
end
|
33
|
-
result
|
34
|
-
end # main_author
|
35
|
-
|
36
|
-
# all names, in display form, except the main_author
|
37
|
-
# names will be the display_value_w_date form
|
38
|
-
# see Mods::Record.name in nom_terminology for details on the display_value algorithm
|
39
|
-
def additional_authors_w_dates
|
40
|
-
results = []
|
41
|
-
@mods_ng_xml.plain_name.each { |n|
|
42
|
-
results << n.display_value_w_date
|
43
|
-
}
|
44
|
-
results.delete(main_author_w_date)
|
45
|
-
results
|
46
|
-
end
|
47
|
-
|
48
13
|
end # Record class
|
49
14
|
end # Mods module
|
50
15
|
end # Stanford module
|