stanford-mods 1.1.5 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.hound.yml +2 -0
- data/.rubocop.yml +3 -0
- data/.rubocop_todo.yml +12 -0
- data/.travis.yml +12 -13
- data/Gemfile +5 -2
- data/Rakefile +11 -7
- data/config/mappings_hash.rb +1 -1
- data/lib/stanford-mods/name.rb +84 -0
- data/lib/stanford-mods/physical_location.rb +83 -0
- data/lib/stanford-mods/searchworks.rb +1 -2
- data/lib/stanford-mods/version.rb +1 -1
- data/lib/stanford-mods.rb +2 -37
- data/spec/name_spec.rb +385 -206
- data/spec/physical_location_spec.rb +254 -0
- data/spec/searchworks_title_spec.rb +2 -2
- data/spec/spec_helper.rb +2 -2
- metadata +9 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 025c7efb3e29b5c56eaf05af7f58eb364e13c8f7
|
4
|
+
data.tar.gz: 4d66ca23ed45283c8374fa50166f694f63c31bb8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4c1b8aaf08deb76e6de29153cd492def619aa884b8841e7c68254ea076da2948d91ab81123f81ee565f1cb41f7ba85ee2a4dff4bec1a4b2f10f72cd77775a44
|
7
|
+
data.tar.gz: 8b90601ce966f4ac16cac8dd78c255b692eff2bfdaef8dcd889bdc257acdc594f47925baddd705b2d969cc646c3df4295198edefba1f1bd0c6bfe3011232be6f
|
data/.hound.yml
ADDED
data/.rubocop.yml
ADDED
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2015-11-17 14:05:33 -0800 using RuboCop version 0.35.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
Lint/UselessAssignment:
|
11
|
+
Exclude:
|
12
|
+
- 'config/mappings_hash.rb'
|
data/.travis.yml
CHANGED
@@ -1,18 +1,17 @@
|
|
1
1
|
language: ruby
|
2
|
-
script: rake
|
2
|
+
script: rake
|
3
3
|
rvm:
|
4
|
+
- ruby-head
|
5
|
+
- 2.2.3 # spotlight
|
4
6
|
- 2.2.0
|
5
7
|
- 2.1.5
|
6
8
|
- 2.0.0
|
7
|
-
- 1.9.3
|
8
|
-
|
9
|
-
- jruby-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
- bess@stanford.edu
|
17
|
-
#before_install:
|
18
|
-
# gem update --system 1.8.24
|
9
|
+
- 1.9.3 # argo, FRDA
|
10
|
+
# we used to use jruby for merged DOR + MARC records, but no more ...
|
11
|
+
- jruby-head
|
12
|
+
matrix:
|
13
|
+
allow_failures:
|
14
|
+
- rvm: jruby-head
|
15
|
+
|
16
|
+
notifications: false
|
17
|
+
sudo: false
|
data/Gemfile
CHANGED
@@ -3,9 +3,12 @@ source 'https://rubygems.org'
|
|
3
3
|
# See stanford-mods.gemspec for this gem's dependencies
|
4
4
|
gemspec
|
5
5
|
|
6
|
-
|
6
|
+
group :test, :development do
|
7
|
+
gem 'rubocop', require: false
|
8
|
+
gem 'rubocop-rspec', require: false
|
9
|
+
end
|
7
10
|
|
8
11
|
group :test do
|
9
12
|
gem 'coveralls', require: false
|
13
|
+
# gem 'pry-byebug', require: false
|
10
14
|
end
|
11
|
-
|
data/Rakefile
CHANGED
@@ -2,10 +2,6 @@
|
|
2
2
|
require "bundler/gem_tasks"
|
3
3
|
require 'bundler'
|
4
4
|
|
5
|
-
require 'rspec/core/rake_task'
|
6
|
-
require 'yard'
|
7
|
-
require 'yard/rake/yardoc_task'
|
8
|
-
|
9
5
|
begin
|
10
6
|
Bundler.setup(:default, :development)
|
11
7
|
rescue Bundler::BundlerError => e
|
@@ -16,23 +12,31 @@ end
|
|
16
12
|
|
17
13
|
task :default => :ci
|
18
14
|
|
19
|
-
desc "run continuous integration suite (tests, coverage,
|
20
|
-
task :ci => [:rspec, :
|
15
|
+
desc "run continuous integration suite (tests, coverage, rubocop lint)"
|
16
|
+
task :ci => [:rspec, :rubocop]
|
21
17
|
|
22
18
|
task :spec => :rspec
|
23
19
|
|
20
|
+
require 'rspec/core/rake_task'
|
24
21
|
RSpec::Core::RakeTask.new(:rspec) do |spec|
|
25
22
|
spec.rspec_opts = ["-c", "--tty", "-f progress", "-r ./spec/spec_helper.rb"]
|
26
23
|
end
|
27
24
|
|
25
|
+
require 'rubocop/rake_task'
|
26
|
+
RuboCop::RakeTask.new(:rubocop) do |task|
|
27
|
+
task.options = ['-l'] # run lint cops only
|
28
|
+
end
|
29
|
+
|
28
30
|
# Use yard to build docs
|
31
|
+
require 'yard'
|
32
|
+
require 'yard/rake/yardoc_task'
|
29
33
|
begin
|
30
34
|
project_root = File.expand_path(File.dirname(__FILE__))
|
31
35
|
doc_dest_dir = File.join(project_root, 'doc')
|
32
36
|
|
33
37
|
YARD::Rake::YardocTask.new(:doc) do |yt|
|
34
38
|
yt.files = Dir.glob(File.join(project_root, 'lib', '**', '*.rb')) +
|
35
|
-
|
39
|
+
[File.join(project_root, 'README.md')]
|
36
40
|
yt.options = ['--output-dir', doc_dest_dir, '--readme', 'README.md', '--title', 'Stanford-Mods Documentation']
|
37
41
|
end
|
38
42
|
rescue LoadError
|
data/config/mappings_hash.rb
CHANGED
@@ -17,7 +17,7 @@ name_to_xpath = {
|
|
17
17
|
|
18
18
|
# titleInfo
|
19
19
|
'title' => '//mods/titleInfo[not(@type="alternative")]', # .first, extract_title_from_title_info(node)
|
20
|
-
'subtitle' => '//mods/titleInfo/subTitle' #.first.text
|
20
|
+
'subtitle' => '//mods/titleInfo/subTitle', #.first.text
|
21
21
|
'full_title' => '//mods/titleInfo', # .first, extract_full_title_from_title_info(node)
|
22
22
|
'title_variant' => '//mods/titleInfo[@type="alternative"]', # .first, extract_title_from_title_info(node)
|
23
23
|
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'logger'
|
3
|
+
require 'mods'
|
4
|
+
|
5
|
+
# NON-SearchWorks specific wranglings of MODS <name> metadata as a mixin to the Stanford::Mods::Record object
|
6
|
+
module Stanford
|
7
|
+
module Mods
|
8
|
+
|
9
|
+
class Record < ::Mods::Record
|
10
|
+
|
11
|
+
# the first encountered <mods><name> element with marcrelator flavor role of 'Creator' or 'Author'.
|
12
|
+
# if no marcrelator 'Creator' or 'Author', the first name without a role.
|
13
|
+
# if no name without a role, then nil
|
14
|
+
# @return [String] a name in the display_value_w_date form
|
15
|
+
# see Mods::Record.name in nom_terminology for details on the display_value algorithm
|
16
|
+
def main_author_w_date
|
17
|
+
result = nil
|
18
|
+
first_wo_role = nil
|
19
|
+
@mods_ng_xml.plain_name.each { |n|
|
20
|
+
if n.role.size == 0
|
21
|
+
first_wo_role ||= n
|
22
|
+
end
|
23
|
+
n.role.each { |r|
|
24
|
+
if r.authority.include?('marcrelator') &&
|
25
|
+
(r.value.include?('Creator') || r.value.include?('Author'))
|
26
|
+
result ||= n.display_value_w_date
|
27
|
+
end
|
28
|
+
}
|
29
|
+
}
|
30
|
+
if !result && first_wo_role
|
31
|
+
result = first_wo_role.display_value_w_date
|
32
|
+
end
|
33
|
+
result
|
34
|
+
end # main_author
|
35
|
+
|
36
|
+
# all names, in display form, except the main_author
|
37
|
+
# names will be the display_value_w_date form
|
38
|
+
# see Mods::Record.name in nom_terminology for details on the display_value algorithm
|
39
|
+
def additional_authors_w_dates
|
40
|
+
results = []
|
41
|
+
@mods_ng_xml.plain_name.each { |n|
|
42
|
+
results << n.display_value_w_date
|
43
|
+
}
|
44
|
+
results.delete(main_author_w_date)
|
45
|
+
results
|
46
|
+
end
|
47
|
+
|
48
|
+
COLLECTOR_ROLE_URI = 'http://id.loc.gov/vocabulary/relators/col'
|
49
|
+
|
50
|
+
# @return Array of Strings, each containing the computed display value of a personal name
|
51
|
+
# except for the collector role (see mods gem nom_terminology for display value algorithm)
|
52
|
+
# FIXME: this is broken if there are multiple role codes and some of them are not marcrelator
|
53
|
+
def non_collector_person_authors
|
54
|
+
result = []
|
55
|
+
@mods_ng_xml.personal_name.map do |n|
|
56
|
+
r = n.role
|
57
|
+
unless (r.authority.include?('marcrelator') && r.value.include?('Collector')) ||
|
58
|
+
r.roleTerm.valueURI.first == COLLECTOR_ROLE_URI
|
59
|
+
result << n.display_value_w_date
|
60
|
+
end
|
61
|
+
end
|
62
|
+
result unless result.empty?
|
63
|
+
end
|
64
|
+
|
65
|
+
# @return Array of Strings, each containing the computed display value of
|
66
|
+
# a personal name with the role of Collector (see mods gem nom_terminology for display value algorithm)
|
67
|
+
def collectors_w_dates
|
68
|
+
result = []
|
69
|
+
@mods_ng_xml.personal_name.each do |n|
|
70
|
+
unless n.role.size == 0
|
71
|
+
n.role.each { |r|
|
72
|
+
if (r.authority.include?('marcrelator') && r.value.include?('Collector')) ||
|
73
|
+
r.roleTerm.valueURI.first == COLLECTOR_ROLE_URI
|
74
|
+
result << n.display_value_w_date
|
75
|
+
end
|
76
|
+
}
|
77
|
+
end
|
78
|
+
end
|
79
|
+
result unless result.empty?
|
80
|
+
end
|
81
|
+
|
82
|
+
end # class Record
|
83
|
+
end # Module Mods
|
84
|
+
end # Module Stanford
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'mods'
|
3
|
+
|
4
|
+
# Parsing MODS //location/physicalLocation for series, box, and folder for Special Collections
|
5
|
+
# This is not used by Searchworks, otherwise it would have been in the searchworks.rb file
|
6
|
+
module Stanford
|
7
|
+
module Mods
|
8
|
+
class Record < ::Mods::Record
|
9
|
+
# return box number (note: single valued and might be something like 35A)
|
10
|
+
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
11
|
+
# so use _location to get the data from either one of them
|
12
|
+
# TODO: should it be hierarchical series/box/folder?
|
13
|
+
def box
|
14
|
+
# _location.physicalLocation should find top level and relatedItem
|
15
|
+
box_num = @mods_ng_xml._location.physicalLocation.map do |node|
|
16
|
+
val = node.text
|
17
|
+
# note that this will also find Flatbox or Flat-box
|
18
|
+
match_data = val.match(/Box ?:? ?([^,|(Folder)]+)/i)
|
19
|
+
match_data[1].strip if match_data.present?
|
20
|
+
end.compact
|
21
|
+
|
22
|
+
# There should only be one box
|
23
|
+
box_num.first
|
24
|
+
end
|
25
|
+
|
26
|
+
# returns folder number (note: single valued)
|
27
|
+
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
28
|
+
# so use _location to get the data from either one of them
|
29
|
+
# TODO: should it be hierarchical series/box/folder?
|
30
|
+
def folder
|
31
|
+
# _location.physicalLocation should find top level and relatedItem
|
32
|
+
folder_num = @mods_ng_xml._location.physicalLocation.map do |node|
|
33
|
+
val = node.text
|
34
|
+
|
35
|
+
match_data = if val =~ /\|/
|
36
|
+
# we assume the data is pipe-delimited, and may contain commas within values
|
37
|
+
val.match(/Folder ?:? ?([^|]+)/)
|
38
|
+
else
|
39
|
+
# the data should be comma-delimited, and may not contain commas within values
|
40
|
+
val.match(/Folder ?:? ?([^,]+)/)
|
41
|
+
end
|
42
|
+
|
43
|
+
match_data[1].strip if match_data.present?
|
44
|
+
end.compact
|
45
|
+
|
46
|
+
# There should be one folder
|
47
|
+
folder_num.first
|
48
|
+
end
|
49
|
+
|
50
|
+
# return entire contents of physicalLocation (note: single valued)
|
51
|
+
# but only if it has series, accession, box or folder data
|
52
|
+
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
53
|
+
# so use _location to get the data from either one of them
|
54
|
+
# TODO: should it be hierarchical series/box/folder?
|
55
|
+
def location
|
56
|
+
# _location.physicalLocation should find top level and relatedItem
|
57
|
+
loc = @mods_ng_xml._location.physicalLocation.map do |node|
|
58
|
+
node.text if node.text.match(/.*(Series)|(Accession)|(Folder)|(Box).*/i)
|
59
|
+
end.compact
|
60
|
+
|
61
|
+
# There should only be one location
|
62
|
+
loc.first
|
63
|
+
end
|
64
|
+
|
65
|
+
# return series/accession 'number' (note: single valued)
|
66
|
+
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
67
|
+
# so use _location to get the data from either one of them
|
68
|
+
# TODO: should it be hierarchical series/box/folder?
|
69
|
+
def series
|
70
|
+
# _location.physicalLocation should find top level and relatedItem
|
71
|
+
series_num = @mods_ng_xml._location.physicalLocation.map do |node|
|
72
|
+
val = node.text
|
73
|
+
# feigenbaum uses 'Accession'
|
74
|
+
match_data = val.match(/(?:(?:Series)|(?:Accession)):? ([^,|]+)/i)
|
75
|
+
match_data[1].strip if match_data.present?
|
76
|
+
end.compact
|
77
|
+
|
78
|
+
# There should be only one series
|
79
|
+
series_num.first
|
80
|
+
end
|
81
|
+
end # class Record
|
82
|
+
end # Module Mods
|
83
|
+
end # Module Stanford
|
@@ -26,7 +26,7 @@ module Stanford
|
|
26
26
|
result << SEARCHWORKS_LANGUAGES[v.strip]
|
27
27
|
end
|
28
28
|
end
|
29
|
-
rescue
|
29
|
+
rescue
|
30
30
|
# TODO: this should be written to a logger
|
31
31
|
p "Couldn't find english name for #{ct.text}"
|
32
32
|
end
|
@@ -422,7 +422,6 @@ module Stanford
|
|
422
422
|
end
|
423
423
|
dates = pub_dates
|
424
424
|
if dates
|
425
|
-
year = []
|
426
425
|
pruned_dates = []
|
427
426
|
dates.each do |f_date|
|
428
427
|
#remove ? and []
|
data/lib/stanford-mods.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'stanford-mods/version'
|
2
2
|
require 'mods'
|
3
|
+
require 'stanford-mods/name'
|
3
4
|
require 'stanford-mods/searchworks'
|
5
|
+
require 'stanford-mods/physical_location'
|
4
6
|
|
5
7
|
# Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
|
6
8
|
module Stanford
|
@@ -8,43 +10,6 @@ module Stanford
|
|
8
10
|
|
9
11
|
class Record < ::Mods::Record
|
10
12
|
|
11
|
-
# the first encountered <mods><name> element with marcrelator flavor role of 'Creator' or 'Author'.
|
12
|
-
# if no marcrelator 'Creator' or 'Author', the first name without a role.
|
13
|
-
# if no name without a role, then nil
|
14
|
-
# @return [String] a name in the display_value_w_date form
|
15
|
-
# see Mods::Record.name in nom_terminology for details on the display_value algorithm
|
16
|
-
def main_author_w_date
|
17
|
-
result = nil
|
18
|
-
first_wo_role = nil
|
19
|
-
@mods_ng_xml.plain_name.each { |n|
|
20
|
-
if n.role.size == 0
|
21
|
-
first_wo_role ||= n
|
22
|
-
end
|
23
|
-
n.role.each { |r|
|
24
|
-
if r.authority.include?('marcrelator') &&
|
25
|
-
(r.value.include?('Creator') || r.value.include?('Author'))
|
26
|
-
result ||= n.display_value_w_date
|
27
|
-
end
|
28
|
-
}
|
29
|
-
}
|
30
|
-
if !result && first_wo_role
|
31
|
-
result = first_wo_role.display_value_w_date
|
32
|
-
end
|
33
|
-
result
|
34
|
-
end # main_author
|
35
|
-
|
36
|
-
# all names, in display form, except the main_author
|
37
|
-
# names will be the display_value_w_date form
|
38
|
-
# see Mods::Record.name in nom_terminology for details on the display_value algorithm
|
39
|
-
def additional_authors_w_dates
|
40
|
-
results = []
|
41
|
-
@mods_ng_xml.plain_name.each { |n|
|
42
|
-
results << n.display_value_w_date
|
43
|
-
}
|
44
|
-
results.delete(main_author_w_date)
|
45
|
-
results
|
46
|
-
end
|
47
|
-
|
48
13
|
end # Record class
|
49
14
|
end # Mods module
|
50
15
|
end # Stanford module
|