stanford-mods 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc
CHANGED
|
@@ -60,6 +60,7 @@ Example Using SearchWorks Mixins:
|
|
|
60
60
|
|
|
61
61
|
== Releases
|
|
62
62
|
|
|
63
|
+
* <b>0.0.10</b> get rid of ignore_me files
|
|
63
64
|
* <b>0.0.9</b> add sw_subject_names and sw_subject_titles methods to searchworks mixin
|
|
64
65
|
* <b>0.0.8</b> require stanford-mods/searchworks in stanford-mods (top level)
|
|
65
66
|
* <b>0.0.7</b> added sw_geographic_search to searchworks mixin
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: stanford-mods
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.9
|
|
4
|
+
version: 0.0.10
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -146,11 +146,9 @@ files:
|
|
|
146
146
|
- config/mappings_hash.rb
|
|
147
147
|
- lib/stanford-mods.rb
|
|
148
148
|
- lib/stanford-mods/kolb.rb
|
|
149
|
-
- lib/stanford-mods/old_mappings_4_ref.rb
|
|
150
149
|
- lib/stanford-mods/searchworks.rb
|
|
151
150
|
- lib/stanford-mods/searchworks_languages.rb
|
|
152
151
|
- lib/stanford-mods/version.rb
|
|
153
|
-
- spec/ignore_me_sw_required_flds_spec.rb
|
|
154
152
|
- spec/kolb_spec.rb
|
|
155
153
|
- spec/name_spec.rb
|
|
156
154
|
- spec/searchworks_spec.rb
|
|
@@ -171,7 +169,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
171
169
|
version: '0'
|
|
172
170
|
segments:
|
|
173
171
|
- 0
|
|
174
|
-
hash:
|
|
172
|
+
hash: -3519337351591429375
|
|
175
173
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
176
174
|
none: false
|
|
177
175
|
requirements:
|
|
@@ -180,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
180
178
|
version: '0'
|
|
181
179
|
segments:
|
|
182
180
|
- 0
|
|
183
|
-
hash:
|
|
181
|
+
hash: -3519337351591429375
|
|
184
182
|
requirements: []
|
|
185
183
|
rubyforge_project:
|
|
186
184
|
rubygems_version: 1.8.24
|
|
@@ -188,7 +186,6 @@ signing_key:
|
|
|
188
186
|
specification_version: 3
|
|
189
187
|
summary: Stanford specific wrangling of MODS metadata
|
|
190
188
|
test_files:
|
|
191
|
-
- spec/ignore_me_sw_required_flds_spec.rb
|
|
192
189
|
- spec/kolb_spec.rb
|
|
193
190
|
- spec/name_spec.rb
|
|
194
191
|
- spec/searchworks_spec.rb
|
|
@@ -1,155 +0,0 @@
|
|
|
1
|
-
# This file is for reference as I implement the searchworks.rb mixin file
|
|
2
|
-
# it is a bunch of methods from the old dor-sw-ingest code
|
|
3
|
-
module Stanford
|
|
4
|
-
module Mods
|
|
5
|
-
|
|
6
|
-
def empty?
|
|
7
|
-
mods_xml.xpath('//text()').empty?
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
# Determine what language(s) this record declares
|
|
11
|
-
# Use iso-639 to translate codes into English words
|
|
12
|
-
# @return Array
|
|
13
|
-
def language
|
|
14
|
-
languages = []
|
|
15
|
-
language_codes.each do |code|
|
|
16
|
-
begin
|
|
17
|
-
csv_codes = code.to_s.split(/[,|\ ]/)
|
|
18
|
-
csv_codes = csv_codes.delete_if {|x| x.strip.length==0 }
|
|
19
|
-
csv_codes.each do |c|
|
|
20
|
-
languages << ISO_639.find(c.to_s.strip).english_name
|
|
21
|
-
end
|
|
22
|
-
rescue => e
|
|
23
|
-
SearchWorksOaiHarvester.logger.error "Couldn't find english name for #{code.to_s}"
|
|
24
|
-
# SearchWorksOaiHarvester.logger.error e
|
|
25
|
-
languages << code.to_s
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
language_words.each do |word|
|
|
30
|
-
if word.to_s.strip.length > 0
|
|
31
|
-
languages << word.to_s.strip
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
return nil if languages.uniq.empty?
|
|
35
|
-
return languages.uniq
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
# Interpret the various permutations of dateCreated
|
|
39
|
-
# @param [Nokogiri::XML::Element] node
|
|
40
|
-
def date_created
|
|
41
|
-
if create_start_date.length > 0 && create_end_date.length > 0
|
|
42
|
-
return "#{start_date} - #{end_date}"
|
|
43
|
-
else
|
|
44
|
-
return node.xpath('//dateCreated/text()').to_s
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
def date_created_or_issued
|
|
49
|
-
begin
|
|
50
|
-
return date_created unless date_created.nil? or date_created.length == 0
|
|
51
|
-
return date_issued unless date_issued.nil? or date_issued.length == 0
|
|
52
|
-
nil
|
|
53
|
-
rescue
|
|
54
|
-
nil
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
# A single value for publication year (this will need refinement over time)
|
|
59
|
-
# @return String
|
|
60
|
-
def pub_year(year = date_created_or_issued)
|
|
61
|
-
year[/[0-9]{4}/]
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
# TODO: Ask Jessie what the valid values are here
|
|
68
|
-
# @return String
|
|
69
|
-
def display_type
|
|
70
|
-
return "image" if is_an_image?
|
|
71
|
-
return "image" if is_a_map?
|
|
72
|
-
return "collection" if is_a_collection?
|
|
73
|
-
nil
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
# Check to see if this item is a map
|
|
77
|
-
# @return Boolean
|
|
78
|
-
def is_a_map?
|
|
79
|
-
return true if mods_xml.xpath('//typeOfResource/text()').to_s.match(/^[Cc]artographic/)
|
|
80
|
-
return true if mods_xml.xpath('//genre[@authority="marcgt"]/text()').to_s.match(/^[Mm]ap/)
|
|
81
|
-
return true if mods_xml.xpath('//physicalDescription/form/text()').to_s.match(/[Mm]ap/)
|
|
82
|
-
return true if mods_xml.xpath('//physicalDescription/internetMediaType/text()').to_s.match(/[Mm]ap/)
|
|
83
|
-
false
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
# Check to see if this item is an Image
|
|
87
|
-
# @return Boolean
|
|
88
|
-
def is_an_image?
|
|
89
|
-
return true if mods_xml.xpath('//typeOfResource/text()').to_s.match(/still image/)
|
|
90
|
-
false
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
# Objects can belong to a collection by reference (handled in solr_mapper.rb),
|
|
94
|
-
# or they can declare themselves part of a collection in MODS
|
|
95
|
-
# e.g., Revs does it like this:
|
|
96
|
-
# <mods:relatedItem type="host">
|
|
97
|
-
# <mods:titleInfo>
|
|
98
|
-
# <mods:title>The Collier Collection of the Revs Institute for Automotive Research</mods:title>
|
|
99
|
-
# </mods:titleInfo>
|
|
100
|
-
# <mods:typeOfResource collection="yes"/>
|
|
101
|
-
# </mods:relatedItem>
|
|
102
|
-
def declared_collections
|
|
103
|
-
c = []
|
|
104
|
-
collection_nodes = mods_xml.xpath("//relatedItem/typeOfResource[@collection='yes']")
|
|
105
|
-
collection_nodes.each do |node|
|
|
106
|
-
c << node.xpath('../titleInfo/title/text()').to_s
|
|
107
|
-
end
|
|
108
|
-
c
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
# Check to see if this item is a collection
|
|
112
|
-
# @return Boolean
|
|
113
|
-
def is_a_collection?
|
|
114
|
-
return true if mods_xml.xpath("/mods/typeOfResource/@collection").to_s == 'yes'
|
|
115
|
-
false
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
def physical_description_form
|
|
119
|
-
form = mods_xml.xpath('//physicalDescription/form/text()').to_s
|
|
120
|
-
media_type = mods_xml.xpath('//physicalDescription/internetMediaType/text()').to_s
|
|
121
|
-
if form != media_type && form
|
|
122
|
-
form
|
|
123
|
-
elsif media_type
|
|
124
|
-
media_type
|
|
125
|
-
else
|
|
126
|
-
nil
|
|
127
|
-
end
|
|
128
|
-
end
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
# Accept a nokogiri representation of a mods titleInfo element
|
|
133
|
-
# Return a formatted string of the title it describes
|
|
134
|
-
# @param node Nokogiri::XML::Element
|
|
135
|
-
# @return String
|
|
136
|
-
def extract_title_from_title_info(node)
|
|
137
|
-
"#{node.xpath('nonSort/text()')} #{node.xpath('title/text()')}".strip
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
# Accept a nokogiri representation of a mods titleInfo element
|
|
141
|
-
# Return a formatted string of the title it describes
|
|
142
|
-
# @param node Nokogiri::XML::Element
|
|
143
|
-
# @return String
|
|
144
|
-
def extract_full_title_from_title_info(node)
|
|
145
|
-
title = "#{node.xpath('nonSort/text()')} #{node.xpath('title/text()')}".strip
|
|
146
|
-
unless node.xpath('subTitle/text()').empty?
|
|
147
|
-
title = "#{title}: #{node.xpath('subTitle/text()')}"
|
|
148
|
-
end
|
|
149
|
-
return title
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
end
|
|
155
|
-
end
|
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
describe "Values for SearchWorks Solr" do
|
|
4
|
-
# from https://consul.stanford.edu/display/NGDE/Required+and+Recommended+Solr+Fields+for+SearchWorks+documents
|
|
5
|
-
before(:all) do
|
|
6
|
-
@ns_decl = "xmlns='#{Mods::MODS_NS}'"
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
context "required fields" do
|
|
10
|
-
context "DOR specific" do
|
|
11
|
-
# in harvestdor code: druid, parent_coll_ckey, id, collection
|
|
12
|
-
|
|
13
|
-
it "url_fulltext" do
|
|
14
|
-
pending "to be implemented"
|
|
15
|
-
end
|
|
16
|
-
it "mods_xml" do
|
|
17
|
-
pending "to be implemented"
|
|
18
|
-
end
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
it "all_search" do
|
|
22
|
-
pending "to be implemented"
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
it "format" do
|
|
26
|
-
pending "to be implemented, using SearchWorks controlled vocab"
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
# FIXME: update per gryphDOR code / searcworks code / new schema
|
|
30
|
-
it "collection" do
|
|
31
|
-
pending "to be implemented, using controlled vocab, in harvestdor"
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
it "display_type" do
|
|
35
|
-
pending "to be implemented, using controlled vocab"
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
context "strongly recommended fields" do
|
|
41
|
-
# access_facet has nothing to do with mods
|
|
42
|
-
# title convenience methods are implemented in the Mods gem; no special work here
|
|
43
|
-
context "title fields" do
|
|
44
|
-
context "for display" do
|
|
45
|
-
it "short title" do
|
|
46
|
-
pending "to be implemented"
|
|
47
|
-
end
|
|
48
|
-
it "full title" do
|
|
49
|
-
pending "to be implemented"
|
|
50
|
-
end
|
|
51
|
-
end
|
|
52
|
-
context "for searching" do
|
|
53
|
-
it "short title" do
|
|
54
|
-
pending "to be implemented"
|
|
55
|
-
end
|
|
56
|
-
it "full title" do
|
|
57
|
-
pending "to be implemented"
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
it "sortable title" do
|
|
61
|
-
pending "to be implemented"
|
|
62
|
-
end
|
|
63
|
-
end
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
context "recommended fields" do
|
|
67
|
-
context "publication date" do
|
|
68
|
-
it "for searching and facet" do
|
|
69
|
-
pending "to be implemented"
|
|
70
|
-
end
|
|
71
|
-
it "for sorting" do
|
|
72
|
-
pending "to be implemented"
|
|
73
|
-
end
|
|
74
|
-
it "for pub date grouping (hierarchical / date slider?)" do
|
|
75
|
-
pending "to be implemented"
|
|
76
|
-
end
|
|
77
|
-
end
|
|
78
|
-
context "language" do
|
|
79
|
-
it "should use the SearchWorks controlled vocabulary" do
|
|
80
|
-
m = "<mods #{@ns_decl}><language><languageTerm authority='iso639-2b' type='code'>per ara, dut</languageTerm></language></mods>"
|
|
81
|
-
r = Stanford::Mods::Record.new()
|
|
82
|
-
r.from_str(m)
|
|
83
|
-
langs = r.sw_language_facet
|
|
84
|
-
langs.size.should == 3
|
|
85
|
-
langs.should include("Persian")
|
|
86
|
-
langs.should include("Arabic")
|
|
87
|
-
langs.should include("Dutch")
|
|
88
|
-
langs.should_not include("Dutch; Flemish")
|
|
89
|
-
end
|
|
90
|
-
it "should not have duplicates" do
|
|
91
|
-
m = "<mods #{@ns_decl}><language><languageTerm type='code' authority='iso639-2b'>eng</languageTerm><languageTerm type='text'>English</languageTerm></language></mods>"
|
|
92
|
-
r = Stanford::Mods::Record.new
|
|
93
|
-
r.from_str(m)
|
|
94
|
-
langs = r.sw_language_facet
|
|
95
|
-
langs.size.should == 1
|
|
96
|
-
langs.should include("English")
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
end
|
|
100
|
-
context "authors" do
|
|
101
|
-
it "main author" do
|
|
102
|
-
pending "to be implemented"
|
|
103
|
-
end
|
|
104
|
-
it "additional authors" do
|
|
105
|
-
pending "to be implemented"
|
|
106
|
-
end
|
|
107
|
-
it "author sort" do
|
|
108
|
-
pending "to be implemented"
|
|
109
|
-
end
|
|
110
|
-
end
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
end
|