stanford-mods 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc
CHANGED
@@ -60,6 +60,7 @@ Example Using SearchWorks Mixins:
|
|
60
60
|
|
61
61
|
== Releases
|
62
62
|
|
63
|
+
* <b>0.0.10</b> get rid of ignore_me files
|
63
64
|
* <b>0.0.9</b> add sw_subject_names and sw_subject_titles methods to searchworks mixin
|
64
65
|
* <b>0.0.8</b> require stanford-mods/searchworks in stanford-mods (top level)
|
65
66
|
* <b>0.0.7</b> added sw_geographic_search to searchworks mixin
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stanford-mods
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -146,11 +146,9 @@ files:
|
|
146
146
|
- config/mappings_hash.rb
|
147
147
|
- lib/stanford-mods.rb
|
148
148
|
- lib/stanford-mods/kolb.rb
|
149
|
-
- lib/stanford-mods/old_mappings_4_ref.rb
|
150
149
|
- lib/stanford-mods/searchworks.rb
|
151
150
|
- lib/stanford-mods/searchworks_languages.rb
|
152
151
|
- lib/stanford-mods/version.rb
|
153
|
-
- spec/ignore_me_sw_required_flds_spec.rb
|
154
152
|
- spec/kolb_spec.rb
|
155
153
|
- spec/name_spec.rb
|
156
154
|
- spec/searchworks_spec.rb
|
@@ -171,7 +169,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
171
169
|
version: '0'
|
172
170
|
segments:
|
173
171
|
- 0
|
174
|
-
hash:
|
172
|
+
hash: -3519337351591429375
|
175
173
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
176
174
|
none: false
|
177
175
|
requirements:
|
@@ -180,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
180
178
|
version: '0'
|
181
179
|
segments:
|
182
180
|
- 0
|
183
|
-
hash:
|
181
|
+
hash: -3519337351591429375
|
184
182
|
requirements: []
|
185
183
|
rubyforge_project:
|
186
184
|
rubygems_version: 1.8.24
|
@@ -188,7 +186,6 @@ signing_key:
|
|
188
186
|
specification_version: 3
|
189
187
|
summary: Stanford specific wrangling of MODS metadata
|
190
188
|
test_files:
|
191
|
-
- spec/ignore_me_sw_required_flds_spec.rb
|
192
189
|
- spec/kolb_spec.rb
|
193
190
|
- spec/name_spec.rb
|
194
191
|
- spec/searchworks_spec.rb
|
@@ -1,155 +0,0 @@
|
|
1
|
-
# This file is for reference as I implement the searchworks.rb mixin file
|
2
|
-
# it is a bunch of methods from the old dor-sw-ingest code
|
3
|
-
module Stanford
|
4
|
-
module Mods
|
5
|
-
|
6
|
-
def empty?
|
7
|
-
mods_xml.xpath('//text()').empty?
|
8
|
-
end
|
9
|
-
|
10
|
-
# Determine what language(s) this record declares
|
11
|
-
# Use iso-639 to translate codes into English words
|
12
|
-
# @return Array
|
13
|
-
def language
|
14
|
-
languages = []
|
15
|
-
language_codes.each do |code|
|
16
|
-
begin
|
17
|
-
csv_codes = code.to_s.split(/[,|\ ]/)
|
18
|
-
csv_codes = csv_codes.delete_if {|x| x.strip.length==0 }
|
19
|
-
csv_codes.each do |c|
|
20
|
-
languages << ISO_639.find(c.to_s.strip).english_name
|
21
|
-
end
|
22
|
-
rescue => e
|
23
|
-
SearchWorksOaiHarvester.logger.error "Couldn't find english name for #{code.to_s}"
|
24
|
-
# SearchWorksOaiHarvester.logger.error e
|
25
|
-
languages << code.to_s
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
language_words.each do |word|
|
30
|
-
if word.to_s.strip.length > 0
|
31
|
-
languages << word.to_s.strip
|
32
|
-
end
|
33
|
-
end
|
34
|
-
return nil if languages.uniq.empty?
|
35
|
-
return languages.uniq
|
36
|
-
end
|
37
|
-
|
38
|
-
# Interpret the various permutations of dateCreated
|
39
|
-
# @param [Nokogiri::XML::Element] node
|
40
|
-
def date_created
|
41
|
-
if create_start_date.length > 0 && create_end_date.length > 0
|
42
|
-
return "#{start_date} - #{end_date}"
|
43
|
-
else
|
44
|
-
return node.xpath('//dateCreated/text()').to_s
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
def date_created_or_issued
|
49
|
-
begin
|
50
|
-
return date_created unless date_created.nil? or date_created.length == 0
|
51
|
-
return date_issued unless date_issued.nil? or date_issued.length == 0
|
52
|
-
nil
|
53
|
-
rescue
|
54
|
-
nil
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
# A single value for publication year (this will need refinement over time)
|
59
|
-
# @return String
|
60
|
-
def pub_year(year = date_created_or_issued)
|
61
|
-
year[/[0-9]{4}/]
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
# TODO: Ask Jessie what the valid values are here
|
68
|
-
# @return String
|
69
|
-
def display_type
|
70
|
-
return "image" if is_an_image?
|
71
|
-
return "image" if is_a_map?
|
72
|
-
return "collection" if is_a_collection?
|
73
|
-
nil
|
74
|
-
end
|
75
|
-
|
76
|
-
# Check to see if this item is a map
|
77
|
-
# @return Boolean
|
78
|
-
def is_a_map?
|
79
|
-
return true if mods_xml.xpath('//typeOfResource/text()').to_s.match(/^[Cc]artographic/)
|
80
|
-
return true if mods_xml.xpath('//genre[@authority="marcgt"]/text()').to_s.match(/^[Mm]ap/)
|
81
|
-
return true if mods_xml.xpath('//physicalDescription/form/text()').to_s.match(/[Mm]ap/)
|
82
|
-
return true if mods_xml.xpath('//physicalDescription/internetMediaType/text()').to_s.match(/[Mm]ap/)
|
83
|
-
false
|
84
|
-
end
|
85
|
-
|
86
|
-
# Check to see if this item is an Image
|
87
|
-
# @return Boolean
|
88
|
-
def is_an_image?
|
89
|
-
return true if mods_xml.xpath('//typeOfResource/text()').to_s.match(/still image/)
|
90
|
-
false
|
91
|
-
end
|
92
|
-
|
93
|
-
# Objects can belong to a collection by reference (handled in solr_mapper.rb),
|
94
|
-
# or they can declare themselves part of a collection in MODS
|
95
|
-
# e.g., Revs does it like this:
|
96
|
-
# <mods:relatedItem type="host">
|
97
|
-
# <mods:titleInfo>
|
98
|
-
# <mods:title>The Collier Collection of the Revs Institute for Automotive Research</mods:title>
|
99
|
-
# </mods:titleInfo>
|
100
|
-
# <mods:typeOfResource collection="yes"/>
|
101
|
-
# </mods:relatedItem>
|
102
|
-
def declared_collections
|
103
|
-
c = []
|
104
|
-
collection_nodes = mods_xml.xpath("//relatedItem/typeOfResource[@collection='yes']")
|
105
|
-
collection_nodes.each do |node|
|
106
|
-
c << node.xpath('../titleInfo/title/text()').to_s
|
107
|
-
end
|
108
|
-
c
|
109
|
-
end
|
110
|
-
|
111
|
-
# Check to see if this item is a collection
|
112
|
-
# @return Boolean
|
113
|
-
def is_a_collection?
|
114
|
-
return true if mods_xml.xpath("/mods/typeOfResource/@collection").to_s == 'yes'
|
115
|
-
false
|
116
|
-
end
|
117
|
-
|
118
|
-
def physical_description_form
|
119
|
-
form = mods_xml.xpath('//physicalDescription/form/text()').to_s
|
120
|
-
media_type = mods_xml.xpath('//physicalDescription/internetMediaType/text()').to_s
|
121
|
-
if form != media_type && form
|
122
|
-
form
|
123
|
-
elsif media_type
|
124
|
-
media_type
|
125
|
-
else
|
126
|
-
nil
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
# Accept a nokogiri representation of a mods titleInfo element
|
133
|
-
# Return a formatted string of the title it describes
|
134
|
-
# @param node Nokogiri::XML::Element
|
135
|
-
# @return String
|
136
|
-
def extract_title_from_title_info(node)
|
137
|
-
"#{node.xpath('nonSort/text()')} #{node.xpath('title/text()')}".strip
|
138
|
-
end
|
139
|
-
|
140
|
-
# Accept a nokogiri representation of a mods titleInfo element
|
141
|
-
# Return a formatted string of the title it describes
|
142
|
-
# @param node Nokogiri::XML::Element
|
143
|
-
# @return String
|
144
|
-
def extract_full_title_from_title_info(node)
|
145
|
-
title = "#{node.xpath('nonSort/text()')} #{node.xpath('title/text()')}".strip
|
146
|
-
unless node.xpath('subTitle/text()').empty?
|
147
|
-
title = "#{title}: #{node.xpath('subTitle/text()')}"
|
148
|
-
end
|
149
|
-
return title
|
150
|
-
end
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
end
|
155
|
-
end
|
@@ -1,113 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe "Values for SearchWorks Solr" do
|
4
|
-
# from https://consul.stanford.edu/display/NGDE/Required+and+Recommended+Solr+Fields+for+SearchWorks+documents
|
5
|
-
before(:all) do
|
6
|
-
@ns_decl = "xmlns='#{Mods::MODS_NS}'"
|
7
|
-
end
|
8
|
-
|
9
|
-
context "required fields" do
|
10
|
-
context "DOR specific" do
|
11
|
-
# in harvestdor code: druid, parent_coll_ckey, id, collection
|
12
|
-
|
13
|
-
it "url_fulltext" do
|
14
|
-
pending "to be implemented"
|
15
|
-
end
|
16
|
-
it "mods_xml" do
|
17
|
-
pending "to be implemented"
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
it "all_search" do
|
22
|
-
pending "to be implemented"
|
23
|
-
end
|
24
|
-
|
25
|
-
it "format" do
|
26
|
-
pending "to be implemented, using SearchWorks controlled vocab"
|
27
|
-
end
|
28
|
-
|
29
|
-
# FIXME: update per gryphDOR code / searcworks code / new schema
|
30
|
-
it "collection" do
|
31
|
-
pending "to be implemented, using controlled vocab, in harvestdor"
|
32
|
-
end
|
33
|
-
|
34
|
-
it "display_type" do
|
35
|
-
pending "to be implemented, using controlled vocab"
|
36
|
-
end
|
37
|
-
|
38
|
-
end
|
39
|
-
|
40
|
-
context "strongly recommended fields" do
|
41
|
-
# access_facet has nothing to do with mods
|
42
|
-
# title convenience methods are implemented in the Mods gem; no special work here
|
43
|
-
context "title fields" do
|
44
|
-
context "for display" do
|
45
|
-
it "short title" do
|
46
|
-
pending "to be implemented"
|
47
|
-
end
|
48
|
-
it "full title" do
|
49
|
-
pending "to be implemented"
|
50
|
-
end
|
51
|
-
end
|
52
|
-
context "for searching" do
|
53
|
-
it "short title" do
|
54
|
-
pending "to be implemented"
|
55
|
-
end
|
56
|
-
it "full title" do
|
57
|
-
pending "to be implemented"
|
58
|
-
end
|
59
|
-
end
|
60
|
-
it "sortable title" do
|
61
|
-
pending "to be implemented"
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
context "recommended fields" do
|
67
|
-
context "publication date" do
|
68
|
-
it "for searching and facet" do
|
69
|
-
pending "to be implemented"
|
70
|
-
end
|
71
|
-
it "for sorting" do
|
72
|
-
pending "to be implemented"
|
73
|
-
end
|
74
|
-
it "for pub date grouping (hierarchical / date slider?)" do
|
75
|
-
pending "to be implemented"
|
76
|
-
end
|
77
|
-
end
|
78
|
-
context "language" do
|
79
|
-
it "should use the SearchWorks controlled vocabulary" do
|
80
|
-
m = "<mods #{@ns_decl}><language><languageTerm authority='iso639-2b' type='code'>per ara, dut</languageTerm></language></mods>"
|
81
|
-
r = Stanford::Mods::Record.new()
|
82
|
-
r.from_str(m)
|
83
|
-
langs = r.sw_language_facet
|
84
|
-
langs.size.should == 3
|
85
|
-
langs.should include("Persian")
|
86
|
-
langs.should include("Arabic")
|
87
|
-
langs.should include("Dutch")
|
88
|
-
langs.should_not include("Dutch; Flemish")
|
89
|
-
end
|
90
|
-
it "should not have duplicates" do
|
91
|
-
m = "<mods #{@ns_decl}><language><languageTerm type='code' authority='iso639-2b'>eng</languageTerm><languageTerm type='text'>English</languageTerm></language></mods>"
|
92
|
-
r = Stanford::Mods::Record.new
|
93
|
-
r.from_str(m)
|
94
|
-
langs = r.sw_language_facet
|
95
|
-
langs.size.should == 1
|
96
|
-
langs.should include("English")
|
97
|
-
end
|
98
|
-
|
99
|
-
end
|
100
|
-
context "authors" do
|
101
|
-
it "main author" do
|
102
|
-
pending "to be implemented"
|
103
|
-
end
|
104
|
-
it "additional authors" do
|
105
|
-
pending "to be implemented"
|
106
|
-
end
|
107
|
-
it "author sort" do
|
108
|
-
pending "to be implemented"
|
109
|
-
end
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
end
|