spotlight-dor-resources 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +4 -0
- data/.rubocop_todo.yml +0 -3
- data/lib/spotlight/dor/indexer.rb +25 -13
- data/lib/spotlight/dor/resources/version.rb +1 -1
- data/spec/unit/spotlight/dor/indexer_spec.rb +63 -1
- data/spotlight-dor-resources.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31130b0ef44659b04e226ce3f7d54afeb2fab9bd
|
4
|
+
data.tar.gz: 773201c4913261fb010cbd06a7bd027fb550acc1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dd69ded57b4a0daaa03b544218a95edb3b23a503ae54c56274f7f8cc06bb8949120f19af29b9047b81d743914dd2e601fbeb5b4a133edfe2718084457290a096
|
7
|
+
data.tar.gz: 90f34128fd6494c255f69f9cf4b430cdae7c060618ba648e99a982c3fdfdf2d41ade20b73d6febe48a8eae46b7dbf5823996b5469e25b756340a84fb217a2f04
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/.rubocop_todo.yml
CHANGED
@@ -31,11 +31,12 @@ module Spotlight::Dor
|
|
31
31
|
end
|
32
32
|
|
33
33
|
# add fields from raw mods
|
34
|
-
# see comment with add_donor_tags about Feigenbaum specific donor tags data
|
35
34
|
before_index :add_box
|
35
|
+
# see comment with add_donor_tags about Feigenbaum specific donor tags data
|
36
36
|
before_index :add_donor_tags
|
37
37
|
before_index :add_genre
|
38
38
|
before_index :add_folder
|
39
|
+
before_index :add_folder_name
|
39
40
|
before_index :add_series
|
40
41
|
before_index :mods_cartographics_indexing
|
41
42
|
|
@@ -72,7 +73,7 @@ module Spotlight::Dor
|
|
72
73
|
|
73
74
|
# This new donor_tags_sim field was added in October 2015 specifically for the Feigenbaum exhibit. It is very likely
|
74
75
|
# it will go unused by other projects, but should be benign (since this field will not be created if this specific MODS note is not found.)
|
75
|
-
# Later refactoring could include
|
76
|
+
# Later refactoring could include exhibit specific fields. Peter Mangiafico
|
76
77
|
def add_donor_tags sdb, solr_doc
|
77
78
|
donor_tags = sdb.smods_rec.note.select { |n| n.displayLabel == 'Donor tags' }.map(&:content)
|
78
79
|
insert_field solr_doc, 'donor_tags', donor_tags, :symbol # this is a _ssim field
|
@@ -86,21 +87,30 @@ module Spotlight::Dor
|
|
86
87
|
# _location.physicalLocation should find top level and relatedItem
|
87
88
|
folder_num = sdb.smods_rec._location.physicalLocation.map do |node|
|
88
89
|
val = node.text
|
89
|
-
|
90
|
-
match_data = val
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
end
|
90
|
+
|
91
|
+
match_data = if val =~ /\|/
|
92
|
+
# we assume the data is pipe-delimited, and may contain commas within values
|
93
|
+
val.match(/Folder ?:? ?([^|]+)/)
|
94
|
+
else
|
95
|
+
# the data should be comma-delimited, and may not contain commas within values
|
96
|
+
val.match(/Folder ?:? ?([^,]+)/)
|
97
|
+
end
|
98
|
+
|
99
|
+
match_data[1].strip if match_data.present?
|
100
100
|
end
|
101
|
+
|
101
102
|
solr_doc['folder_ssi'] = folder_num.first if folder_num.present?
|
102
103
|
end
|
103
104
|
|
105
|
+
# add the folder name to solr_doc as folder_name_ssi field (note: single valued!)
|
106
|
+
# data is specific to Feigenbaum collection and is in <note type='preferred citation'>
|
107
|
+
def add_folder_name(sdb, solr_doc)
|
108
|
+
# see spec for data examples
|
109
|
+
preferred_citation = sdb.smods_rec.note.select { |n| n.type_at == 'preferred citation' }.map(&:content)
|
110
|
+
match_data = preferred_citation.first.match(/Title: +(.+)/i) if preferred_citation.present?
|
111
|
+
solr_doc['folder_name_ssi'] = match_data[1].rstrip if match_data.present?
|
112
|
+
end
|
113
|
+
|
104
114
|
# add plain MODS <genre> element data, not the SearchWorks genre values
|
105
115
|
def add_genre sdb, solr_doc
|
106
116
|
insert_field solr_doc, 'genre', sdb.smods_rec.genre.content, :symbol # this is a _ssim field
|
@@ -121,6 +131,7 @@ module Spotlight::Dor
|
|
121
131
|
solr_doc['series_ssi'] = series_num.first if series_num.present?
|
122
132
|
end
|
123
133
|
|
134
|
+
# rubocop:disable Metrics/AbcSize
|
124
135
|
def mods_cartographics_indexing sdb, solr_doc
|
125
136
|
insert_field(solr_doc, "coordinates", Array(sdb.smods_rec.subject.cartographics.coordinates).map { |n| n.text }, :stored_searchable)
|
126
137
|
|
@@ -138,6 +149,7 @@ module Spotlight::Dor
|
|
138
149
|
solr_doc["point_bbox"] << "#{minX} #{minY} #{maxX} #{maxY}"
|
139
150
|
end
|
140
151
|
end
|
152
|
+
# rubocop:enable Metrics/AbcSize
|
141
153
|
|
142
154
|
def coord_to_decimal point
|
143
155
|
regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/
|
@@ -172,7 +172,9 @@ describe Spotlight::Dor::Indexer do
|
|
172
172
|
'Flat-box 228 | Volume 1': nil,
|
173
173
|
# shpc (actually in <relatedItem><location><physicalLocation>)
|
174
174
|
'Series Biographical Photographs | Box 1 | Folder Abbot, Nathan': 'Abbot, Nathan',
|
175
|
-
'Series General Photographs | Box 1 | Folder Administration building--Outer Quad': 'Administration building--Outer Quad'
|
175
|
+
'Series General Photographs | Box 1 | Folder Administration building--Outer Quad': 'Administration building--Outer Quad',
|
176
|
+
# hypothetical
|
177
|
+
'Folder: 42, Sheet: 15': '42'
|
176
178
|
}.each do |example, expected|
|
177
179
|
describe "for example '#{example}'" do
|
178
180
|
let(:example) { example }
|
@@ -197,4 +199,64 @@ describe Spotlight::Dor::Indexer do
|
|
197
199
|
end # for example
|
198
200
|
end # each
|
199
201
|
end # add_folder
|
202
|
+
|
203
|
+
let(:mods_note_plain) do
|
204
|
+
Nokogiri::XML <<-EOF
|
205
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
206
|
+
<note>#{example}</note>
|
207
|
+
</mods>
|
208
|
+
EOF
|
209
|
+
end
|
210
|
+
let(:mods_note_preferred_citation) do
|
211
|
+
Nokogiri::XML <<-EOF
|
212
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
213
|
+
<note type="preferred citation">#{example}</note>
|
214
|
+
</mods>
|
215
|
+
EOF
|
216
|
+
end
|
217
|
+
describe "#add_folder_name" do
|
218
|
+
# example string as key, expected folder name as value
|
219
|
+
# all from feigenbaum (or based on feigenbaum), as that is only coll
|
220
|
+
{
|
221
|
+
'Call Number: SC0340, Accession: 1986-052, Box: 20, Folder: 40, Title: S': 'S',
|
222
|
+
'Call Number: SC0340, Accession: 1986-052, Box: 54, Folder: 25, Title: Balzer': 'Balzer',
|
223
|
+
'Call Number: SC0340, Accession: 1986-052, Box : 30, Folder: 21, Title: Feigenbaum, Publications. 2 of 2.': 'Feigenbaum, Publications. 2 of 2.',
|
224
|
+
# colon in name
|
225
|
+
'Call Number: SC0340, Accession 2005-101, Box: 10, Folder: 26, Title: Gordon Bell Letter rdf:about blah (AI) 1987': 'Gordon Bell Letter rdf:about blah (AI) 1987',
|
226
|
+
'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 74, Title: Microcomputer Systems Proposal: blah blah': 'Microcomputer Systems Proposal: blah blah',
|
227
|
+
'Call Number: SC0340, Accession 2005-101, Box: 14, Folder: 20, Title: blah "bleah: blargW^"ugh" seriously?.': 'blah "bleah: blargW^"ugh" seriously?.',
|
228
|
+
# quotes in name
|
229
|
+
'Call Number: SC0340, Accession 2005-101, Box: 29, Folder: 18, Title: "bleah" blah': '"bleah" blah',
|
230
|
+
'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 58, Title: "M": blah': '"M": blah',
|
231
|
+
'Call Number: SC0340, Accession 2005-101, Box : 32A, Folder: 19, Title: blah "bleah" blue': 'blah "bleah" blue',
|
232
|
+
# not parseable
|
233
|
+
'Call Number: SC0340, Accession 2005-101': nil,
|
234
|
+
'Call Number: SC0340, Accession: 1986-052': nil,
|
235
|
+
'Call Number: SC0340, Accession: 1986-052, Box 36 Folder 38': nil,
|
236
|
+
'blah blah ... with the umbrella title Feigenbaum and Feldman, Computers and Thought II. blah blah': nil,
|
237
|
+
'blah blah ... Title ... blah blah': nil
|
238
|
+
}.each do |example, expected|
|
239
|
+
describe "for example '#{example}'" do
|
240
|
+
let(:example) { example }
|
241
|
+
context 'in preferred citation note' do
|
242
|
+
before do
|
243
|
+
allow(r).to receive(:mods).and_return(mods_note_preferred_citation)
|
244
|
+
subject.send(:add_folder_name, sdb, solr_doc)
|
245
|
+
end
|
246
|
+
it "has the expected folder name '#{expected}'" do
|
247
|
+
expect(solr_doc['folder_name_ssi']).to eq expected
|
248
|
+
end
|
249
|
+
end
|
250
|
+
context 'in plain note' do
|
251
|
+
before do
|
252
|
+
allow(r).to receive(:mods).and_return(mods_note_plain)
|
253
|
+
subject.send(:add_folder_name, sdb, solr_doc)
|
254
|
+
end
|
255
|
+
it 'does not have a folder name' do
|
256
|
+
expect(solr_doc['folder_name_ssi']).to be_falsey
|
257
|
+
end
|
258
|
+
end
|
259
|
+
end # for example
|
260
|
+
end # each
|
261
|
+
end # add_folder_name
|
200
262
|
end
|
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Chris Beer"]
|
10
10
|
spec.email = ["cabeer@stanford.edu"]
|
11
11
|
spec.summary = "Spotlight resource indexer for DOR resources."
|
12
|
-
spec.homepage = ""
|
12
|
+
spec.homepage = "https://github.com/sul-dlss/spotlight-dor-resources"
|
13
13
|
spec.license = "Apache 2"
|
14
14
|
|
15
15
|
spec.files = `git ls-files -z`.split("\x0")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spotlight-dor-resources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Beer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -292,7 +292,7 @@ files:
|
|
292
292
|
- spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml
|
293
293
|
- spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml
|
294
294
|
- spotlight-dor-resources.gemspec
|
295
|
-
homepage:
|
295
|
+
homepage: https://github.com/sul-dlss/spotlight-dor-resources
|
296
296
|
licenses:
|
297
297
|
- Apache 2
|
298
298
|
metadata: {}
|