spotlight-dor-resources 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +4 -0
- data/.rubocop_todo.yml +0 -3
- data/lib/spotlight/dor/indexer.rb +25 -13
- data/lib/spotlight/dor/resources/version.rb +1 -1
- data/spec/unit/spotlight/dor/indexer_spec.rb +63 -1
- data/spotlight-dor-resources.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31130b0ef44659b04e226ce3f7d54afeb2fab9bd
|
4
|
+
data.tar.gz: 773201c4913261fb010cbd06a7bd027fb550acc1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dd69ded57b4a0daaa03b544218a95edb3b23a503ae54c56274f7f8cc06bb8949120f19af29b9047b81d743914dd2e601fbeb5b4a133edfe2718084457290a096
|
7
|
+
data.tar.gz: 90f34128fd6494c255f69f9cf4b430cdae7c060618ba648e99a982c3fdfdf2d41ade20b73d6febe48a8eae46b7dbf5823996b5469e25b756340a84fb217a2f04
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/.rubocop_todo.yml
CHANGED
@@ -31,11 +31,12 @@ module Spotlight::Dor
|
|
31
31
|
end
|
32
32
|
|
33
33
|
# add fields from raw mods
|
34
|
-
# see comment with add_donor_tags about Feigenbaum specific donor tags data
|
35
34
|
before_index :add_box
|
35
|
+
# see comment with add_donor_tags about Feigenbaum specific donor tags data
|
36
36
|
before_index :add_donor_tags
|
37
37
|
before_index :add_genre
|
38
38
|
before_index :add_folder
|
39
|
+
before_index :add_folder_name
|
39
40
|
before_index :add_series
|
40
41
|
before_index :mods_cartographics_indexing
|
41
42
|
|
@@ -72,7 +73,7 @@ module Spotlight::Dor
|
|
72
73
|
|
73
74
|
# This new donor_tags_sim field was added in October 2015 specifically for the Feigenbaum exhibit. It is very likely
|
74
75
|
# it will go unused by other projects, but should be benign (since this field will not be created if this specific MODs note is not found.)
|
75
|
-
# Later refactoring could include
|
76
|
+
# Later refactoring could include exhibit specific fields. Peter Mangiafico
|
76
77
|
def add_donor_tags sdb, solr_doc
|
77
78
|
donor_tags = sdb.smods_rec.note.select { |n| n.displayLabel == 'Donor tags' }.map(&:content)
|
78
79
|
insert_field solr_doc, 'donor_tags', donor_tags, :symbol # this is a _ssim field
|
@@ -86,21 +87,30 @@ module Spotlight::Dor
|
|
86
87
|
# _location.physicalLocation should find top level and relatedItem
|
87
88
|
folder_num = sdb.smods_rec._location.physicalLocation.map do |node|
|
88
89
|
val = node.text
|
89
|
-
|
90
|
-
match_data = val
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
end
|
90
|
+
|
91
|
+
match_data = if val =~ /\|/
|
92
|
+
# we assume the data is pipe-delimited, and may contain commas within values
|
93
|
+
val.match(/Folder ?:? ?([^|]+)/)
|
94
|
+
else
|
95
|
+
# the data should be comma-delimited, and may not contain commas within values
|
96
|
+
val.match(/Folder ?:? ?([^,]+)/)
|
97
|
+
end
|
98
|
+
|
99
|
+
match_data[1].strip if match_data.present?
|
100
100
|
end
|
101
|
+
|
101
102
|
solr_doc['folder_ssi'] = folder_num.first if folder_num.present?
|
102
103
|
end
|
103
104
|
|
105
|
+
# add the folder name to solr_doc as folder_name_ssi field (note: single valued!)
|
106
|
+
# data is specific to Feigenbaum collection and is in <note type='preferred citation'>
|
107
|
+
def add_folder_name(sdb, solr_doc)
|
108
|
+
# see spec for data examples
|
109
|
+
preferred_citation = sdb.smods_rec.note.select { |n| n.type_at == 'preferred citation' }.map(&:content)
|
110
|
+
match_data = preferred_citation.first.match(/Title: +(.+)/i) if preferred_citation.present?
|
111
|
+
solr_doc['folder_name_ssi'] = match_data[1].rstrip if match_data.present?
|
112
|
+
end
|
113
|
+
|
104
114
|
# add plain MODS <genre> element data, not the SearchWorks genre values
|
105
115
|
def add_genre sdb, solr_doc
|
106
116
|
insert_field solr_doc, 'genre', sdb.smods_rec.genre.content, :symbol # this is a _ssim field
|
@@ -121,6 +131,7 @@ module Spotlight::Dor
|
|
121
131
|
solr_doc['series_ssi'] = series_num.first if series_num.present?
|
122
132
|
end
|
123
133
|
|
134
|
+
# rubocop:disable Metrics/AbcSize
|
124
135
|
def mods_cartographics_indexing sdb, solr_doc
|
125
136
|
insert_field(solr_doc, "coordinates", Array(sdb.smods_rec.subject.cartographics.coordinates).map { |n| n.text }, :stored_searchable)
|
126
137
|
|
@@ -138,6 +149,7 @@ module Spotlight::Dor
|
|
138
149
|
solr_doc["point_bbox"] << "#{minX} #{minY} #{maxX} #{maxY}"
|
139
150
|
end
|
140
151
|
end
|
152
|
+
# rubocop:enable Metrics/AbcSize
|
141
153
|
|
142
154
|
def coord_to_decimal point
|
143
155
|
regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/
|
@@ -172,7 +172,9 @@ describe Spotlight::Dor::Indexer do
|
|
172
172
|
'Flat-box 228 | Volume 1': nil,
|
173
173
|
# shpc (actually in <relatedItem><location><physicalLocation>)
|
174
174
|
'Series Biographical Photographs | Box 1 | Folder Abbot, Nathan': 'Abbot, Nathan',
|
175
|
-
'Series General Photographs | Box 1 | Folder Administration building--Outer Quad': 'Administration building--Outer Quad'
|
175
|
+
'Series General Photographs | Box 1 | Folder Administration building--Outer Quad': 'Administration building--Outer Quad',
|
176
|
+
# hypothetical
|
177
|
+
'Folder: 42, Sheet: 15': '42'
|
176
178
|
}.each do |example, expected|
|
177
179
|
describe "for example '#{example}'" do
|
178
180
|
let(:example) { example }
|
@@ -197,4 +199,64 @@ describe Spotlight::Dor::Indexer do
|
|
197
199
|
end # for example
|
198
200
|
end # each
|
199
201
|
end # add_folder
|
202
|
+
|
203
|
+
let(:mods_note_plain) do
|
204
|
+
Nokogiri::XML <<-EOF
|
205
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
206
|
+
<note>#{example}</note>
|
207
|
+
</mods>
|
208
|
+
EOF
|
209
|
+
end
|
210
|
+
let(:mods_note_preferred_citation) do
|
211
|
+
Nokogiri::XML <<-EOF
|
212
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
213
|
+
<note type="preferred citation">#{example}</note>
|
214
|
+
</mods>
|
215
|
+
EOF
|
216
|
+
end
|
217
|
+
describe "#add_folder_name" do
|
218
|
+
# example string as key, expected folder name as value
|
219
|
+
# all from feigenbaum (or based on feigenbaum), as that is only coll
|
220
|
+
{
|
221
|
+
'Call Number: SC0340, Accession: 1986-052, Box: 20, Folder: 40, Title: S': 'S',
|
222
|
+
'Call Number: SC0340, Accession: 1986-052, Box: 54, Folder: 25, Title: Balzer': 'Balzer',
|
223
|
+
'Call Number: SC0340, Accession: 1986-052, Box : 30, Folder: 21, Title: Feigenbaum, Publications. 2 of 2.': 'Feigenbaum, Publications. 2 of 2.',
|
224
|
+
# colon in name
|
225
|
+
'Call Number: SC0340, Accession 2005-101, Box: 10, Folder: 26, Title: Gordon Bell Letter rdf:about blah (AI) 1987': 'Gordon Bell Letter rdf:about blah (AI) 1987',
|
226
|
+
'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 74, Title: Microcomputer Systems Proposal: blah blah': 'Microcomputer Systems Proposal: blah blah',
|
227
|
+
'Call Number: SC0340, Accession 2005-101, Box: 14, Folder: 20, Title: blah "bleah: blargW^"ugh" seriously?.': 'blah "bleah: blargW^"ugh" seriously?.',
|
228
|
+
# quotes in name
|
229
|
+
'Call Number: SC0340, Accession 2005-101, Box: 29, Folder: 18, Title: "bleah" blah': '"bleah" blah',
|
230
|
+
'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 58, Title: "M": blah': '"M": blah',
|
231
|
+
'Call Number: SC0340, Accession 2005-101, Box : 32A, Folder: 19, Title: blah "bleah" blue': 'blah "bleah" blue',
|
232
|
+
# not parseable
|
233
|
+
'Call Number: SC0340, Accession 2005-101': nil,
|
234
|
+
'Call Number: SC0340, Accession: 1986-052': nil,
|
235
|
+
'Call Number: SC0340, Accession: 1986-052, Box 36 Folder 38': nil,
|
236
|
+
'blah blah ... with the umbrella title Feigenbaum and Feldman, Computers and Thought II. blah blah': nil,
|
237
|
+
'blah blah ... Title ... blah blah': nil
|
238
|
+
}.each do |example, expected|
|
239
|
+
describe "for example '#{example}'" do
|
240
|
+
let(:example) { example }
|
241
|
+
context 'in preferred citation note' do
|
242
|
+
before do
|
243
|
+
allow(r).to receive(:mods).and_return(mods_note_preferred_citation)
|
244
|
+
subject.send(:add_folder_name, sdb, solr_doc)
|
245
|
+
end
|
246
|
+
it "has the expected folder name '#{expected}'" do
|
247
|
+
expect(solr_doc['folder_name_ssi']).to eq expected
|
248
|
+
end
|
249
|
+
end
|
250
|
+
context 'in plain note' do
|
251
|
+
before do
|
252
|
+
allow(r).to receive(:mods).and_return(mods_note_plain)
|
253
|
+
subject.send(:add_folder_name, sdb, solr_doc)
|
254
|
+
end
|
255
|
+
it 'does not have a folder name' do
|
256
|
+
expect(solr_doc['folder_name_ssi']).to be_falsey
|
257
|
+
end
|
258
|
+
end
|
259
|
+
end # for example
|
260
|
+
end # each
|
261
|
+
end # add_folder_name
|
200
262
|
end
|
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Chris Beer"]
|
10
10
|
spec.email = ["cabeer@stanford.edu"]
|
11
11
|
spec.summary = "Spotlight resource indexer for DOR resources."
|
12
|
-
spec.homepage = ""
|
12
|
+
spec.homepage = "https://github.com/sul-dlss/spotlight-dor-resources"
|
13
13
|
spec.license = "Apache 2"
|
14
14
|
|
15
15
|
spec.files = `git ls-files -z`.split("\x0")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spotlight-dor-resources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Beer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -292,7 +292,7 @@ files:
|
|
292
292
|
- spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml
|
293
293
|
- spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml
|
294
294
|
- spotlight-dor-resources.gemspec
|
295
|
-
homepage:
|
295
|
+
homepage: https://github.com/sul-dlss/spotlight-dor-resources
|
296
296
|
licenses:
|
297
297
|
- Apache 2
|
298
298
|
metadata: {}
|