spotlight-dor-resources 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/spotlight/dor/indexer.rb +28 -47
- data/lib/spotlight/dor/resources.rb +1 -1
- data/lib/spotlight/dor/resources/version.rb +1 -1
- data/spec/lib/spotlight/dor/indexer_spec.rb +323 -166
- data/spotlight-dor-resources.gemspec +1 -2
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: df866b5840fd2ef1f3068c6244ef96aaf50f463b
|
4
|
+
data.tar.gz: 484fb0089d890785d1b7cbfa9560cd7a95f9b83c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1dee32477104678ade84fc9ccdc0400c56a2981af353063dac19a6415938e2c010e2c7f3137bb1b4cd06cad33afb47863bb518205db044e2221612d5291af413
|
7
|
+
data.tar.gz: f2b17bd142699edda90806158fcd60dba2ad687d28d16b22c08fae04df58f2e79a2705d2603e0d8010a8ea37f53606647f8d140f770ed71e79254bf01994e803
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
[![Build Status](https://travis-ci.org/sul-dlss/spotlight-dor-resources.png?branch=master)](https://travis-ci.org/sul-dlss/spotlight-dor-resources) | [![Coverage Status](https://coveralls.io/repos/sul-dlss/spotlight-dor-resources/badge.png?branch=master)](https://coveralls.io/r/sul-dlss/spotlight-dor-resources) | [![Gem Version](https://badge.fury.io/rb/spotlight-dor-resources.png)](http://badge.fury.io/rb/spotlight-dor-resources)
|
1
|
+
[![Build Status](https://travis-ci.org/sul-dlss/spotlight-dor-resources.png?branch=master)](https://travis-ci.org/sul-dlss/spotlight-dor-resources) | [![Coverage Status](https://coveralls.io/repos/sul-dlss/spotlight-dor-resources/badge.png?branch=master)](https://coveralls.io/r/sul-dlss/spotlight-dor-resources) | [![Dependency Status](https://gemnasium.com/sul-dlss/spotlight-dor-resources.svg)](https://gemnasium.com/sul-dlss/spotlight-dor-resources) | [![Gem Version](https://badge.fury.io/rb/spotlight-dor-resources.png)](http://badge.fury.io/rb/spotlight-dor-resources)
|
2
2
|
|
3
3
|
# Spotlight::Dor::Resources
|
4
4
|
|
@@ -34,7 +34,7 @@ For access to a Rails console with the gem loaded up for testing purposes, you c
|
|
34
34
|
### First time only configuration for local testing
|
35
35
|
|
36
36
|
$ bundle
|
37
|
-
$ bundle exec rake
|
37
|
+
$ bundle exec rake ci
|
38
38
|
|
39
39
|
This will download a test jetty instance (to run Solr), generate a testing app at ```.internal_test_app``` and run the tests.
|
40
40
|
|
@@ -32,9 +32,11 @@ module Spotlight::Dor
|
|
32
32
|
before_index :add_author_no_collector
|
33
33
|
before_index :add_box
|
34
34
|
before_index :add_collector
|
35
|
+
before_index :add_coordinates
|
35
36
|
before_index :add_folder
|
36
37
|
before_index :add_genre
|
37
38
|
before_index :add_location
|
39
|
+
before_index :add_point_bbox
|
38
40
|
before_index :add_series
|
39
41
|
end
|
40
42
|
|
@@ -48,6 +50,12 @@ module Spotlight::Dor
|
|
48
50
|
solr_doc['box_ssi'] = sdb.smods_rec.box
|
49
51
|
end
|
50
52
|
|
53
|
+
# add coordinates solr field containing the cartographic coordinates per
|
54
|
+
# MODS subject.cartographics.coordinates (via stanford-mods gem)
|
55
|
+
def add_coordinates(sdb, solr_doc)
|
56
|
+
solr_doc['coordinates'] = sdb.smods_rec.coordinates
|
57
|
+
end
|
58
|
+
|
51
59
|
# add collector_ssim solr field containing the collector per MODS names (via stanford-mods gem)
|
52
60
|
def add_collector(sdb, solr_doc)
|
53
61
|
insert_field solr_doc, 'collector', sdb.smods_rec.collectors_w_dates, :symbol # _ssim field
|
@@ -66,10 +74,16 @@ module Spotlight::Dor
|
|
66
74
|
solr_doc['location_ssi'] = sdb.smods_rec.location
|
67
75
|
end
|
68
76
|
|
77
|
+
# add point_bbox solr field containing the point bounding box per
|
78
|
+
# MODS subject.cartographics.coordinates (via stanford-mods gem)
|
79
|
+
def add_point_bbox(sdb, solr_doc)
|
80
|
+
solr_doc['point_bbox'] = sdb.smods_rec.point_bbox
|
81
|
+
end
|
82
|
+
|
69
83
|
def add_series(sdb, solr_doc)
|
70
84
|
solr_doc['series_ssi'] = sdb.smods_rec.series
|
71
85
|
end
|
72
|
-
end
|
86
|
+
end # StanfordMods concern
|
73
87
|
|
74
88
|
concerning :ContentMetadata do
|
75
89
|
included do
|
@@ -125,8 +139,15 @@ module Spotlight::Dor
|
|
125
139
|
# only create these fields on an as-needed basis.
|
126
140
|
|
127
141
|
included do
|
142
|
+
before_index :add_document_subtype
|
128
143
|
before_index :add_donor_tags
|
129
144
|
before_index :add_folder_name
|
145
|
+
before_index :add_general_notes
|
146
|
+
end
|
147
|
+
|
148
|
+
def add_document_subtype(sdb, solr_doc)
|
149
|
+
subtype = sdb.smods_rec.note.select { |n| n.displayLabel == 'Document subtype' }.map(&:content)
|
150
|
+
solr_doc['doc_subtype_ssi'] = subtype.first if subtype.size > 0
|
130
151
|
end
|
131
152
|
|
132
153
|
def add_donor_tags(sdb, solr_doc)
|
@@ -142,7 +163,12 @@ module Spotlight::Dor
|
|
142
163
|
match_data = preferred_citation.first.match(/Title: +(.+)/i) if preferred_citation.present?
|
143
164
|
solr_doc['folder_name_ssi'] = match_data[1].strip if match_data.present?
|
144
165
|
end
|
145
|
-
|
166
|
+
|
167
|
+
def add_general_notes(sdb, solr_doc)
|
168
|
+
general_notes = sdb.smods_rec.note.select { |n| n.type_at.blank? && n.displayLabel.blank? }.map(&:content)
|
169
|
+
insert_field solr_doc, 'general_notes', general_notes, :symbol # this is a _ssim field
|
170
|
+
end
|
171
|
+
end # end feigenbaum specific fields
|
146
172
|
|
147
173
|
concerning :FullTextIndexing do
|
148
174
|
included do
|
@@ -187,51 +213,6 @@ module Spotlight::Dor
|
|
187
213
|
end
|
188
214
|
end
|
189
215
|
|
190
|
-
concerning :CartographicIndexing do
|
191
|
-
included do
|
192
|
-
before_index :mods_cartographics_indexing
|
193
|
-
end
|
194
|
-
|
195
|
-
def mods_cartographics_indexing(sdb, solr_doc)
|
196
|
-
coordinates = Array(sdb.smods_rec.subject.cartographics.coordinates)
|
197
|
-
|
198
|
-
insert_field(solr_doc, 'coordinates', coordinates.map(&:text), :stored_searchable)
|
199
|
-
|
200
|
-
solr_doc['point_bbox'] ||= []
|
201
|
-
solr_doc['point_bbox'] += coords_to_bboxes(coordinates)
|
202
|
-
end
|
203
|
-
|
204
|
-
private
|
205
|
-
|
206
|
-
def coords_to_bboxes(coordinates)
|
207
|
-
coordinates.select { |n| n.text =~ /^\(.*\)$/ }.map do |n|
|
208
|
-
coord_to_bbox(n.text)
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
|
-
def coord_to_bbox(coord)
|
213
|
-
bbox = coord.delete('(').delete(')')
|
214
|
-
|
215
|
-
lng, lat = bbox.split('/')
|
216
|
-
|
217
|
-
min_x, max_x = lng.split('--').map { |x| coord_to_decimal(x) }
|
218
|
-
max_y, min_y = lat.split('--').map { |y| coord_to_decimal(y) }
|
219
|
-
"#{min_x} #{min_y} #{max_x} #{max_y}"
|
220
|
-
end
|
221
|
-
|
222
|
-
def coord_to_decimal(point)
|
223
|
-
regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/
|
224
|
-
match = regex.match(point)
|
225
|
-
dec = 0
|
226
|
-
|
227
|
-
dec += match['deg'].to_i
|
228
|
-
dec += match['sec'].to_f / 60
|
229
|
-
dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
|
230
|
-
|
231
|
-
dec
|
232
|
-
end
|
233
|
-
end
|
234
|
-
|
235
216
|
def insert_field(solr_doc, field, values, *args)
|
236
217
|
Array(values).each do |v|
|
237
218
|
Solrizer.insert_field solr_doc, field, v, *args
|
@@ -76,49 +76,188 @@ describe Spotlight::Dor::Indexer do
|
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
79
|
+
context 'Feigbenbaum specific fields concern' do
|
80
|
+
describe '#add_document_subtype' do
|
81
|
+
before do
|
82
|
+
allow(r).to receive(:mods).and_return(mods)
|
83
|
+
subject.send(:add_document_subtype, sdb, solr_doc)
|
84
|
+
end
|
84
85
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
86
|
+
context 'with a record without document subtype' do
|
87
|
+
let(:mods) do
|
88
|
+
Nokogiri::XML <<-EOF
|
89
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
90
|
+
<note displayLabel="preferred citation">(not a document subtype)</note>
|
91
|
+
<note>a generic note</note>
|
92
|
+
</mods>
|
93
|
+
EOF
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'is blank' do
|
97
|
+
expect(solr_doc['doc_subtype_ssi']).to be_blank
|
98
|
+
end
|
92
99
|
end
|
93
100
|
|
94
|
-
|
95
|
-
|
101
|
+
context 'with a record with document subtype' do
|
102
|
+
let(:mods) do
|
103
|
+
Nokogiri::XML <<-EOF
|
104
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
105
|
+
<note displayLabel="Document subtype">memorandums</note>
|
106
|
+
<note>a generic note</note>
|
107
|
+
</mods>
|
108
|
+
EOF
|
109
|
+
end
|
110
|
+
|
111
|
+
it 'extracts the doc subtypes' do
|
112
|
+
expect(solr_doc['doc_subtype_ssi']).to eq('memorandums')
|
113
|
+
end
|
96
114
|
end
|
97
|
-
end
|
115
|
+
end # doc subtype
|
98
116
|
|
99
|
-
|
100
|
-
|
101
|
-
|
117
|
+
describe '#add_donor_tags' do
|
118
|
+
before do
|
119
|
+
allow(r).to receive(:mods).and_return(mods)
|
120
|
+
subject.send(:add_donor_tags, sdb, solr_doc)
|
121
|
+
end
|
122
|
+
|
123
|
+
context 'with a record without donor tags' do
|
124
|
+
let(:mods) do
|
125
|
+
Nokogiri::XML <<-EOF
|
126
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
127
|
+
<note displayLabel="preferred citation">(not a donor tag)</note>
|
128
|
+
</mods>
|
129
|
+
EOF
|
130
|
+
end
|
131
|
+
|
132
|
+
it 'is blank' do
|
133
|
+
expect(solr_doc['donor_tags_ssim']).to be_blank
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
context 'with a record with donor tags' do
|
138
|
+
let(:mods) do
|
139
|
+
# e.g. from https://purl.stanford.edu/vw282gv1740
|
140
|
+
Nokogiri::XML <<-EOF
|
141
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
142
|
+
<note displayLabel="Donor tags">Knowledge Systems Laboratory</note>
|
143
|
+
<note displayLabel="Donor tags">medical applications</note>
|
144
|
+
<note displayLabel="Donor tags">Publishing</note>
|
145
|
+
<note displayLabel="Donor tags">Stanford</note>
|
146
|
+
<note displayLabel="Donor tags">Stanford Computer Science Department</note>
|
147
|
+
</mods>
|
148
|
+
EOF
|
149
|
+
end
|
150
|
+
|
151
|
+
it 'extracts the donor tags' do
|
152
|
+
expect(solr_doc['donor_tags_ssim']).to contain_exactly 'Knowledge Systems Laboratory',
|
153
|
+
'medical applications',
|
154
|
+
'Publishing',
|
155
|
+
'Stanford',
|
156
|
+
'Stanford Computer Science Department'
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end # donor tags
|
160
|
+
|
161
|
+
# rubocop:disable Metrics/LineLength
|
162
|
+
describe '#add_folder_name' do
|
163
|
+
let(:mods_note_plain) do
|
102
164
|
Nokogiri::XML <<-EOF
|
103
165
|
<mods xmlns="#{Mods::MODS_NS}">
|
104
|
-
<note
|
105
|
-
<note displayLabel="Donor tags">medical applications</note>
|
106
|
-
<note displayLabel="Donor tags">Publishing</note>
|
107
|
-
<note displayLabel="Donor tags">Stanford</note>
|
108
|
-
<note displayLabel="Donor tags">Stanford Computer Science Department</note>
|
166
|
+
<note>#{example}</note>
|
109
167
|
</mods>
|
110
|
-
|
168
|
+
EOF
|
111
169
|
end
|
170
|
+
let(:mods_note_preferred_citation) do
|
171
|
+
Nokogiri::XML <<-EOF
|
172
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
173
|
+
<note type="preferred citation">#{example}</note>
|
174
|
+
</mods>
|
175
|
+
EOF
|
176
|
+
end
|
177
|
+
# example string as key, expected folder name as value
|
178
|
+
# all from feigenbaum (or based on feigenbaum), as that is only coll with this data
|
179
|
+
{
|
180
|
+
'Call Number: SC0340, Accession: 1986-052, Box: 20, Folder: 40, Title: S': 'S',
|
181
|
+
'Call Number: SC0340, Accession: 1986-052, Box: 54, Folder: 25, Title: Balzer': 'Balzer',
|
182
|
+
'Call Number: SC0340, Accession: 1986-052, Box : 30, Folder: 21, Title: Feigenbaum, Publications. 2 of 2.': 'Feigenbaum, Publications. 2 of 2.',
|
183
|
+
# colon in name
|
184
|
+
'Call Number: SC0340, Accession 2005-101, Box: 10, Folder: 26, Title: Gordon Bell Letter rdf:about blah (AI) 1987': 'Gordon Bell Letter rdf:about blah (AI) 1987',
|
185
|
+
'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 74, Title: Microcomputer Systems Proposal: blah blah': 'Microcomputer Systems Proposal: blah blah',
|
186
|
+
'Call Number: SC0340, Accession 2005-101, Box: 14, Folder: 20, Title: blah "bleah: blargW^"ugh" seriously?.': 'blah "bleah: blargW^"ugh" seriously?.',
|
187
|
+
# quotes in name
|
188
|
+
'Call Number: SC0340, Accession 2005-101, Box: 29, Folder: 18, Title: "bleah" blah': '"bleah" blah',
|
189
|
+
'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 58, Title: "M": blah': '"M": blah',
|
190
|
+
'Call Number: SC0340, Accession 2005-101, Box : 32A, Folder: 19, Title: blah "bleah" blue': 'blah "bleah" blue',
|
191
|
+
# not parseable
|
192
|
+
'Call Number: SC0340, Accession 2005-101': nil,
|
193
|
+
'Call Number: SC0340, Accession: 1986-052': nil,
|
194
|
+
'Call Number: SC0340, Accession: 1986-052, Box 36 Folder 38': nil,
|
195
|
+
'blah blah ... with the umbrella title Feigenbaum and Feldman, Computers and Thought II. blah blah': nil,
|
196
|
+
'blah blah ... Title ... blah blah': nil
|
197
|
+
}.each do |example, expected|
|
198
|
+
describe "for example '#{example}'" do
|
199
|
+
let(:example) { example }
|
200
|
+
context 'in preferred citation note' do
|
201
|
+
before do
|
202
|
+
allow(r).to receive(:mods).and_return(mods_note_preferred_citation)
|
203
|
+
subject.send(:add_folder_name, sdb, solr_doc)
|
204
|
+
end
|
205
|
+
it "has the expected folder name '#{expected}'" do
|
206
|
+
expect(solr_doc['folder_name_ssi']).to eq expected
|
207
|
+
end
|
208
|
+
end
|
209
|
+
context 'in plain note' do
|
210
|
+
before do
|
211
|
+
allow(r).to receive(:mods).and_return(mods_note_plain)
|
212
|
+
subject.send(:add_folder_name, sdb, solr_doc)
|
213
|
+
end
|
214
|
+
it 'does not have a folder name' do
|
215
|
+
expect(solr_doc['folder_name_ssi']).to be_falsey
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end # for example
|
219
|
+
end # each
|
220
|
+
end # add_folder_name
|
221
|
+
# rubocop:enable Metrics/LineLength
|
112
222
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
'Stanford',
|
118
|
-
'Stanford Computer Science Department'
|
223
|
+
describe '#add_general_notes' do
|
224
|
+
before do
|
225
|
+
allow(r).to receive(:mods).and_return(mods)
|
226
|
+
subject.send(:add_general_notes, sdb, solr_doc)
|
119
227
|
end
|
120
|
-
|
121
|
-
|
228
|
+
|
229
|
+
context 'no general notes, but other types of notes' do
|
230
|
+
let(:mods) do
|
231
|
+
Nokogiri::XML <<-EOF
|
232
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
233
|
+
<note displayLabel="preferred citation">(not a document subtype)</note>
|
234
|
+
<note displayLabel="Document subtype">memorandums</note>
|
235
|
+
<note displayLabel="Donor tags">Knowledge Systems Laboratory</note>
|
236
|
+
</mods>
|
237
|
+
EOF
|
238
|
+
end
|
239
|
+
|
240
|
+
it 'is blank' do
|
241
|
+
expect(solr_doc['general_notes_ssim']).to be_blank
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
context 'ignore extra notes' do
|
246
|
+
let(:mods) do
|
247
|
+
Nokogiri::XML <<-EOF
|
248
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
249
|
+
<note displayLabel="Document subtype">memorandums</note>
|
250
|
+
<note>a generic note</note>
|
251
|
+
</mods>
|
252
|
+
EOF
|
253
|
+
end
|
254
|
+
|
255
|
+
it 'extracts the doc subtypes' do
|
256
|
+
expect(solr_doc['general_notes_ssim']).to contain_exactly 'a generic note'
|
257
|
+
end
|
258
|
+
end
|
259
|
+
end # general notes
|
260
|
+
end # feigbenbaum specific fields concern
|
122
261
|
|
123
262
|
context 'StanfordMods concern' do
|
124
263
|
describe '#add_author_no_collector' do
|
@@ -216,6 +355,44 @@ describe Spotlight::Dor::Indexer do
|
|
216
355
|
end
|
217
356
|
end
|
218
357
|
|
358
|
+
describe '#add_coordinates' do
|
359
|
+
before do
|
360
|
+
allow(r).to receive(:mods).and_return(mods)
|
361
|
+
subject.send(:add_coordinates, sdb, solr_doc)
|
362
|
+
end
|
363
|
+
context 'with a record without coordinates' do
|
364
|
+
let(:mods) do
|
365
|
+
Nokogiri::XML <<-EOF
|
366
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
367
|
+
</mods>
|
368
|
+
EOF
|
369
|
+
end
|
370
|
+
|
371
|
+
it 'is blank' do
|
372
|
+
expect(solr_doc['coordinates']).to be_blank
|
373
|
+
end
|
374
|
+
end
|
375
|
+
context 'with a record with coordinates' do
|
376
|
+
let(:mods) do
|
377
|
+
# e.g. from https://purl.stanford.edu/vw282gv1740
|
378
|
+
Nokogiri::XML <<-EOF
|
379
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
380
|
+
<subject>
|
381
|
+
<cartographics>
|
382
|
+
<scale>Scale 1:500,000</scale>
|
383
|
+
<coordinates>(W16°--E28°/N13°--S15°).</coordinates>
|
384
|
+
</cartographics>
|
385
|
+
</subject>
|
386
|
+
</mods>
|
387
|
+
EOF
|
388
|
+
end
|
389
|
+
|
390
|
+
it 'extracts the coordinates' do
|
391
|
+
expect(solr_doc['coordinates']).to eq(['(W16°--E28°/N13°--S15°).'])
|
392
|
+
end
|
393
|
+
end
|
394
|
+
end # add_coordinates
|
395
|
+
|
219
396
|
describe '#add_folder' do
|
220
397
|
before do
|
221
398
|
allow(r).to receive(:mods).and_return(mods)
|
@@ -321,6 +498,44 @@ describe Spotlight::Dor::Indexer do
|
|
321
498
|
end
|
322
499
|
end # add_location
|
323
500
|
|
501
|
+
describe '#add_point_bbox' do
|
502
|
+
before do
|
503
|
+
allow(r).to receive(:mods).and_return(mods)
|
504
|
+
subject.send(:add_point_bbox, sdb, solr_doc)
|
505
|
+
end
|
506
|
+
context 'with a record without coordinates' do
|
507
|
+
let(:mods) do
|
508
|
+
Nokogiri::XML <<-EOF
|
509
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
510
|
+
</mods>
|
511
|
+
EOF
|
512
|
+
end
|
513
|
+
|
514
|
+
it 'is blank' do
|
515
|
+
expect(solr_doc['point_bbox']).to be_blank
|
516
|
+
end
|
517
|
+
end
|
518
|
+
context 'with a record with coordinates' do
|
519
|
+
let(:mods) do
|
520
|
+
# e.g. from https://purl.stanford.edu/vw282gv1740
|
521
|
+
Nokogiri::XML <<-EOF
|
522
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
523
|
+
<subject>
|
524
|
+
<cartographics>
|
525
|
+
<scale>Scale 1:500,000</scale>
|
526
|
+
<coordinates>(W16°--E28°/N13°--S15°).</coordinates>
|
527
|
+
</cartographics>
|
528
|
+
</subject>
|
529
|
+
</mods>
|
530
|
+
EOF
|
531
|
+
end
|
532
|
+
|
533
|
+
it 'extracts the point_bbox' do
|
534
|
+
expect(solr_doc['point_bbox']).to eq(['-16.0 -15.0 28.0 13.0'])
|
535
|
+
end
|
536
|
+
end
|
537
|
+
end # add_point_bbox
|
538
|
+
|
324
539
|
describe '#add_series' do
|
325
540
|
before do
|
326
541
|
allow(r).to receive(:mods).and_return(mods)
|
@@ -358,139 +573,81 @@ describe Spotlight::Dor::Indexer do
|
|
358
573
|
end # context StanfordMods concern
|
359
574
|
|
360
575
|
# rubocop:disable Metrics/LineLength
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
'
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
576
|
+
context 'Full Text Indexing concern' do
|
577
|
+
describe '#add_object_full_text' do
|
578
|
+
let(:full_text_solr_fname) { 'full_text_tesimv' }
|
579
|
+
before do
|
580
|
+
allow(sdb).to receive(:bare_druid).and_return(fake_druid)
|
581
|
+
end
|
582
|
+
let!(:expected_text) { 'SOME full text string that is returned from the server' }
|
583
|
+
let!(:full_file_path) { 'https://stacks.stanford.edu/file/oo000oo0000/oo000oo0000.txt' }
|
584
|
+
it 'indexes the full text into the appropriate field if a recognized file pattern is found' do
|
585
|
+
public_xml_with_feigenbaum_full_text = Nokogiri::XML <<-EOF
|
586
|
+
<publicObject id="druid:oo000oo0000" published="2015-10-17T18:24:08-07:00">
|
587
|
+
<contentMetadata objectId="oo000oo0000" type="book">
|
588
|
+
<resource id="oo000oo0000_4" sequence="4" type="object">
|
589
|
+
<label>Document</label>
|
590
|
+
<file id="oo000oo0000.pdf" mimetype="application/pdf" size="6801421"></file>
|
591
|
+
<file id="oo000oo0000.txt" mimetype="text/plain" size="23376"></file>
|
592
|
+
</resource>
|
593
|
+
<resource id="oo000oo0000_5" sequence="5" type="page">
|
594
|
+
<label>Page 1</label>
|
595
|
+
<file id="oo000oo0000_00001.jp2" mimetype="image/jp2" size="1864266"><imageData width="2632" height="3422"/></file>
|
596
|
+
</resource>
|
597
|
+
</contentMetadata>
|
598
|
+
</publicObject>
|
599
|
+
EOF
|
600
|
+
allow(sdb).to receive(:public_xml).and_return(public_xml_with_feigenbaum_full_text)
|
601
|
+
# don't actually attempt a call to the stacks
|
602
|
+
allow(subject).to receive(:get_file_content).with(full_file_path).and_return(expected_text)
|
603
|
+
subject.send(:add_object_full_text, sdb, solr_doc)
|
604
|
+
expect(subject.object_level_full_text_urls(sdb)).to eq [full_file_path]
|
605
|
+
expect(solr_doc[full_text_solr_fname]).to eq [expected_text]
|
606
|
+
end
|
607
|
+
it 'does not index the full text if no recognized pattern is found' do
|
608
|
+
public_xml_with_no_recognized_full_text = Nokogiri::XML <<-EOF
|
609
|
+
<publicObject id="druid:oo000oo0000" published="2015-10-17T18:24:08-07:00">
|
610
|
+
<contentMetadata objectId="oo000oo0000" type="book">
|
611
|
+
<resource id="oo000oo0000_4" sequence="4" type="object">
|
612
|
+
<label>Document</label>
|
613
|
+
<file id="oo000oo0000.pdf" mimetype="application/pdf" size="6801421"></file>
|
614
|
+
</resource>
|
615
|
+
<resource id="oo000oo0000_5" sequence="5" type="page">
|
616
|
+
<label>Page 1</label>
|
617
|
+
<file id="oo000oo0000_00001.jp2" mimetype="image/jp2" size="1864266"><imageData width="2632" height="3422"/></file>
|
618
|
+
</resource>
|
619
|
+
</contentMetadata>
|
620
|
+
</publicObject>
|
621
|
+
EOF
|
622
|
+
allow(sdb).to receive(:public_xml).and_return(public_xml_with_no_recognized_full_text)
|
623
|
+
subject.send(:add_object_full_text, sdb, solr_doc)
|
624
|
+
expect(subject.object_level_full_text_urls(sdb)).to eq []
|
625
|
+
expect(solr_doc[full_text_solr_fname]).to be_nil
|
626
|
+
end
|
627
|
+
it 'indexes the full text from two files if two recognized patterns are found' do
|
628
|
+
public_xml_with_two_recognized_full_text_files = Nokogiri::XML <<-EOF
|
629
|
+
<publicObject id="druid:oo000oo0000" published="2015-10-17T18:24:08-07:00">
|
630
|
+
<contentMetadata objectId="oo000oo0000" type="book">
|
631
|
+
<resource id="oo000oo0000_4" sequence="4" type="object">
|
632
|
+
<label>Document</label>
|
633
|
+
<file id="oo000oo0000.pdf" mimetype="application/pdf" size="6801421"></file>
|
634
|
+
<file id="oo000oo0000.txt" mimetype="text/plain" size="23376"></file>
|
635
|
+
</resource>
|
636
|
+
<resource id="oo000oo0000_5" sequence="5" type="page">
|
637
|
+
<label>Page 1</label>
|
638
|
+
<file id="oo000oo0000_00001.jp2" mimetype="image/jp2" size="1864266"><imageData width="2632" height="3422"/></file>
|
639
|
+
<file id="oo000oo0000.txt" mimetype="text/plain" size="23376"></file>
|
640
|
+
</resource>
|
641
|
+
</contentMetadata>
|
642
|
+
</publicObject>
|
643
|
+
EOF
|
644
|
+
allow(sdb).to receive(:public_xml).and_return(public_xml_with_two_recognized_full_text_files)
|
645
|
+
allow(subject).to receive(:get_file_content).with(full_file_path).and_return(expected_text)
|
646
|
+
subject.send(:add_object_full_text, sdb, solr_doc)
|
647
|
+
expect(subject.object_level_full_text_urls(sdb)).to eq [full_file_path, full_file_path]
|
648
|
+
expect(solr_doc[full_text_solr_fname]).to eq [expected_text, expected_text] # same file twice in a 2 element array
|
649
|
+
end
|
650
|
+
end # add_object_full_text
|
651
|
+
end # full text indexing concern
|
420
652
|
# rubocop:enable Metrics/LineLength
|
421
|
-
|
422
|
-
describe '#add_object_full_text' do
|
423
|
-
let(:full_text_solr_fname) { 'full_text_tesimv' }
|
424
|
-
before do
|
425
|
-
allow(sdb).to receive(:bare_druid).and_return(fake_druid)
|
426
|
-
end
|
427
|
-
let!(:expected_text) { 'SOME full text string that is returned from the server' }
|
428
|
-
let!(:full_file_path) { 'https://stacks.stanford.edu/file/oo000oo0000/oo000oo0000.txt' }
|
429
|
-
it 'indexes the full text into the appropriate field if a recognized file pattern is found' do
|
430
|
-
public_xml_with_feigenbaum_full_text = Nokogiri::XML <<-EOF
|
431
|
-
<publicObject id="druid:oo000oo0000" published="2015-10-17T18:24:08-07:00">
|
432
|
-
<contentMetadata objectId="oo000oo0000" type="book">
|
433
|
-
<resource id="oo000oo0000_4" sequence="4" type="object">
|
434
|
-
<label>Document</label>
|
435
|
-
<file id="oo000oo0000.pdf" mimetype="application/pdf" size="6801421"></file>
|
436
|
-
<file id="oo000oo0000.txt" mimetype="text/plain" size="23376"></file>
|
437
|
-
</resource>
|
438
|
-
<resource id="oo000oo0000_5" sequence="5" type="page">
|
439
|
-
<label>Page 1</label>
|
440
|
-
<file id="oo000oo0000_00001.jp2" mimetype="image/jp2" size="1864266"><imageData width="2632" height="3422"/></file>
|
441
|
-
</resource>
|
442
|
-
</contentMetadata>
|
443
|
-
</publicObject>
|
444
|
-
EOF
|
445
|
-
allow(sdb).to receive(:public_xml).and_return(public_xml_with_feigenbaum_full_text)
|
446
|
-
# don't actually attempt a call to the stacks
|
447
|
-
allow(subject).to receive(:get_file_content).with(full_file_path).and_return(expected_text)
|
448
|
-
subject.send(:add_object_full_text, sdb, solr_doc)
|
449
|
-
expect(subject.object_level_full_text_urls(sdb)).to eq [full_file_path]
|
450
|
-
expect(solr_doc[full_text_solr_fname]).to eq [expected_text]
|
451
|
-
end
|
452
|
-
it 'does not index the full text if no recognized pattern is found' do
|
453
|
-
public_xml_with_no_recognized_full_text = Nokogiri::XML <<-EOF
|
454
|
-
<publicObject id="druid:oo000oo0000" published="2015-10-17T18:24:08-07:00">
|
455
|
-
<contentMetadata objectId="oo000oo0000" type="book">
|
456
|
-
<resource id="oo000oo0000_4" sequence="4" type="object">
|
457
|
-
<label>Document</label>
|
458
|
-
<file id="oo000oo0000.pdf" mimetype="application/pdf" size="6801421"></file>
|
459
|
-
</resource>
|
460
|
-
<resource id="oo000oo0000_5" sequence="5" type="page">
|
461
|
-
<label>Page 1</label>
|
462
|
-
<file id="oo000oo0000_00001.jp2" mimetype="image/jp2" size="1864266"><imageData width="2632" height="3422"/></file>
|
463
|
-
</resource>
|
464
|
-
</contentMetadata>
|
465
|
-
</publicObject>
|
466
|
-
EOF
|
467
|
-
allow(sdb).to receive(:public_xml).and_return(public_xml_with_no_recognized_full_text)
|
468
|
-
subject.send(:add_object_full_text, sdb, solr_doc)
|
469
|
-
expect(subject.object_level_full_text_urls(sdb)).to eq []
|
470
|
-
expect(solr_doc[full_text_solr_fname]).to be_nil
|
471
|
-
end
|
472
|
-
it 'indexes the full text from two files if two recognized patterns are found' do
|
473
|
-
public_xml_with_two_recognized_full_text_files = Nokogiri::XML <<-EOF
|
474
|
-
<publicObject id="druid:oo000oo0000" published="2015-10-17T18:24:08-07:00">
|
475
|
-
<contentMetadata objectId="oo000oo0000" type="book">
|
476
|
-
<resource id="oo000oo0000_4" sequence="4" type="object">
|
477
|
-
<label>Document</label>
|
478
|
-
<file id="oo000oo0000.pdf" mimetype="application/pdf" size="6801421"></file>
|
479
|
-
<file id="oo000oo0000.txt" mimetype="text/plain" size="23376"></file>
|
480
|
-
</resource>
|
481
|
-
<resource id="oo000oo0000_5" sequence="5" type="page">
|
482
|
-
<label>Page 1</label>
|
483
|
-
<file id="oo000oo0000_00001.jp2" mimetype="image/jp2" size="1864266"><imageData width="2632" height="3422"/></file>
|
484
|
-
<file id="oo000oo0000.txt" mimetype="text/plain" size="23376"></file>
|
485
|
-
</resource>
|
486
|
-
</contentMetadata>
|
487
|
-
</publicObject>
|
488
|
-
EOF
|
489
|
-
allow(sdb).to receive(:public_xml).and_return(public_xml_with_two_recognized_full_text_files)
|
490
|
-
allow(subject).to receive(:get_file_content).with(full_file_path).and_return(expected_text)
|
491
|
-
subject.send(:add_object_full_text, sdb, solr_doc)
|
492
|
-
expect(subject.object_level_full_text_urls(sdb)).to eq [full_file_path, full_file_path]
|
493
|
-
expect(solr_doc[full_text_solr_fname]).to eq [expected_text, expected_text] # same file twice in a 2 element array
|
494
|
-
end
|
495
|
-
end # add_object_full_text
|
496
653
|
end
|
@@ -25,8 +25,7 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.add_dependency 'gdor-indexer'
|
26
26
|
# newer versions of harvestdor-indexer have performance improvements for collections
|
27
27
|
spec.add_dependency 'harvestdor-indexer', '~> 2.3'
|
28
|
-
|
29
|
-
spec.add_dependency 'stanford-mods', '>= 1.2.1'
|
28
|
+
spec.add_dependency 'stanford-mods', '>= 1.3.0'
|
30
29
|
spec.add_dependency 'rails'
|
31
30
|
spec.add_dependency 'blacklight-spotlight', '~> 0.6'
|
32
31
|
spec.add_dependency 'parallel'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spotlight-dor-resources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Beer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -72,14 +72,14 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 1.2.1
|
75
|
+
version: 1.3.0
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 1.2.1
|
82
|
+
version: 1.3.0
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rails
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|