spotlight-dor-resources 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/spotlight/dor/indexer.rb +28 -47
- data/lib/spotlight/dor/resources.rb +1 -1
- data/lib/spotlight/dor/resources/version.rb +1 -1
- data/spec/lib/spotlight/dor/indexer_spec.rb +323 -166
- data/spotlight-dor-resources.gemspec +1 -2
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: df866b5840fd2ef1f3068c6244ef96aaf50f463b
|
4
|
+
data.tar.gz: 484fb0089d890785d1b7cbfa9560cd7a95f9b83c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1dee32477104678ade84fc9ccdc0400c56a2981af353063dac19a6415938e2c010e2c7f3137bb1b4cd06cad33afb47863bb518205db044e2221612d5291af413
|
7
|
+
data.tar.gz: f2b17bd142699edda90806158fcd60dba2ad687d28d16b22c08fae04df58f2e79a2705d2603e0d8010a8ea37f53606647f8d140f770ed71e79254bf01994e803
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
[](https://travis-ci.org/sul-dlss/spotlight-dor-resources) | [](https://coveralls.io/r/sul-dlss/spotlight-dor-resources) | [](http://badge.fury.io/rb/spotlight-dor-resources)
|
1
|
+
[](https://travis-ci.org/sul-dlss/spotlight-dor-resources) | [](https://coveralls.io/r/sul-dlss/spotlight-dor-resources) | [](https://gemnasium.com/sul-dlss/spotlight-dor-resources) | [](http://badge.fury.io/rb/spotlight-dor-resources)
|
2
2
|
|
3
3
|
# Spotlight::Dor::Resources
|
4
4
|
|
@@ -34,7 +34,7 @@ For access to a Rails console with the gem loaded up for testing purposes, you c
|
|
34
34
|
### First time only configuration for local testing
|
35
35
|
|
36
36
|
$ bundle
|
37
|
-
$ bundle exec rake
|
37
|
+
$ bundle exec rake ci
|
38
38
|
|
39
39
|
This will download a test jetty instance (to run Solr), generate a testing app at ```.internal_test_app``` and run the tests.
|
40
40
|
|
@@ -32,9 +32,11 @@ module Spotlight::Dor
|
|
32
32
|
before_index :add_author_no_collector
|
33
33
|
before_index :add_box
|
34
34
|
before_index :add_collector
|
35
|
+
before_index :add_coordinates
|
35
36
|
before_index :add_folder
|
36
37
|
before_index :add_genre
|
37
38
|
before_index :add_location
|
39
|
+
before_index :add_point_bbox
|
38
40
|
before_index :add_series
|
39
41
|
end
|
40
42
|
|
@@ -48,6 +50,12 @@ module Spotlight::Dor
|
|
48
50
|
solr_doc['box_ssi'] = sdb.smods_rec.box
|
49
51
|
end
|
50
52
|
|
53
|
+
# add coordinates solr field containing the cartographic coordinates per
|
54
|
+
# MODS subject.cartographics.coordinates (via stanford-mods gem)
|
55
|
+
def add_coordinates(sdb, solr_doc)
|
56
|
+
solr_doc['coordinates'] = sdb.smods_rec.coordinates
|
57
|
+
end
|
58
|
+
|
51
59
|
# add collector_ssim solr field containing the collector per MODS names (via stanford-mods gem)
|
52
60
|
def add_collector(sdb, solr_doc)
|
53
61
|
insert_field solr_doc, 'collector', sdb.smods_rec.collectors_w_dates, :symbol # _ssim field
|
@@ -66,10 +74,16 @@ module Spotlight::Dor
|
|
66
74
|
solr_doc['location_ssi'] = sdb.smods_rec.location
|
67
75
|
end
|
68
76
|
|
77
|
+
# add point_bbox solr field containing the point bounding box per
|
78
|
+
# MODS subject.cartographics.coordinates (via stanford-mods gem)
|
79
|
+
def add_point_bbox(sdb, solr_doc)
|
80
|
+
solr_doc['point_bbox'] = sdb.smods_rec.point_bbox
|
81
|
+
end
|
82
|
+
|
69
83
|
def add_series(sdb, solr_doc)
|
70
84
|
solr_doc['series_ssi'] = sdb.smods_rec.series
|
71
85
|
end
|
72
|
-
end
|
86
|
+
end # StanfordMods concern
|
73
87
|
|
74
88
|
concerning :ContentMetadata do
|
75
89
|
included do
|
@@ -125,8 +139,15 @@ module Spotlight::Dor
|
|
125
139
|
# only create these fields on an as-needed basis.
|
126
140
|
|
127
141
|
included do
|
142
|
+
before_index :add_document_subtype
|
128
143
|
before_index :add_donor_tags
|
129
144
|
before_index :add_folder_name
|
145
|
+
before_index :add_general_notes
|
146
|
+
end
|
147
|
+
|
148
|
+
def add_document_subtype(sdb, solr_doc)
|
149
|
+
subtype = sdb.smods_rec.note.select { |n| n.displayLabel == 'Document subtype' }.map(&:content)
|
150
|
+
solr_doc['doc_subtype_ssi'] = subtype.first if subtype.size > 0
|
130
151
|
end
|
131
152
|
|
132
153
|
def add_donor_tags(sdb, solr_doc)
|
@@ -142,7 +163,12 @@ module Spotlight::Dor
|
|
142
163
|
match_data = preferred_citation.first.match(/Title: +(.+)/i) if preferred_citation.present?
|
143
164
|
solr_doc['folder_name_ssi'] = match_data[1].strip if match_data.present?
|
144
165
|
end
|
145
|
-
|
166
|
+
|
167
|
+
def add_general_notes(sdb, solr_doc)
|
168
|
+
general_notes = sdb.smods_rec.note.select { |n| n.type_at.blank? && n.displayLabel.blank? }.map(&:content)
|
169
|
+
insert_field solr_doc, 'general_notes', general_notes, :symbol # this is a _ssim field
|
170
|
+
end
|
171
|
+
end # end feigbenbaum specific fields
|
146
172
|
|
147
173
|
concerning :FullTextIndexing do
|
148
174
|
included do
|
@@ -187,51 +213,6 @@ module Spotlight::Dor
|
|
187
213
|
end
|
188
214
|
end
|
189
215
|
|
190
|
-
concerning :CartographicIndexing do
|
191
|
-
included do
|
192
|
-
before_index :mods_cartographics_indexing
|
193
|
-
end
|
194
|
-
|
195
|
-
def mods_cartographics_indexing(sdb, solr_doc)
|
196
|
-
coordinates = Array(sdb.smods_rec.subject.cartographics.coordinates)
|
197
|
-
|
198
|
-
insert_field(solr_doc, 'coordinates', coordinates.map(&:text), :stored_searchable)
|
199
|
-
|
200
|
-
solr_doc['point_bbox'] ||= []
|
201
|
-
solr_doc['point_bbox'] += coords_to_bboxes(coordinates)
|
202
|
-
end
|
203
|
-
|
204
|
-
private
|
205
|
-
|
206
|
-
def coords_to_bboxes(coordinates)
|
207
|
-
coordinates.select { |n| n.text =~ /^\(.*\)$/ }.map do |n|
|
208
|
-
coord_to_bbox(n.text)
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
|
-
def coord_to_bbox(coord)
|
213
|
-
bbox = coord.delete('(').delete(')')
|
214
|
-
|
215
|
-
lng, lat = bbox.split('/')
|
216
|
-
|
217
|
-
min_x, max_x = lng.split('--').map { |x| coord_to_decimal(x) }
|
218
|
-
max_y, min_y = lat.split('--').map { |y| coord_to_decimal(y) }
|
219
|
-
"#{min_x} #{min_y} #{max_x} #{max_y}"
|
220
|
-
end
|
221
|
-
|
222
|
-
def coord_to_decimal(point)
|
223
|
-
regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/
|
224
|
-
match = regex.match(point)
|
225
|
-
dec = 0
|
226
|
-
|
227
|
-
dec += match['deg'].to_i
|
228
|
-
dec += match['sec'].to_f / 60
|
229
|
-
dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
|
230
|
-
|
231
|
-
dec
|
232
|
-
end
|
233
|
-
end
|
234
|
-
|
235
216
|
def insert_field(solr_doc, field, values, *args)
|
236
217
|
Array(values).each do |v|
|
237
218
|
Solrizer.insert_field solr_doc, field, v, *args
|
@@ -76,49 +76,188 @@ describe Spotlight::Dor::Indexer do
|
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
79
|
+
context 'Feigbenbaum specific fields concern' do
|
80
|
+
describe '#add_document_subtype' do
|
81
|
+
before do
|
82
|
+
allow(r).to receive(:mods).and_return(mods)
|
83
|
+
subject.send(:add_document_subtype, sdb, solr_doc)
|
84
|
+
end
|
84
85
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
86
|
+
context 'with a record without document subtype' do
|
87
|
+
let(:mods) do
|
88
|
+
Nokogiri::XML <<-EOF
|
89
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
90
|
+
<note displayLabel="preferred citation">(not a document subtype)</note>
|
91
|
+
<note>a generic note</note>
|
92
|
+
</mods>
|
93
|
+
EOF
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'is blank' do
|
97
|
+
expect(solr_doc['doc_subtype_ssi']).to be_blank
|
98
|
+
end
|
92
99
|
end
|
93
100
|
|
94
|
-
|
95
|
-
|
101
|
+
context 'with a record with document subtype' do
|
102
|
+
let(:mods) do
|
103
|
+
Nokogiri::XML <<-EOF
|
104
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
105
|
+
<note displayLabel="Document subtype">memorandums</note>
|
106
|
+
<note>a generic note</note>
|
107
|
+
</mods>
|
108
|
+
EOF
|
109
|
+
end
|
110
|
+
|
111
|
+
it 'extracts the doc subtypes' do
|
112
|
+
expect(solr_doc['doc_subtype_ssi']).to eq('memorandums')
|
113
|
+
end
|
96
114
|
end
|
97
|
-
end
|
115
|
+
end # doc subtype
|
98
116
|
|
99
|
-
|
100
|
-
|
101
|
-
|
117
|
+
describe '#add_donor_tags' do
|
118
|
+
before do
|
119
|
+
allow(r).to receive(:mods).and_return(mods)
|
120
|
+
subject.send(:add_donor_tags, sdb, solr_doc)
|
121
|
+
end
|
122
|
+
|
123
|
+
context 'with a record without donor tags' do
|
124
|
+
let(:mods) do
|
125
|
+
Nokogiri::XML <<-EOF
|
126
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
127
|
+
<note displayLabel="preferred citation">(not a donor tag)</note>
|
128
|
+
</mods>
|
129
|
+
EOF
|
130
|
+
end
|
131
|
+
|
132
|
+
it 'is blank' do
|
133
|
+
expect(solr_doc['donor_tags_ssim']).to be_blank
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
context 'with a record with donor tags' do
|
138
|
+
let(:mods) do
|
139
|
+
# e.g. from https://purl.stanford.edu/vw282gv1740
|
140
|
+
Nokogiri::XML <<-EOF
|
141
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
142
|
+
<note displayLabel="Donor tags">Knowledge Systems Laboratory</note>
|
143
|
+
<note displayLabel="Donor tags">medical applications</note>
|
144
|
+
<note displayLabel="Donor tags">Publishing</note>
|
145
|
+
<note displayLabel="Donor tags">Stanford</note>
|
146
|
+
<note displayLabel="Donor tags">Stanford Computer Science Department</note>
|
147
|
+
</mods>
|
148
|
+
EOF
|
149
|
+
end
|
150
|
+
|
151
|
+
it 'extracts the donor tags' do
|
152
|
+
expect(solr_doc['donor_tags_ssim']).to contain_exactly 'Knowledge Systems Laboratory',
|
153
|
+
'medical applications',
|
154
|
+
'Publishing',
|
155
|
+
'Stanford',
|
156
|
+
'Stanford Computer Science Department'
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end # donor tags
|
160
|
+
|
161
|
+
# rubocop:disable Metrics/LineLength
|
162
|
+
describe '#add_folder_name' do
|
163
|
+
let(:mods_note_plain) do
|
102
164
|
Nokogiri::XML <<-EOF
|
103
165
|
<mods xmlns="#{Mods::MODS_NS}">
|
104
|
-
<note
|
105
|
-
<note displayLabel="Donor tags">medical applications</note>
|
106
|
-
<note displayLabel="Donor tags">Publishing</note>
|
107
|
-
<note displayLabel="Donor tags">Stanford</note>
|
108
|
-
<note displayLabel="Donor tags">Stanford Computer Science Department</note>
|
166
|
+
<note>#{example}</note>
|
109
167
|
</mods>
|
110
|
-
|
168
|
+
EOF
|
111
169
|
end
|
170
|
+
let(:mods_note_preferred_citation) do
|
171
|
+
Nokogiri::XML <<-EOF
|
172
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
173
|
+
<note type="preferred citation">#{example}</note>
|
174
|
+
</mods>
|
175
|
+
EOF
|
176
|
+
end
|
177
|
+
# example string as key, expected folder name as value
|
178
|
+
# all from feigenbaum (or based on feigenbaum), as that is only coll with this data
|
179
|
+
{
|
180
|
+
'Call Number: SC0340, Accession: 1986-052, Box: 20, Folder: 40, Title: S': 'S',
|
181
|
+
'Call Number: SC0340, Accession: 1986-052, Box: 54, Folder: 25, Title: Balzer': 'Balzer',
|
182
|
+
'Call Number: SC0340, Accession: 1986-052, Box : 30, Folder: 21, Title: Feigenbaum, Publications. 2 of 2.': 'Feigenbaum, Publications. 2 of 2.',
|
183
|
+
# colon in name
|
184
|
+
'Call Number: SC0340, Accession 2005-101, Box: 10, Folder: 26, Title: Gordon Bell Letter rdf:about blah (AI) 1987': 'Gordon Bell Letter rdf:about blah (AI) 1987',
|
185
|
+
'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 74, Title: Microcomputer Systems Proposal: blah blah': 'Microcomputer Systems Proposal: blah blah',
|
186
|
+
'Call Number: SC0340, Accession 2005-101, Box: 14, Folder: 20, Title: blah "bleah: blargW^"ugh" seriously?.': 'blah "bleah: blargW^"ugh" seriously?.',
|
187
|
+
# quotes in name
|
188
|
+
'Call Number: SC0340, Accession 2005-101, Box: 29, Folder: 18, Title: "bleah" blah': '"bleah" blah',
|
189
|
+
'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 58, Title: "M": blah': '"M": blah',
|
190
|
+
'Call Number: SC0340, Accession 2005-101, Box : 32A, Folder: 19, Title: blah "bleah" blue': 'blah "bleah" blue',
|
191
|
+
# not parseable
|
192
|
+
'Call Number: SC0340, Accession 2005-101': nil,
|
193
|
+
'Call Number: SC0340, Accession: 1986-052': nil,
|
194
|
+
'Call Number: SC0340, Accession: 1986-052, Box 36 Folder 38': nil,
|
195
|
+
'blah blah ... with the umbrella title Feigenbaum and Feldman, Computers and Thought II. blah blah': nil,
|
196
|
+
'blah blah ... Title ... blah blah': nil
|
197
|
+
}.each do |example, expected|
|
198
|
+
describe "for example '#{example}'" do
|
199
|
+
let(:example) { example }
|
200
|
+
context 'in preferred citation note' do
|
201
|
+
before do
|
202
|
+
allow(r).to receive(:mods).and_return(mods_note_preferred_citation)
|
203
|
+
subject.send(:add_folder_name, sdb, solr_doc)
|
204
|
+
end
|
205
|
+
it "has the expected folder name '#{expected}'" do
|
206
|
+
expect(solr_doc['folder_name_ssi']).to eq expected
|
207
|
+
end
|
208
|
+
end
|
209
|
+
context 'in plain note' do
|
210
|
+
before do
|
211
|
+
allow(r).to receive(:mods).and_return(mods_note_plain)
|
212
|
+
subject.send(:add_folder_name, sdb, solr_doc)
|
213
|
+
end
|
214
|
+
it 'does not have a folder name' do
|
215
|
+
expect(solr_doc['folder_name_ssi']).to be_falsey
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end # for example
|
219
|
+
end # each
|
220
|
+
end # add_folder_name
|
221
|
+
# rubocop:enable Metrics/LineLength
|
112
222
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
'Stanford',
|
118
|
-
'Stanford Computer Science Department'
|
223
|
+
describe '#add_general_notes' do
|
224
|
+
before do
|
225
|
+
allow(r).to receive(:mods).and_return(mods)
|
226
|
+
subject.send(:add_general_notes, sdb, solr_doc)
|
119
227
|
end
|
120
|
-
|
121
|
-
|
228
|
+
|
229
|
+
context 'no general notes, but other types of notes' do
|
230
|
+
let(:mods) do
|
231
|
+
Nokogiri::XML <<-EOF
|
232
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
233
|
+
<note displayLabel="preferred citation">(not a document subtype)</note>
|
234
|
+
<note displayLabel="Document subtype">memorandums</note>
|
235
|
+
<note displayLabel="Donor tags">Knowledge Systems Laboratory</note>
|
236
|
+
</mods>
|
237
|
+
EOF
|
238
|
+
end
|
239
|
+
|
240
|
+
it 'is blank' do
|
241
|
+
expect(solr_doc['general_notes_ssim']).to be_blank
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
context 'ignore extra notes' do
|
246
|
+
let(:mods) do
|
247
|
+
Nokogiri::XML <<-EOF
|
248
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
249
|
+
<note displayLabel="Document subtype">memorandums</note>
|
250
|
+
<note>a generic note</note>
|
251
|
+
</mods>
|
252
|
+
EOF
|
253
|
+
end
|
254
|
+
|
255
|
+
it 'extracts the doc subtypes' do
|
256
|
+
expect(solr_doc['general_notes_ssim']).to contain_exactly 'a generic note'
|
257
|
+
end
|
258
|
+
end
|
259
|
+
end # general notes
|
260
|
+
end # feigbenbaum specific fields concern
|
122
261
|
|
123
262
|
context 'StanfordMods concern' do
|
124
263
|
describe '#add_author_no_collector' do
|
@@ -216,6 +355,44 @@ describe Spotlight::Dor::Indexer do
|
|
216
355
|
end
|
217
356
|
end
|
218
357
|
|
358
|
+
describe '#add_coordinates' do
|
359
|
+
before do
|
360
|
+
allow(r).to receive(:mods).and_return(mods)
|
361
|
+
subject.send(:add_coordinates, sdb, solr_doc)
|
362
|
+
end
|
363
|
+
context 'with a record without coordinates' do
|
364
|
+
let(:mods) do
|
365
|
+
Nokogiri::XML <<-EOF
|
366
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
367
|
+
</mods>
|
368
|
+
EOF
|
369
|
+
end
|
370
|
+
|
371
|
+
it 'is blank' do
|
372
|
+
expect(solr_doc['coordinates']).to be_blank
|
373
|
+
end
|
374
|
+
end
|
375
|
+
context 'with a record with coordinates' do
|
376
|
+
let(:mods) do
|
377
|
+
# e.g. from https://purl.stanford.edu/vw282gv1740
|
378
|
+
Nokogiri::XML <<-EOF
|
379
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
380
|
+
<subject>
|
381
|
+
<cartographics>
|
382
|
+
<scale>Scale 1:500,000</scale>
|
383
|
+
<coordinates>(W16°--E28°/N13°--S15°).</coordinates>
|
384
|
+
</cartographics>
|
385
|
+
</subject>
|
386
|
+
</mods>
|
387
|
+
EOF
|
388
|
+
end
|
389
|
+
|
390
|
+
it 'extracts the coordinates' do
|
391
|
+
expect(solr_doc['coordinates']).to eq(['(W16°--E28°/N13°--S15°).'])
|
392
|
+
end
|
393
|
+
end
|
394
|
+
end # add_coordinates
|
395
|
+
|
219
396
|
describe '#add_folder' do
|
220
397
|
before do
|
221
398
|
allow(r).to receive(:mods).and_return(mods)
|
@@ -321,6 +498,44 @@ describe Spotlight::Dor::Indexer do
|
|
321
498
|
end
|
322
499
|
end # add_location
|
323
500
|
|
501
|
+
describe '#add_point_bbox' do
|
502
|
+
before do
|
503
|
+
allow(r).to receive(:mods).and_return(mods)
|
504
|
+
subject.send(:add_point_bbox, sdb, solr_doc)
|
505
|
+
end
|
506
|
+
context 'with a record without coordinates' do
|
507
|
+
let(:mods) do
|
508
|
+
Nokogiri::XML <<-EOF
|
509
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
510
|
+
</mods>
|
511
|
+
EOF
|
512
|
+
end
|
513
|
+
|
514
|
+
it 'is blank' do
|
515
|
+
expect(solr_doc['point_bbox']).to be_blank
|
516
|
+
end
|
517
|
+
end
|
518
|
+
context 'with a record with coordinates' do
|
519
|
+
let(:mods) do
|
520
|
+
# e.g. from https://purl.stanford.edu/vw282gv1740
|
521
|
+
Nokogiri::XML <<-EOF
|
522
|
+
<mods xmlns="#{Mods::MODS_NS}">
|
523
|
+
<subject>
|
524
|
+
<cartographics>
|
525
|
+
<scale>Scale 1:500,000</scale>
|
526
|
+
<coordinates>(W16°--E28°/N13°--S15°).</coordinates>
|
527
|
+
</cartographics>
|
528
|
+
</subject>
|
529
|
+
</mods>
|
530
|
+
EOF
|
531
|
+
end
|
532
|
+
|
533
|
+
it 'extracts the point_bbox' do
|
534
|
+
expect(solr_doc['point_bbox']).to eq(['-16.0 -15.0 28.0 13.0'])
|
535
|
+
end
|
536
|
+
end
|
537
|
+
end # add_point_bbox
|
538
|
+
|
324
539
|
describe '#add_series' do
|
325
540
|
before do
|
326
541
|
allow(r).to receive(:mods).and_return(mods)
|
@@ -358,139 +573,81 @@ describe Spotlight::Dor::Indexer do
|
|
358
573
|
end # context StanfordMods concern
|
359
574
|
|
360
575
|
# rubocop:disable Metrics/LineLength
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
'
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
576
|
+
context 'Full Text Indexing concern' do
|
577
|
+
describe '#add_object_full_text' do
|
578
|
+
let(:full_text_solr_fname) { 'full_text_tesimv' }
|
579
|
+
before do
|
580
|
+
allow(sdb).to receive(:bare_druid).and_return(fake_druid)
|
581
|
+
end
|
582
|
+
let!(:expected_text) { 'SOME full text string that is returned from the server' }
|
583
|
+
let!(:full_file_path) { 'https://stacks.stanford.edu/file/oo000oo0000/oo000oo0000.txt' }
|
584
|
+
it 'indexes the full text into the appropriate field if a recognized file pattern is found' do
|
585
|
+
public_xml_with_feigenbaum_full_text = Nokogiri::XML <<-EOF
|
586
|
+
<publicObject id="druid:oo000oo0000" published="2015-10-17T18:24:08-07:00">
|
587
|
+
<contentMetadata objectId="oo000oo0000" type="book">
|
588
|
+
<resource id="oo000oo0000_4" sequence="4" type="object">
|
589
|
+
<label>Document</label>
|
590
|
+
<file id="oo000oo0000.pdf" mimetype="application/pdf" size="6801421"></file>
|
591
|
+
<file id="oo000oo0000.txt" mimetype="text/plain" size="23376"></file>
|
592
|
+
</resource>
|
593
|
+
<resource id="oo000oo0000_5" sequence="5" type="page">
|
594
|
+
<label>Page 1</label>
|
595
|
+
<file id="oo000oo0000_00001.jp2" mimetype="image/jp2" size="1864266"><imageData width="2632" height="3422"/></file>
|
596
|
+
</resource>
|
597
|
+
</contentMetadata>
|
598
|
+
</publicObject>
|
599
|
+
EOF
|
600
|
+
allow(sdb).to receive(:public_xml).and_return(public_xml_with_feigenbaum_full_text)
|
601
|
+
# don't actually attempt a call to the stacks
|
602
|
+
allow(subject).to receive(:get_file_content).with(full_file_path).and_return(expected_text)
|
603
|
+
subject.send(:add_object_full_text, sdb, solr_doc)
|
604
|
+
expect(subject.object_level_full_text_urls(sdb)).to eq [full_file_path]
|
605
|
+
expect(solr_doc[full_text_solr_fname]).to eq [expected_text]
|
606
|
+
end
|
607
|
+
it 'does not index the full text if no recognized pattern is found' do
|
608
|
+
public_xml_with_no_recognized_full_text = Nokogiri::XML <<-EOF
|
609
|
+
<publicObject id="druid:oo000oo0000" published="2015-10-17T18:24:08-07:00">
|
610
|
+
<contentMetadata objectId="oo000oo0000" type="book">
|
611
|
+
<resource id="oo000oo0000_4" sequence="4" type="object">
|
612
|
+
<label>Document</label>
|
613
|
+
<file id="oo000oo0000.pdf" mimetype="application/pdf" size="6801421"></file>
|
614
|
+
</resource>
|
615
|
+
<resource id="oo000oo0000_5" sequence="5" type="page">
|
616
|
+
<label>Page 1</label>
|
617
|
+
<file id="oo000oo0000_00001.jp2" mimetype="image/jp2" size="1864266"><imageData width="2632" height="3422"/></file>
|
618
|
+
</resource>
|
619
|
+
</contentMetadata>
|
620
|
+
</publicObject>
|
621
|
+
EOF
|
622
|
+
allow(sdb).to receive(:public_xml).and_return(public_xml_with_no_recognized_full_text)
|
623
|
+
subject.send(:add_object_full_text, sdb, solr_doc)
|
624
|
+
expect(subject.object_level_full_text_urls(sdb)).to eq []
|
625
|
+
expect(solr_doc[full_text_solr_fname]).to be_nil
|
626
|
+
end
|
627
|
+
it 'indexes the full text from two files if two recognized patterns are found' do
|
628
|
+
public_xml_with_two_recognized_full_text_files = Nokogiri::XML <<-EOF
|
629
|
+
<publicObject id="druid:oo000oo0000" published="2015-10-17T18:24:08-07:00">
|
630
|
+
<contentMetadata objectId="oo000oo0000" type="book">
|
631
|
+
<resource id="oo000oo0000_4" sequence="4" type="object">
|
632
|
+
<label>Document</label>
|
633
|
+
<file id="oo000oo0000.pdf" mimetype="application/pdf" size="6801421"></file>
|
634
|
+
<file id="oo000oo0000.txt" mimetype="text/plain" size="23376"></file>
|
635
|
+
</resource>
|
636
|
+
<resource id="oo000oo0000_5" sequence="5" type="page">
|
637
|
+
<label>Page 1</label>
|
638
|
+
<file id="oo000oo0000_00001.jp2" mimetype="image/jp2" size="1864266"><imageData width="2632" height="3422"/></file>
|
639
|
+
<file id="oo000oo0000.txt" mimetype="text/plain" size="23376"></file>
|
640
|
+
</resource>
|
641
|
+
</contentMetadata>
|
642
|
+
</publicObject>
|
643
|
+
EOF
|
644
|
+
allow(sdb).to receive(:public_xml).and_return(public_xml_with_two_recognized_full_text_files)
|
645
|
+
allow(subject).to receive(:get_file_content).with(full_file_path).and_return(expected_text)
|
646
|
+
subject.send(:add_object_full_text, sdb, solr_doc)
|
647
|
+
expect(subject.object_level_full_text_urls(sdb)).to eq [full_file_path, full_file_path]
|
648
|
+
expect(solr_doc[full_text_solr_fname]).to eq [expected_text, expected_text] # same file twice in a 2 element array
|
649
|
+
end
|
650
|
+
end # add_object_full_text
|
651
|
+
end # full text indexing concern
|
420
652
|
# rubocop:enable Metrics/LineLength
|
421
|
-
|
422
|
-
describe '#add_object_full_text' do
|
423
|
-
let(:full_text_solr_fname) { 'full_text_tesimv' }
|
424
|
-
before do
|
425
|
-
allow(sdb).to receive(:bare_druid).and_return(fake_druid)
|
426
|
-
end
|
427
|
-
let!(:expected_text) { 'SOME full text string that is returned from the server' }
|
428
|
-
let!(:full_file_path) { 'https://stacks.stanford.edu/file/oo000oo0000/oo000oo0000.txt' }
|
429
|
-
it 'indexes the full text into the appropriate field if a recognized file pattern is found' do
|
430
|
-
public_xml_with_feigenbaum_full_text = Nokogiri::XML <<-EOF
|
431
|
-
<publicObject id="druid:oo000oo0000" published="2015-10-17T18:24:08-07:00">
|
432
|
-
<contentMetadata objectId="oo000oo0000" type="book">
|
433
|
-
<resource id="oo000oo0000_4" sequence="4" type="object">
|
434
|
-
<label>Document</label>
|
435
|
-
<file id="oo000oo0000.pdf" mimetype="application/pdf" size="6801421"></file>
|
436
|
-
<file id="oo000oo0000.txt" mimetype="text/plain" size="23376"></file>
|
437
|
-
</resource>
|
438
|
-
<resource id="oo000oo0000_5" sequence="5" type="page">
|
439
|
-
<label>Page 1</label>
|
440
|
-
<file id="oo000oo0000_00001.jp2" mimetype="image/jp2" size="1864266"><imageData width="2632" height="3422"/></file>
|
441
|
-
</resource>
|
442
|
-
</contentMetadata>
|
443
|
-
</publicObject>
|
444
|
-
EOF
|
445
|
-
allow(sdb).to receive(:public_xml).and_return(public_xml_with_feigenbaum_full_text)
|
446
|
-
# don't actually attempt a call to the stacks
|
447
|
-
allow(subject).to receive(:get_file_content).with(full_file_path).and_return(expected_text)
|
448
|
-
subject.send(:add_object_full_text, sdb, solr_doc)
|
449
|
-
expect(subject.object_level_full_text_urls(sdb)).to eq [full_file_path]
|
450
|
-
expect(solr_doc[full_text_solr_fname]).to eq [expected_text]
|
451
|
-
end
|
452
|
-
it 'does not index the full text if no recognized pattern is found' do
|
453
|
-
public_xml_with_no_recognized_full_text = Nokogiri::XML <<-EOF
|
454
|
-
<publicObject id="druid:oo000oo0000" published="2015-10-17T18:24:08-07:00">
|
455
|
-
<contentMetadata objectId="oo000oo0000" type="book">
|
456
|
-
<resource id="oo000oo0000_4" sequence="4" type="object">
|
457
|
-
<label>Document</label>
|
458
|
-
<file id="oo000oo0000.pdf" mimetype="application/pdf" size="6801421"></file>
|
459
|
-
</resource>
|
460
|
-
<resource id="oo000oo0000_5" sequence="5" type="page">
|
461
|
-
<label>Page 1</label>
|
462
|
-
<file id="oo000oo0000_00001.jp2" mimetype="image/jp2" size="1864266"><imageData width="2632" height="3422"/></file>
|
463
|
-
</resource>
|
464
|
-
</contentMetadata>
|
465
|
-
</publicObject>
|
466
|
-
EOF
|
467
|
-
allow(sdb).to receive(:public_xml).and_return(public_xml_with_no_recognized_full_text)
|
468
|
-
subject.send(:add_object_full_text, sdb, solr_doc)
|
469
|
-
expect(subject.object_level_full_text_urls(sdb)).to eq []
|
470
|
-
expect(solr_doc[full_text_solr_fname]).to be_nil
|
471
|
-
end
|
472
|
-
it 'indexes the full text from two files if two recognized patterns are found' do
|
473
|
-
public_xml_with_two_recognized_full_text_files = Nokogiri::XML <<-EOF
|
474
|
-
<publicObject id="druid:oo000oo0000" published="2015-10-17T18:24:08-07:00">
|
475
|
-
<contentMetadata objectId="oo000oo0000" type="book">
|
476
|
-
<resource id="oo000oo0000_4" sequence="4" type="object">
|
477
|
-
<label>Document</label>
|
478
|
-
<file id="oo000oo0000.pdf" mimetype="application/pdf" size="6801421"></file>
|
479
|
-
<file id="oo000oo0000.txt" mimetype="text/plain" size="23376"></file>
|
480
|
-
</resource>
|
481
|
-
<resource id="oo000oo0000_5" sequence="5" type="page">
|
482
|
-
<label>Page 1</label>
|
483
|
-
<file id="oo000oo0000_00001.jp2" mimetype="image/jp2" size="1864266"><imageData width="2632" height="3422"/></file>
|
484
|
-
<file id="oo000oo0000.txt" mimetype="text/plain" size="23376"></file>
|
485
|
-
</resource>
|
486
|
-
</contentMetadata>
|
487
|
-
</publicObject>
|
488
|
-
EOF
|
489
|
-
allow(sdb).to receive(:public_xml).and_return(public_xml_with_two_recognized_full_text_files)
|
490
|
-
allow(subject).to receive(:get_file_content).with(full_file_path).and_return(expected_text)
|
491
|
-
subject.send(:add_object_full_text, sdb, solr_doc)
|
492
|
-
expect(subject.object_level_full_text_urls(sdb)).to eq [full_file_path, full_file_path]
|
493
|
-
expect(solr_doc[full_text_solr_fname]).to eq [expected_text, expected_text] # same file twice in a 2 element array
|
494
|
-
end
|
495
|
-
end # add_object_full_text
|
496
653
|
end
|
@@ -25,8 +25,7 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.add_dependency 'gdor-indexer'
|
26
26
|
# newer versions of harvestdor-indexer have performance improvements for collections
|
27
27
|
spec.add_dependency 'harvestdor-indexer', '~> 2.3'
|
28
|
-
|
29
|
-
spec.add_dependency 'stanford-mods', '>= 1.2.1'
|
28
|
+
spec.add_dependency 'stanford-mods', '>= 1.3.0'
|
30
29
|
spec.add_dependency 'rails'
|
31
30
|
spec.add_dependency 'blacklight-spotlight', '~> 0.6'
|
32
31
|
spec.add_dependency 'parallel'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spotlight-dor-resources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Beer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -72,14 +72,14 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 1.2.1
|
75
|
+
version: 1.3.0
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 1.2.1
|
82
|
+
version: 1.3.0
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rails
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|