gdor-indexer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,26 @@
1
# Put the gem's lib directory (a sibling of this spec directory) first on the
# load path so `require 'gdor/indexer'` below resolves to the local sources.
lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
$LOAD_PATH.unshift(lib_dir)

# Test coverage. SimpleCov is started before the library is required;
# it only tracks files that are loaded after `SimpleCov.start`.
require 'simplecov'
SimpleCov.start do
  # paths excluded from the coverage report
  %w(spec/ config/deploy config/deploy.rb).each do |excluded_path|
    add_filter excluded_path
  end
end

require 'gdor/indexer'
require 'rspec/matchers' # req by equivalent-xml custom matcher `be_equivalent_to`
require 'equivalent-xml'
require 'vcr'
require 'stringio'

# VCR: cassettes live under spec/vcr_cassettes, HTTP is intercepted through
# webmock, real connections are still allowed when no cassette is in use, and
# cassettes can be selected via RSpec metadata.
VCR.configure do |vcr_config|
  vcr_config.cassette_library_dir = 'spec/vcr_cassettes'
  vcr_config.hook_into :webmock
  vcr_config.allow_http_connections_when_no_cassette = true
  vcr_config.configure_rspec_metadata!
end

# No RSpec-level configuration is needed yet; placeholder kept for later use.
# RSpec.configure do |config|
# end
@@ -0,0 +1,812 @@
1
+ require 'spec_helper'
2
+
3
+ describe GDor::Indexer::ModsFields do
4
# Fixtures shared by every example in this group (built once per group):
#   @fake_druid - identifier handed to the stubbed resource
#   @ns_decl    - MODS namespace declaration spliced into each <mods> fixture
#   @mods_xml   - minimal MODS record containing only a <note>
before(:all) do
  @fake_druid = 'oo000oo0000'
  @ns_decl = %(xmlns='#{Mods::MODS_NS}')
  @mods_xml = %(<mods #{@ns_decl}><note>gdor_mods_fields testing</note></mods>)
end
9
+
10
# Logger that writes to an in-memory StringIO rather than to a file or stdout.
let(:logger) { Logger.new(StringIO.new) }
13
+
14
# Builds a SolrDocBuilder around a stubbed Harvestdor resource.
#
# The resource is created with a test double and @fake_druid; its
# #public_xml is stubbed to nil and its #mods to the parsed form of +m+.
#
# @param m [String] MODS XML, handed to Nokogiri::XML
# @return [GDor::Indexer::SolrDocBuilder] builder using the spec's +logger+
def sdb_for_mods(m)
  mods_ng_doc = Nokogiri::XML(m)
  resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
  allow(resource).to receive_messages(public_xml: nil, mods: mods_ng_doc)
  GDor::Indexer::SolrDocBuilder.new(resource, logger)
end
20
+
21
+ context 'doc_hash_from_mods' do
22
+ # see https://consul.stanford.edu/display/NGDE/Required+and+Recommended+Solr+Fields+for+SearchWorks+documents
23
+
24
# :summary_search is expected to hold one value per non-empty top-level
# <abstract> element; nested or empty abstracts must not produce the field.
context 'summary_search solr field from <abstract>' do
  it 'is populated when the MODS has a top level <abstract> element' do
    m = "<mods #{@ns_decl}><abstract>blah blah</abstract></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:summary_search]).to match_array ['blah blah']
  end
  it 'has a value for each abstract element' do
    m = "<mods #{@ns_decl}>
            <abstract>one</abstract>
            <abstract>two</abstract>
          </mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:summary_search]).to match_array %w(one two)
  end
  it 'is not present when there is no top level <abstract> element' do
    # the abstract sits inside <relatedItem>, so it must be ignored
    m = "<mods #{@ns_decl}><relatedItem><abstract>blah blah</abstract></relatedItem></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:summary_search]).to be_nil
  end
  it 'is not present if there are only empty abstract elements in the MODS' do
    m = "<mods #{@ns_decl}><abstract/><note>notit</note></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:summary_search]).to be_nil
  end
  it 'summary_display should not be populated - it is a copy field' do
    m = "<mods #{@ns_decl}><abstract>blah blah</abstract></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:summary_display]).to be_nil
  end
end # summary_search / <abstract>
54
+
55
# The language field is delegated to the stanford-mods record: building the
# doc hash must invoke #sw_language_facet on it.
it 'language: should call sw_language_facet in stanford-mods gem to populate language field' do
  builder = sdb_for_mods(@mods_xml)
  expect(builder.smods_rec).to receive(:sw_language_facet)
  builder.doc_hash_from_mods
end
61
+
62
# :physical is expected to hold one value per non-empty
# top-level physicalDescription/extent element.
context 'physical solr field from <physicalDescription><extent>' do
  it 'is populated when the MODS has mods/physicalDescription/extent element' do
    m = "<mods #{@ns_decl}><physicalDescription><extent>blah blah</extent></physicalDescription></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:physical]).to match_array ['blah blah']
  end
  it 'has a value for each extent element' do
    m = "<mods #{@ns_decl}>
            <physicalDescription>
              <extent>one</extent>
              <extent>two</extent>
            </physicalDescription>
            <physicalDescription><extent>three</extent></physicalDescription>
          </mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:physical]).to match_array %w(one two three)
  end
  it 'is not present when there is no top level <physicalDescription> element' do
    # the only physicalDescription is nested under <relatedItem>
    m = "<mods #{@ns_decl}><relatedItem><physicalDescription><extent>foo</extent></physicalDescription></relatedItem></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:physical]).to be_nil
  end
  it 'is not present if there are only empty physicalDescription or extent elements in the MODS' do
    m = "<mods #{@ns_decl}><physicalDescription/><physicalDescription><extent/></physicalDescription><note>notit</note></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:physical]).to be_nil
  end
end # physical field from physicalDescription/extent
90
+
91
# :url_suppl is expected to come only from relatedItem/location/url;
# a top-level location/url must not populate it.
context 'url_suppl solr field from /mods/relatedItem/location/url' do
  it 'is populated when the MODS has mods/relatedItem/location/url' do
    m = "<mods #{@ns_decl}><relatedItem><location><url>url.org</url></location></relatedItem></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:url_suppl]).to match_array ['url.org']
  end
  it 'has a value for each mods/relatedItem/location/url element' do
    m = "<mods #{@ns_decl}>
            <relatedItem>
              <location><url>one</url></location>
              <location>
                <url>two</url>
                <url>three</url>
              </location>
            </relatedItem>
            <relatedItem><location><url>four</url></location></relatedItem>
          </mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:url_suppl]).to match_array %w(one two three four)
  end
  it 'is not populated from /mods/location/url element' do
    m = "<mods #{@ns_decl}><location><url>hi</url></location></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:url_suppl]).to be_nil
  end
  it 'is not present if there are only empty relatedItem/location/url elements in the MODS' do
    m = "<mods #{@ns_decl}>
            <relatedItem><location><url/></location></relatedItem>
            <relatedItem><location/></relatedItem>
            <relatedItem/><note>notit</note></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:url_suppl]).to be_nil
  end
end
125
+
126
# :toc_search is expected to hold one value per non-empty
# top-level <tableOfContents> element.
context 'toc_search solr field from <tableOfContents>' do
  it 'has a value for each tableOfContents element' do
    m = "<mods #{@ns_decl}>
            <tableOfContents>one</tableOfContents>
            <tableOfContents>two</tableOfContents>
          </mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:toc_search]).to match_array %w(one two)
  end
  it 'is not present when there is no top level <tableOfContents> element' do
    # the only tableOfContents is nested under <relatedItem>
    m = "<mods #{@ns_decl}><relatedItem><tableOfContents>foo</tableOfContents></relatedItem></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:toc_search]).to be_nil
  end
  it 'is not present if there are only empty tableOfContents elements in the MODS' do
    m = "<mods #{@ns_decl}><tableOfContents/><note>notit</note></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:toc_search]).to be_nil
  end
end
146
+
147
# Format fields: :format_main_ssim and :format are produced by the builder's
# own #format_main_ssim / #format methods.
#
# The "no value" fixtures are well-formed MODS containing only a <note>
# (they previously closed <note> with </typeOfResource>, which only parsed
# because of XML error recovery).
context 'format fields' do
  context 'format_main_ssim' do
    it 'calls #format_main_ssim method' do
      m = "<mods #{@ns_decl}><note>nope</note></mods>"
      sdb = sdb_for_mods(m)
      expect(sdb).to receive(:format_main_ssim)
      sdb.doc_hash_from_mods[:format_main_ssim]
    end
    it 'has a value when MODS data provides' do
      m = "<mods #{@ns_decl}><typeOfResource>software, multimedia</typeOfResource><genre>dataset</genre></mods>"
      sdb = sdb_for_mods(m)
      expect(sdb.doc_hash_from_mods[:format_main_ssim]).to match_array ['Dataset']
    end
    it 'returns empty Array if there is no value' do
      m = "<mods #{@ns_decl}><note>nope</note></mods>"
      sdb = sdb_for_mods(m)
      expect(sdb.doc_hash_from_mods[:format_main_ssim]).to eq([])
    end
  end
  context 'format Solr field' do
    it 'calls #format method' do
      m = "<mods #{@ns_decl}><note>nope</note></mods>"
      sdb = sdb_for_mods(m)
      expect(sdb).to receive(:format)
      sdb.doc_hash_from_mods[:format]
    end
    it 'has a value when MODS data provides' do
      m = "<mods #{@ns_decl}><typeOfResource>software, multimedia</typeOfResource></mods>"
      sdb = sdb_for_mods(m)
      expect(sdb.doc_hash_from_mods[:format]).to match_array ['Computer File']
    end
    it 'returns empty Array if there is no value' do
      m = "<mods #{@ns_decl}><note>nope</note></mods>"
      sdb = sdb_for_mods(m)
      expect(sdb.doc_hash_from_mods[:format]).to eq([])
    end
  end
end
185
+
186
# Title fields. Most examples read @title_doc_hash, which is rebuilt before
# every example from @title_mods (one main title with nonSort and subTitle,
# one additional title, one alternative title).
context 'title fields' do
  before(:all) do
    @title_mods = "<mods #{@ns_decl}>
            <titleInfo><title>Jerk</title><nonSort>The</nonSort><subTitle>is whom?</subTitle></titleInfo>
            <titleInfo><title>Joke</title></titleInfo>
            <titleInfo type='alternative'><title>Alternative</title></titleInfo>
          </mods>"
  end
  let :sdb do
    sdb_for_mods(@title_mods)
  end
  before(:each) do
    @title_doc_hash = sdb.doc_hash_from_mods
  end
  it 'calls the appropriate methods in the stanford-mods gem to populate the fields' do
    smr = sdb.smods_rec
    expect(smr).to receive(:sw_short_title).at_least(:once)
    expect(smr).to receive(:sw_full_title).at_least(:once)
    expect(smr).to receive(:sw_title_display)
    expect(smr).to receive(:sw_addl_titles)
    expect(smr).to receive(:sw_sort_title)
    sdb.doc_hash_from_mods
  end
  context 'search fields' do
    it 'title_245a_search' do
      expect(@title_doc_hash[:title_245a_search]).to eq('The Jerk')
    end
    it 'title_245_search' do
      expect(@title_doc_hash[:title_245_search]).to eq('The Jerk : is whom?')
    end
    it 'title_variant_search' do
      expect(@title_doc_hash[:title_variant_search]).to match_array %w(Joke Alternative)
    end
    it 'title_related_search should not be populated from MODS' do
      expect(@title_doc_hash[:title_related_search]).to be_nil
    end
  end
  context 'display fields' do
    it 'title_display' do
      expect(@title_doc_hash[:title_display]).to eq('The Jerk : is whom?')
    end
    it 'title_245a_display' do
      expect(@title_doc_hash[:title_245a_display]).to eq('The Jerk')
    end
    it 'title_245c_display should not be populated from MODS' do
      expect(@title_doc_hash[:title_245c_display]).to be_nil
    end
    it 'title_full_display' do
      expect(@title_doc_hash[:title_full_display]).to eq('The Jerk : is whom?')
    end
    it 'removes trailing commas in title_display' do
      # Uses its own record (subTitle ends with a comma) and its own local
      # doc hash, leaving the shared `sdb` and @title_doc_hash untouched.
      title_mods = "<mods #{@ns_decl}>
            <titleInfo><title>Jerk</title><nonSort>The</nonSort><subTitle>is whom,</subTitle></titleInfo>
            <titleInfo><title>Joke</title></titleInfo>
            <titleInfo type='alternative'><title>Alternative</title></titleInfo>
          </mods>"
      doc_hash = sdb_for_mods(title_mods).doc_hash_from_mods
      expect(doc_hash[:title_display]).to eq('The Jerk : is whom')
    end
    it 'title_variant_display should not be populated - it is a copy field' do
      expect(@title_doc_hash[:title_variant_display]).to be_nil
    end
  end
  it 'title_sort' do
    expect(@title_doc_hash[:title_sort]).to eq('Jerk is whom')
  end
end # title fields
255
+
256
# Author fields. @name_mods holds two personal names (one with a displayForm
# and a role, one bare), two corporate names and one conference name; the doc
# hash built from it is refreshed before every example.
context 'author fields' do
  before(:all) do
    @name_mods = "<mods #{@ns_decl}>
            <name type='personal'>
              <namePart type='given'>John</namePart>
              <namePart type='family'>Huston</namePart>
              <role><roleTerm type='code' authority='marcrelator'>drt</roleTerm></role>
              <displayForm>q</displayForm>
            </name>
            <name type='personal'><namePart>Crusty The Clown</namePart></name>
            <name type='corporate'><namePart>Watchful Eye</namePart></name>
            <name type='corporate'>
              <namePart>Exciting Prints</namePart>
              <role><roleTerm type='text'>lithographer</roleTerm></role>
            </name>
            <name type='conference'><namePart>conference</namePart></name>
          </mods>"
  end

  let(:sdb) { sdb_for_mods(@name_mods) }

  before { @author_doc_hash = sdb.doc_hash_from_mods }

  it 'calls the appropriate methods in the stanford-mods gem to populate the fields' do
    stanford_mods_rec = sdb.smods_rec
    expect(stanford_mods_rec).to receive(:sw_main_author)
    expect(stanford_mods_rec).to receive(:sw_addl_authors)
    expect(stanford_mods_rec).to receive(:sw_person_authors).exactly(3).times
    expect(stanford_mods_rec).to receive(:sw_impersonal_authors)
    expect(stanford_mods_rec).to receive(:sw_corporate_authors)
    expect(stanford_mods_rec).to receive(:sw_meeting_authors)
    expect(stanford_mods_rec).to receive(:sw_sort_author)
    sdb.doc_hash_from_mods
  end

  context 'search fields' do
    it 'author_1xx_search' do
      expect(@author_doc_hash[:author_1xx_search]).to eq('Crusty The Clown')
    end
    it 'author_7xx_search' do
      skip 'Should this return all authors? or only 7xx authors?'
      expect(@author_doc_hash[:author_7xx_search]).to match_array ['q', 'Watchful Eye', 'Exciting Prints', 'conference']
    end
    it 'author_8xx_search should not be populated from MODS' do
      expect(@author_doc_hash[:author_8xx_search]).to be_nil
    end
  end

  context 'facet fields' do
    it 'author_person_facet' do
      expect(@author_doc_hash[:author_person_facet]).to match_array ['q', 'Crusty The Clown']
    end
    it 'author_other_facet' do
      expect(@author_doc_hash[:author_other_facet]).to match_array ['Watchful Eye', 'Exciting Prints', 'conference']
    end
  end

  context 'display fields' do
    it 'author_person_display' do
      expect(@author_doc_hash[:author_person_display]).to match_array ['q', 'Crusty The Clown']
    end
    it 'author_person_full_display' do
      expect(@author_doc_hash[:author_person_full_display]).to match_array ['q', 'Crusty The Clown']
    end
    it 'author_corp_display' do
      expect(@author_doc_hash[:author_corp_display]).to match_array ['Watchful Eye', 'Exciting Prints']
    end
    it 'author_meeting_display' do
      expect(@author_doc_hash[:author_meeting_display]).to match_array ['conference']
    end
  end

  it 'author_sort' do
    expect(@author_doc_hash[:author_sort]).to eq('Crusty The Clown')
  end
end # author fields
329
+
330
# Subject fields. @m is a MODS record with a top-level <genre>, a
# <typeOfResource>, and one <subject> per kind of subelement; each value is
# also kept in its own instance variable so examples can refer to it.
# @subject_doc_hash is rebuilt from @m before every example.
context 'subject fields' do
  before(:all) do
    @genre = 'genre top level'
    @cart_coord = '6 00 S, 71 30 E'
    @s_genre = 'genre in subject'
    @geo = 'Somewhere'
    @geo_code = 'us'
    @hier_geo_country = 'France'
    @s_name = 'name in subject'
    @occupation = 'worker bee'
    @temporal = 'temporal'
    @s_title = 'title in subject'
    @topic = 'topic'
    @m = "<mods #{@ns_decl}>
            <genre>#{@genre}</genre>
            <subject><cartographics><coordinates>#{@cart_coord}</coordinates></cartographics></subject>
            <subject><genre>#{@s_genre}</genre></subject>
            <subject><geographic>#{@geo}</geographic></subject>
            <subject><geographicCode authority='iso3166'>#{@geo_code}</geographicCode></subject>
            <subject><hierarchicalGeographic><country>#{@hier_geo_country}</country></hierarchicalGeographic></subject>
            <subject><name><namePart>#{@s_name}</namePart></name></subject>
            <subject><occupation>#{@occupation}</occupation></subject>
            <subject><temporal>#{@temporal}</temporal></subject>
            <subject><titleInfo><title>#{@s_title}</title></titleInfo></subject>
            <subject><topic>#{@topic}</topic></subject>
            <typeOfResource>still image</typeOfResource>
          </mods>"
    @m_no_subject = "<mods #{@ns_decl}><note>notit</note></mods>"
  end
  let :sdb do
    sdb_for_mods(@m)
  end
  before(:each) do
    @subject_doc_hash = sdb.doc_hash_from_mods
  end
  it 'calls the appropriate methods in stanford-mods to populate the Solr fields' do
    expect(sdb.smods_rec).to receive(:topic_search)
    expect(sdb.smods_rec).to receive(:geographic_search)
    expect(sdb.smods_rec).to receive(:subject_other_search)
    expect(sdb.smods_rec).to receive(:subject_other_subvy_search)
    expect(sdb.smods_rec).to receive(:subject_all_search)
    expect(sdb.smods_rec).to receive(:topic_facet)
    expect(sdb.smods_rec).to receive(:geographic_facet)
    expect(sdb.smods_rec).to receive(:era_facet)
    sdb.doc_hash_from_mods
  end
  context 'search fields' do
    context 'topic_search' do
      it 'only includes genre and topic' do
        expect(@subject_doc_hash[:topic_search]).to match_array [@genre, @topic]
      end
      context 'functional tests checking results from stanford-mods methods' do
        it 'is nil if there are no values in the MODS' do
          sdb = sdb_for_mods(@m_no_subject)
          expect(sdb.doc_hash_from_mods[:topic_search]).to be_nil
        end
        it 'is not nil if there are only subject/topic elements (no <genre>)' do
          m = "<mods #{@ns_decl}><subject><topic>#{@topic}</topic></subject></mods>"
          sdb = sdb_for_mods(m)
          expect(sdb.doc_hash_from_mods[:topic_search]).to match_array [@topic]
        end
        it 'is not nil if there are only <genre> elements (no subject/topic elements)' do
          m = "<mods #{@ns_decl}><genre>#{@genre}</genre></mods>"
          sdb = sdb_for_mods(m)
          expect(sdb.doc_hash_from_mods[:topic_search]).to match_array [@genre]
        end
        it 'has a separate value for each topic subelement' do
          m = "<mods #{@ns_decl}>
                  <subject>
                    <topic>first</topic>
                    <topic>second</topic>
                  </subject>
                  <subject><topic>third</topic></subject>
                </mods>"
          sdb = sdb_for_mods(m)
          expect(sdb.doc_hash_from_mods[:topic_search]).to match_array %w(first second third)
        end
      end # functional tests checking results from stanford-mods methods
    end # topic_search

    context 'geographic_search' do
      it 'includes geographic and hierarchicalGeographic' do
        expect(@subject_doc_hash[:geographic_search]).to match_array [@geo, @hier_geo_country]
      end
      it 'calls sw_geographic_search (from stanford-mods gem)' do
        m = "<mods #{@ns_decl}><subject><geographic>#{@geo}</geographic></subject></mods>"
        sdb = sdb_for_mods(m)
        expect(sdb.smods_rec).to receive(:sw_geographic_search).at_least(1).times
        sdb.doc_hash_from_mods
      end
      it "logs an info message when it encounters a geographicCode encoding it doesn't translate" do
        m = "<mods #{@ns_decl}><subject><geographicCode authority='iso3166'>ca</geographicCode></subject></mods>"
        sdb = sdb_for_mods(m)
        expect(sdb.smods_rec.sw_logger).to receive(:info).with(/#{@fake_druid} has subject geographicCode element with untranslated encoding \(iso3166\): <geographicCode authority=.*>ca<\/geographicCode>/).at_least(1).times
        sdb.doc_hash_from_mods
      end
    end # geographic_search

    context 'subject_other_search' do
      it 'includes occupation, subject names, and subject titles' do
        expect(@subject_doc_hash[:subject_other_search]).to match_array [@occupation, @s_name, @s_title]
      end
      context 'functional tests checking results from stanford-mods methods' do
        it 'is nil if there are no values in the MODS' do
          sdb = sdb_for_mods(@mods_xml)
          expect(sdb.doc_hash_from_mods[:subject_other_search]).to be_nil
        end
        it 'is not nil if there are only subject/name elements' do
          m = "<mods #{@ns_decl}><subject><name><namePart>#{@s_name}</namePart></name></subject></mods>"
          sdb = sdb_for_mods(m)
          expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [@s_name]
        end
        it 'is not nil if there are only subject/occupation elements' do
          m = "<mods #{@ns_decl}><subject><occupation>#{@occupation}</occupation></subject></mods>"
          sdb = sdb_for_mods(m)
          expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [@occupation]
        end
        it 'is not nil if there are only subject/titleInfo elements' do
          m = "<mods #{@ns_decl}><subject><titleInfo><title>#{@s_title}</title></titleInfo></subject></mods>"
          sdb = sdb_for_mods(m)
          expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [@s_title]
        end
        it 'has a separate value for each occupation subelement' do
          m = "<mods #{@ns_decl}>
                  <subject>
                    <occupation>first</occupation>
                    <occupation>second</occupation>
                  </subject>
                  <subject><occupation>third</occupation></subject>
                </mods>"
          sdb = sdb_for_mods(m)
          expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array %w(first second third)
        end
      end # functional tests checking results from stanford-mods methods
    end # subject_other_search

    context 'subject_other_subvy_search' do
      it 'includes temporal and genre SUBelement' do
        expect(@subject_doc_hash[:subject_other_subvy_search]).to match_array [@temporal, @s_genre]
      end
      context 'functional tests checking results from stanford-mods methods' do
        it 'is nil if there are no values in the MODS' do
          sdb = sdb_for_mods(@mods_xml)
          expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to be_nil
        end
        it 'is not nil if there are only subject/temporal elements (no subject/genre)' do
          m = "<mods #{@ns_decl}><subject><temporal>#{@temporal}</temporal></subject></mods>"
          sdb = sdb_for_mods(m)
          expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array [@temporal]
        end
        it 'is not nil if there are only subject/genre elements (no subject/temporal)' do
          m = "<mods #{@ns_decl}><subject><genre>#{@s_genre}</genre></subject></mods>"
          sdb = sdb_for_mods(m)
          expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array [@s_genre]
        end
        context 'genre subelement' do
          it 'has a separate value for each genre element' do
            m = "<mods #{@ns_decl}>
                    <subject>
                      <genre>first</genre>
                      <genre>second</genre>
                    </subject>
                    <subject><genre>third</genre></subject>
                  </mods>"
            sdb = sdb_for_mods(m)
            expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array %w(first second third)
          end
        end # genre subelement
      end # "functional tests checking results from stanford-mods methods"
    end # subject_other_subvy_search

    context 'subject_all_search' do
      it 'contains top level <genre> element data' do
        expect(@subject_doc_hash[:subject_all_search]).to include(@genre)
      end
      it 'does not contain cartographic sub element' do
        expect(@subject_doc_hash[:subject_all_search]).not_to include(@cart_coord)
      end
      it 'does not include codes from hierarchicalGeographic sub element' do
        expect(@subject_doc_hash[:subject_all_search]).not_to include(@geo_code)
      end
      it 'contains all other subject subelement data' do
        expect(@subject_doc_hash[:subject_all_search]).to include(@s_genre)
        expect(@subject_doc_hash[:subject_all_search]).to include(@geo)
        expect(@subject_doc_hash[:subject_all_search]).to include(@hier_geo_country)
        expect(@subject_doc_hash[:subject_all_search]).to include(@s_name)
        expect(@subject_doc_hash[:subject_all_search]).to include(@occupation)
        expect(@subject_doc_hash[:subject_all_search]).to include(@temporal)
        expect(@subject_doc_hash[:subject_all_search]).to include(@s_title)
        expect(@subject_doc_hash[:subject_all_search]).to include(@topic)
      end
    end # subject_all_search
  end # search fields

  context 'facet fields' do
    context 'topic_facet' do
      it 'includes topic subelement' do
        expect(@subject_doc_hash[:topic_facet]).to include(@topic)
      end
      it 'includes sw_subject_names' do
        expect(@subject_doc_hash[:topic_facet]).to include(@s_name)
      end
      it 'includes sw_subject_titles' do
        expect(@subject_doc_hash[:topic_facet]).to include(@s_title)
      end
      it 'includes occupation subelement' do
        expect(@subject_doc_hash[:topic_facet]).to include(@occupation)
      end
      it 'has the trailing punctuation removed' do
        # "\\" inside the double-quoted fixture is a single literal backslash
        m = "<mods #{@ns_decl}><subject>
                <topic>comma,</topic>
                <occupation>semicolon;</occupation>
                <titleInfo><title>backslash \\</title></titleInfo>
                <name><namePart>internal, punct;uation</namePart></name>
              </subject></mods>"
        sdb = sdb_for_mods(m)
        doc_hash = sdb.doc_hash_from_mods
        expect(doc_hash[:topic_facet]).to include('comma')
        expect(doc_hash[:topic_facet]).to include('semicolon')
        expect(doc_hash[:topic_facet]).to include('backslash')
        expect(doc_hash[:topic_facet]).to include('internal, punct;uation')
      end
    end # topic_facet

    context 'geographic_facet' do
      it 'includes geographic subelement' do
        expect(@subject_doc_hash[:geographic_facet]).to include(@geo)
      end
      it 'is like geographic_search with the trailing punctuation (and preceding spaces) removed' do
        m = "<mods #{@ns_decl}><subject>
                <geographic>comma,</geographic>
                <geographic>semicolon;</geographic>
                <geographic>backslash \\</geographic>
                <geographic>internal, punct;uation</geographic>
              </subject></mods>"
        sdb = sdb_for_mods(m)
        doc_hash = sdb.doc_hash_from_mods
        expect(doc_hash[:geographic_facet]).to include('comma')
        expect(doc_hash[:geographic_facet]).to include('semicolon')
        expect(doc_hash[:geographic_facet]).to include('backslash')
        expect(doc_hash[:geographic_facet]).to include('internal, punct;uation')
      end
    end

    it 'era_facet should be temporal subelement with the trailing punctuation removed' do
      m = "<mods #{@ns_decl}><subject>
              <temporal>comma,</temporal>
              <temporal>semicolon;</temporal>
              <temporal>backslash \\</temporal>
              <temporal>internal, punct;uation</temporal>
            </subject></mods>"
      sdb = sdb_for_mods(m)
      doc_hash = sdb.doc_hash_from_mods
      expect(doc_hash[:era_facet]).to include('comma')
      expect(doc_hash[:era_facet]).to include('semicolon')
      expect(doc_hash[:era_facet]).to include('backslash')
      expect(doc_hash[:era_facet]).to include('internal, punct;uation')
    end
  end # facet fields
end # subject fields
590
+
591
+ context 'publication date fields' do
592
# A single dateIssued giving both an AH and a CE century should drive every
# publication-date field; expectations are checked in the order listed.
it 'populates all date fields' do
  mods = "<mods #{@ns_decl}><originInfo>
          <dateIssued>13th century AH / 19th CE</dateIssued>
        </originInfo></mods>"
  solr_doc = sdb_for_mods(mods).doc_hash_from_mods
  {
    pub_date: '19th century',
    pub_date_sort: '1800',
    publication_year_isi: '1800',
    pub_year_tisim: '1800', # date slider
    pub_date_display: '13th century AH / 19th CE',
    imprint_display: '13th century AH / 19th CE'
  }.each do |field, expected|
    expect(solr_doc[field]).to eq(expected)
  end
end
605
# A B.C. dateIssued must leave the :pub_year_tisim key out of the doc hash.
it 'does not populate the date slider for BC dates' do
  mods = "<mods #{@ns_decl}><originInfo><dateIssued>199 B.C.</dateIssued></originInfo></mods>"
  solr_doc = sdb_for_mods(mods).doc_hash_from_mods
  expect(solr_doc).not_to have_key(:pub_year_tisim)
end
611
+
612
# :pub_date_sort derivation, exercised by stubbing #pub_date on the
# stanford-mods record of an otherwise empty MODS document.
context 'pub_date_sort integration tests' do
  let :sdb do
    sdb_for_mods("<mods #{@ns_decl}> </mods>")
  end
  it 'works on normal dates' do
    allow(sdb.smods_rec).to receive(:pub_date).and_return('1945')
    expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('1945')
  end
  it 'works on 3 digit dates' do
    allow(sdb.smods_rec).to receive(:pub_date).and_return('945')
    expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('0945')
  end
  it 'works on century dates' do
    allow(sdb.smods_rec).to receive(:pub_date).and_return('16--')
    expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('1600')
  end
  it 'works on 3 digit century dates' do
    allow(sdb.smods_rec).to receive(:pub_date).and_return('9--')
    expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('0900')
  end
end # pub_date_sort
633
+
634
# :pub_year_tisim (date slider): each example feeds one originInfo date
# string and checks the four-digit year that ends up in the doc hash.
context 'pub_year_tisim for date slider' do
  it 'takes single dateCreated' do
    m = "<mods #{@ns_decl}><originInfo>
            <dateCreated>1904</dateCreated>
          </originInfo></mods>"
    sdb = sdb_for_mods(m)
    doc_hash = sdb.doc_hash_from_mods
    expect(doc_hash[:pub_year_tisim]).to eq('1904')
  end
  it 'correctly parses a ranged date' do
    m = "<mods #{@ns_decl}><originInfo>
            <dateCreated>Text dated June 4, 1594; miniatures added by 1596</dateCreated>
          </originInfo></mods>"
    sdb = sdb_for_mods(m)
    doc_hash = sdb.doc_hash_from_mods
    expect(doc_hash[:pub_year_tisim]).to eq('1594')
  end
  it 'finds year in an expanded English form' do
    m = "<mods #{@ns_decl}><originInfo>
            <dateCreated>Aug. 3rd, 1886</dateCreated>
          </originInfo></mods>"
    sdb = sdb_for_mods(m)
    doc_hash = sdb.doc_hash_from_mods
    expect(doc_hash[:pub_year_tisim]).to eq('1886')
  end
  it 'removes question marks and brackets' do
    m = "<mods #{@ns_decl}><originInfo>
            <dateCreated>Aug. 3rd, [18]86?</dateCreated>
          </originInfo></mods>"
    sdb = sdb_for_mods(m)
    doc_hash = sdb.doc_hash_from_mods
    expect(doc_hash[:pub_year_tisim]).to eq('1886')
  end
  it 'ignores an s after the decade' do
    m = "<mods #{@ns_decl}><originInfo>
            <dateCreated>early 1890s</dateCreated>
          </originInfo></mods>"
    sdb = sdb_for_mods(m)
    doc_hash = sdb.doc_hash_from_mods
    expect(doc_hash[:pub_year_tisim]).to eq('1890')
  end
  it 'chooses a date ending with CE if there are multiple dates' do
    m = "<mods #{@ns_decl}><originInfo>
            <dateIssued>7192 AM (li-Adam) / 1684 CE</dateIssued>
          </originInfo></mods>"
    sdb = sdb_for_mods(m)
    doc_hash = sdb.doc_hash_from_mods
    expect(doc_hash[:pub_year_tisim]).to eq('1684')
  end
  it 'takes first year from hyphenated range (for now)' do
    m = "<mods #{@ns_decl}><originInfo>
            <dateIssued>1282 AH / 1865-6 CE</dateIssued>
          </originInfo></mods>"
    sdb = sdb_for_mods(m)
    doc_hash = sdb.doc_hash_from_mods
    expect(doc_hash[:pub_year_tisim]).to eq('1865')
  end
end # pub_year_tisim method
692
+
693
+ context 'difficult pub dates' do
694
+ it 'should handle multiple pub dates (to be implemented - esp for date slider)'
695
+
696
+ it 'should choose the latest date??? (to be implemented - esp for sorting and date slider)'
697
+
698
# "19th CE" in the date string is expected to yield a century label for
# :pub_date and the century's first year ('1800') for the year fields.
it 'handles nnth century dates' do
  mods = "<mods #{@ns_decl}><originInfo>
          <dateIssued>13th century AH / 19th CE</dateIssued>
        </originInfo></mods>"
  solr_doc = sdb_for_mods(mods).doc_hash_from_mods
  {
    pub_date: '19th century',
    pub_date_sort: '1800',
    pub_year_tisim: '1800',
    publication_year_isi: '1800',
    imprint_display: '13th century AH / 19th CE'
  }.each do |field, expected|
    expect(solr_doc[field]).to eq(expected)
  end
end
710
# With two CE years in one dateIssued, the first one ('1517') is expected
# in the sort, display and slider fields.
it 'handles multiple CE dates' do
  mods = "<mods #{@ns_decl}><originInfo>
          <dateIssued>6 Dhu al-Hijjah 923 AH / 1517 CE -- 7 Rabi I 924 AH / 1518 CE</dateIssued>
        </originInfo></mods>"
  solr_doc = sdb_for_mods(mods).doc_hash_from_mods
  %i(pub_date_sort pub_date pub_year_tisim).each do |field|
    expect(solr_doc[field]).to eq('1517')
  end
end
720
+ it 'handles specific century case from walters' do
721
+ m = "<mods #{@ns_decl}><originInfo>
722
+ <dateIssued>Late 14th or early 15th century CE</dateIssued>
723
+ </originInfo></mods>"
724
+ sdb = sdb_for_mods(m)
725
+ doc_hash = sdb.doc_hash_from_mods
726
+ expect(doc_hash[:pub_date_sort]).to eq('1400')
727
+ expect(doc_hash[:pub_year_tisim]).to eq('1400')
728
+ expect(doc_hash[:publication_year_isi]).to eq('1400')
729
+ expect(doc_hash[:pub_date]).to eq('15th century')
730
+ expect(doc_hash[:imprint_display]).to eq('Late 14th or early 15th century CE')
731
+ end
732
+ it 'works on explicit 3 digit dates' do
733
+ m = "<mods #{@ns_decl}><originInfo>
734
+ <dateIssued>966 CE</dateIssued>
735
+ </originInfo></mods>"
736
+ sdb = sdb_for_mods(m)
737
+ doc_hash = sdb.doc_hash_from_mods
738
+ expect(doc_hash[:pub_date_sort]).to eq('0966')
739
+ expect(doc_hash[:pub_date]).to eq('966')
740
+ expect(doc_hash[:pub_year_tisim]).to eq('0966')
741
+ expect(doc_hash[:publication_year_isi]).to eq('0966')
742
+ expect(doc_hash[:imprint_display]).to eq('966 CE')
743
+ end
744
+ it 'works on 3 digit century dates' do
745
+ m = "<mods #{@ns_decl}><originInfo>
746
+ <dateIssued>3rd century AH / 9th CE</dateIssued>
747
+ </originInfo></mods>"
748
+ sdb = sdb_for_mods(m)
749
+ doc_hash = sdb.doc_hash_from_mods
750
+ expect(doc_hash[:pub_date_sort]).to eq('0800')
751
+ expect(doc_hash[:pub_year_tisim]).to eq('0800')
752
+ expect(doc_hash[:pub_date]).to eq('9th century')
753
+ expect(doc_hash[:publication_year_isi]).to eq('0800')
754
+ expect(doc_hash[:imprint_display]).to eq('3rd century AH / 9th CE')
755
+ end
756
+ it 'works on 3 digit BC dates' do
757
+ m = "<mods #{@ns_decl}><originInfo>
758
+ <dateCreated>300 B.C.</dateCreated>
759
+ </originInfo></mods>"
760
+ sdb = sdb_for_mods(m)
761
+ doc_hash = sdb.doc_hash_from_mods
762
+ expect(doc_hash[:pub_date_sort]).to eq('-700')
763
+ expect(doc_hash[:pub_year_tisim]).to be_nil
764
+ expect(doc_hash[:pub_date]).to eq('300 B.C.')
765
+ expect(doc_hash[:imprint_display]).to eq('300 B.C.')
766
+ # doc_hash[:creation_year_isi].should =='-300'
767
+ end
768
+ end # difficult pub dates
769
+ end # publication date fields
770
+ end # doc_hash_from_mods
771
+
772
+ context '#format' do
773
+ it 'gets format from call to stanford-mods searchworks format method' do
774
+ m = "<mods #{@ns_decl}><typeOfResource>still image</typeOfResouce></mods>"
775
+ sdb = sdb_for_mods(m)
776
+ expect(sdb.smods_rec).to receive(:format).and_call_original
777
+ expect(sdb.format).to match_array ['Image']
778
+ end
779
+ it 'returns empty Array and log warning if there is no value' do
780
+ sdb = sdb_for_mods(@mods_xml)
781
+ expect(sdb.logger).to receive(:warn).with("#{@fake_druid} has no SearchWorks format from MODS - check <typeOfResource> and other implicated MODS elements")
782
+ expect(sdb.format).to eq([])
783
+ end
784
+ end # context #format
785
+
786
+ context '#format_main_ssim' do
787
+ it 'gets format_main_ssim from call to stanford-mods searchworks format_main method' do
788
+ m = "<mods #{@ns_decl}><typeOfResource>still image</typeOfResouce></mods>"
789
+ sdb = sdb_for_mods(m)
790
+ expect(sdb.smods_rec).to receive(:format_main).and_call_original
791
+ expect(sdb.format_main_ssim).to match_array ['Image']
792
+ end
793
+ it 'returns empty Array and log warning if there is no value' do
794
+ sdb = sdb_for_mods(@mods_xml)
795
+ expect(sdb.logger).to receive(:warn).with("#{@fake_druid} has no SearchWorks Resource Type from MODS - check <typeOfResource> and other implicated MODS elements")
796
+ expect(sdb.format_main_ssim).to eq([])
797
+ end
798
+ end # context format_main_ssim
799
+
800
+ context 'genre_ssim' do
801
+ it 'gets genre_ssim from call to stanford-mods searchworks sw_genre method' do
802
+ m = "<mods #{@ns_decl}><genre>technical report</genre></mods>"
803
+ sdb = sdb_for_mods(m)
804
+ expect(sdb.smods_rec).to receive(:sw_genre).and_call_original
805
+ expect(sdb.genre_ssim).to match_array ['Technical report']
806
+ end
807
+ it 'returns empty Array if there is no value' do
808
+ sdb = sdb_for_mods(@mods_xml)
809
+ expect(sdb.genre_ssim).to eq([])
810
+ end
811
+ end # context genre_ssim
812
+ end