gdor-indexer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) # put ../lib (relative to this file) first on the load path
2
+
3
+ # Test coverage: SimpleCov is started here, before 'gdor/indexer' is required below
4
+ require 'simplecov'
5
+ SimpleCov.start do
6
+ # paths excluded from the coverage report
7
+ add_filter 'spec/'
8
+ add_filter 'config/deploy'
9
+ add_filter 'config/deploy.rb'
10
+ end
11
+
12
+ require 'gdor/indexer'
13
+ require 'rspec/matchers' # required by the equivalent-xml custom matcher `be_equivalent_to`
14
+ require 'equivalent-xml'
15
+ require 'vcr'
16
+ require 'stringio'
17
+
18
+ VCR.configure do |c|
19
+ c.cassette_library_dir = 'spec/vcr_cassettes' # directory holding the recorded cassettes
20
+ c.hook_into :webmock # HTTP interception is delegated to WebMock
21
+ c.allow_http_connections_when_no_cassette = true # real HTTP is allowed when no cassette is in use
22
+ c.configure_rspec_metadata! # turns on VCR's RSpec metadata integration
23
+ end
24
+
25
+ # RSpec.configure do |config|
26
+ # end
@@ -0,0 +1,812 @@
1
+ require 'spec_helper'
2
+
3
+ describe GDor::Indexer::ModsFields do
4
+ before(:all) do
5
+ @fake_druid = 'oo000oo0000'
6
+ @ns_decl = "xmlns='#{Mods::MODS_NS}'"
7
+ @mods_xml = "<mods #{@ns_decl}><note>gdor_mods_fields testing</note></mods>"
8
+ end
9
+
10
+ let :logger do
11
+ Logger.new StringIO.new
12
+ end
13
+
14
+ def sdb_for_mods(m)
15
+ resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
16
+ allow(resource).to receive(:public_xml).and_return(nil)
17
+ allow(resource).to receive(:mods).and_return(Nokogiri::XML(m))
18
+ GDor::Indexer::SolrDocBuilder.new(resource, logger)
19
+ end
20
+
21
+ context 'doc_hash_from_mods' do
22
+ # see https://consul.stanford.edu/display/NGDE/Required+and+Recommended+Solr+Fields+for+SearchWorks+documents
23
+
24
+ context 'summary_search solr field from <abstract>' do
25
+ it 'is populated when the MODS has a top level <abstract> element' do
26
+ m = "<mods #{@ns_decl}><abstract>blah blah</abstract></mods>"
27
+ sdb = sdb_for_mods(m)
28
+ expect(sdb.doc_hash_from_mods[:summary_search]).to match_array ['blah blah']
29
+ end
30
+ it 'has a value for each abstract element' do
31
+ m = "<mods #{@ns_decl}>
32
+ <abstract>one</abstract>
33
+ <abstract>two</abstract>
34
+ </mods>"
35
+ sdb = sdb_for_mods(m)
36
+ expect(sdb.doc_hash_from_mods[:summary_search]).to match_array %w(one two)
37
+ end
38
+ it 'does not be present when there is no top level <abstract> element' do
39
+ m = "<mods #{@ns_decl}><relatedItem><abstract>blah blah</abstract></relatedItem></mods>"
40
+ sdb = sdb_for_mods(m)
41
+ expect(sdb.doc_hash_from_mods[:summary_search]).to be_nil
42
+ end
43
+ it 'does not be present if there are only empty abstract elements in the MODS' do
44
+ m = "<mods #{@ns_decl}><abstract/><note>notit</note></mods>"
45
+ sdb = sdb_for_mods(m)
46
+ expect(sdb.doc_hash_from_mods[:summary_search]).to be_nil
47
+ end
48
+ it 'summary_display should not be populated - it is a copy field' do
49
+ m = "<mods #{@ns_decl}><abstract>blah blah</abstract></mods>"
50
+ sdb = sdb_for_mods(m)
51
+ expect(sdb.doc_hash_from_mods[:summary_display]).to be_nil
52
+ end
53
+ end # summary_search / <abstract>
54
+
55
+ it 'language: should call sw_language_facet in stanford-mods gem to populate language field' do
56
+ sdb = sdb_for_mods(@mods_xml)
57
+ smr = sdb.smods_rec
58
+ expect(smr).to receive(:sw_language_facet)
59
+ sdb.doc_hash_from_mods
60
+ end
61
+
62
+ context 'physical solr field from <physicalDescription><extent>' do
63
+ it 'is populated when the MODS has mods/physicalDescription/extent element' do
64
+ m = "<mods #{@ns_decl}><physicalDescription><extent>blah blah</extent></physicalDescription></mods>"
65
+ sdb = sdb_for_mods(m)
66
+ expect(sdb.doc_hash_from_mods[:physical]).to match_array ['blah blah']
67
+ end
68
+ it 'has a value for each extent element' do
69
+ m = "<mods #{@ns_decl}>
70
+ <physicalDescription>
71
+ <extent>one</extent>
72
+ <extent>two</extent>
73
+ </physicalDescription>
74
+ <physicalDescription><extent>three</extent></physicalDescription>
75
+ </mods>"
76
+ sdb = sdb_for_mods(m)
77
+ expect(sdb.doc_hash_from_mods[:physical]).to match_array %w(one two three)
78
+ end
79
+ it 'does not be present when there is no top level <physicalDescription> element' do
80
+ m = "<mods #{@ns_decl}><relatedItem><physicalDescription><extent>foo</extent></physicalDescription></relatedItem></mods>"
81
+ sdb = sdb_for_mods(m)
82
+ expect(sdb.doc_hash_from_mods[:physical]).to be_nil
83
+ end
84
+ it 'does not be present if there are only empty physicalDescription or extent elements in the MODS' do
85
+ m = "<mods #{@ns_decl}><physicalDescription/><physicalDescription><extent/></physicalDescription><note>notit</note></mods>"
86
+ sdb = sdb_for_mods(m)
87
+ expect(sdb.doc_hash_from_mods[:physical]).to be_nil
88
+ end
89
+ end # physical field from physicalDescription/extent
90
+
91
+ context 'url_suppl solr field from /mods/relatedItem/location/url' do
92
+ it 'is populated when the MODS has mods/relatedItem/location/url' do
93
+ m = "<mods #{@ns_decl}><relatedItem><location><url>url.org</url></location></relatedItem></mods>"
94
+ sdb = sdb_for_mods(m)
95
+ expect(sdb.doc_hash_from_mods[:url_suppl]).to match_array ['url.org']
96
+ end
97
+ it 'has a value for each mods/relatedItem/location/url element' do
98
+ m = "<mods #{@ns_decl}>
99
+ <relatedItem>
100
+ <location><url>one</url></location>
101
+ <location>
102
+ <url>two</url>
103
+ <url>three</url>
104
+ </location>
105
+ </relatedItem>
106
+ <relatedItem><location><url>four</url></location></relatedItem>
107
+ </mods>"
108
+ sdb = sdb_for_mods(m)
109
+ expect(sdb.doc_hash_from_mods[:url_suppl]).to match_array %w(one two three four)
110
+ end
111
+ it 'does not be populated from /mods/location/url element' do
112
+ m = "<mods #{@ns_decl}><location><url>hi</url></location></mods>"
113
+ sdb = sdb_for_mods(m)
114
+ expect(sdb.doc_hash_from_mods[:url_suppl]).to be_nil
115
+ end
116
+ it 'does not be present if there are only empty relatedItem/location/url elements in the MODS' do
117
+ m = "<mods #{@ns_decl}>
118
+ <relatedItem><location><url/></location></relatedItem>
119
+ <relatedItem><location/></relatedItem>
120
+ <relatedItem/><note>notit</note></mods>"
121
+ sdb = sdb_for_mods(m)
122
+ expect(sdb.doc_hash_from_mods[:url_suppl]).to be_nil
123
+ end
124
+ end
125
+
126
+ context 'toc_search solr field from <tableOfContents>' do
127
+ it 'has a value for each tableOfContents element' do
128
+ m = "<mods #{@ns_decl}>
129
+ <tableOfContents>one</tableOfContents>
130
+ <tableOfContents>two</tableOfContents>
131
+ </mods>"
132
+ sdb = sdb_for_mods(m)
133
+ expect(sdb.doc_hash_from_mods[:toc_search]).to match_array %w(one two)
134
+ end
135
+ it 'does not be present when there is no top level <tableOfContents> element' do
136
+ m = "<mods #{@ns_decl}><relatedItem><tableOfContents>foo</tableOfContents></relatedItem></mods>"
137
+ sdb = sdb_for_mods(m)
138
+ expect(sdb.doc_hash_from_mods[:toc_search]).to be_nil
139
+ end
140
+ it 'does not be present if there are only empty tableOfContents elements in the MODS' do
141
+ m = "<mods #{@ns_decl}><tableOfContents/><note>notit</note></mods>"
142
+ sdb = sdb_for_mods(m)
143
+ expect(sdb.doc_hash_from_mods[:toc_search]).to be_nil
144
+ end
145
+ end
146
+
147
+ context 'format fields' do
148
+ context 'format_main_ssim' do
149
+ it 'calls #format_main_ssim method' do
150
+ m = "<mods #{@ns_decl}><note>nope</typeOfResource></mods>"
151
+ sdb = sdb_for_mods(m)
152
+ expect(sdb).to receive(:format_main_ssim)
153
+ sdb.doc_hash_from_mods[:format_main_ssim]
154
+ end
155
+ it 'has a value when MODS data provides' do
156
+ m = "<mods #{@ns_decl}><typeOfResource>software, multimedia</typeOfResource><genre>dataset</genre></mods>"
157
+ sdb = sdb_for_mods(m)
158
+ expect(sdb.doc_hash_from_mods[:format_main_ssim]).to match_array ['Dataset']
159
+ end
160
+ it 'returns empty Array if there is no value' do
161
+ m = "<mods #{@ns_decl}><note>nope</typeOfResource></mods>"
162
+ sdb = sdb_for_mods(m)
163
+ expect(sdb.doc_hash_from_mods[:format_main_ssim]).to eq([])
164
+ end
165
+ end
166
+ context 'format Solr field' do
167
+ it 'calls #format method' do
168
+ m = "<mods #{@ns_decl}><note>nope</typeOfResource></mods>"
169
+ sdb = sdb_for_mods(m)
170
+ expect(sdb).to receive(:format)
171
+ sdb.doc_hash_from_mods[:format]
172
+ end
173
+ it 'has a value when MODS data provides' do
174
+ m = "<mods #{@ns_decl}><typeOfResource>software, multimedia</typeOfResource></mods>"
175
+ sdb = sdb_for_mods(m)
176
+ expect(sdb.doc_hash_from_mods[:format]).to match_array ['Computer File']
177
+ end
178
+ it 'returns empty Array if there is no value' do
179
+ m = "<mods #{@ns_decl}><note>nope</typeOfResource></mods>"
180
+ sdb = sdb_for_mods(m)
181
+ expect(sdb.doc_hash_from_mods[:format]).to eq([])
182
+ end
183
+ end
184
+ end
185
+
186
+ context 'title fields' do
187
+ before(:all) do
188
+ @title_mods = "<mods #{@ns_decl}>
189
+ <titleInfo><title>Jerk</title><nonSort>The</nonSort><subTitle>is whom?</subTitle></titleInfo>
190
+ <titleInfo><title>Joke</title></titleInfo>
191
+ <titleInfo type='alternative'><title>Alternative</title></titleInfo>
192
+ </mods>"
193
+ end
194
+ let :sdb do
195
+ sdb_for_mods(@title_mods)
196
+ end
197
+ before(:each) do
198
+ @title_doc_hash = sdb.doc_hash_from_mods
199
+ end
200
+ it 'calls the appropriate methods in the stanford-mods gem to populate the fields' do
201
+ smr = sdb.smods_rec
202
+ expect(smr).to receive(:sw_short_title).at_least(:once)
203
+ expect(smr).to receive(:sw_full_title).at_least(:once)
204
+ expect(smr).to receive(:sw_title_display)
205
+ expect(smr).to receive(:sw_addl_titles)
206
+ expect(smr).to receive(:sw_sort_title)
207
+ sdb.doc_hash_from_mods
208
+ end
209
+ context 'search fields' do
210
+ it 'title_245a_search' do
211
+ expect(@title_doc_hash[:title_245a_search]).to eq('The Jerk')
212
+ end
213
+ it 'title_245_search' do
214
+ expect(@title_doc_hash[:title_245_search]).to eq('The Jerk : is whom?')
215
+ end
216
+ it 'title_variant_search' do
217
+ expect(@title_doc_hash[:title_variant_search]).to match_array %w(Joke Alternative)
218
+ end
219
+ it 'title_related_search should not be populated from MODS' do
220
+ expect(@title_doc_hash[:title_related_search]).to be_nil
221
+ end
222
+ end
223
+ context 'display fields' do
224
+ it 'title_display' do
225
+ expect(@title_doc_hash[:title_display]).to eq('The Jerk : is whom?')
226
+ end
227
+ it 'title_245a_display' do
228
+ expect(@title_doc_hash[:title_245a_display]).to eq('The Jerk')
229
+ end
230
+ it 'title_245c_display should not be populated from MODS' do
231
+ expect(@title_doc_hash[:title_245c_display]).to be_nil
232
+ end
233
+ it 'title_full_display' do
234
+ expect(@title_doc_hash[:title_full_display]).to eq('The Jerk : is whom?')
235
+ end
236
+ it 'removes trailing commas in title_display' do
237
+ title_mods = "<mods #{@ns_decl}>
238
+ <titleInfo><title>Jerk</title><nonSort>The</nonSort><subTitle>is whom,</subTitle></titleInfo>
239
+ <titleInfo><title>Joke</title></titleInfo>
240
+ <titleInfo type='alternative'><title>Alternative</title></titleInfo>
241
+ </mods>"
242
+ sdb = sdb_for_mods(title_mods)
243
+ @title_doc_hash = sdb.doc_hash_from_mods
244
+ @title_doc_hash
245
+ expect(@title_doc_hash[:title_display]).to eq('The Jerk : is whom')
246
+ end
247
+ it 'title_variant_display should not be populated - it is a copy field' do
248
+ expect(@title_doc_hash[:title_variant_display]).to be_nil
249
+ end
250
+ end
251
+ it 'title_sort' do
252
+ expect(@title_doc_hash[:title_sort]).to eq('Jerk is whom')
253
+ end
254
+ end # title fields
255
+
256
+ context 'author fields' do
257
+ before(:all) do
258
+ @name_mods = "<mods #{@ns_decl}>
259
+ <name type='personal'>
260
+ <namePart type='given'>John</namePart>
261
+ <namePart type='family'>Huston</namePart>
262
+ <role><roleTerm type='code' authority='marcrelator'>drt</roleTerm></role>
263
+ <displayForm>q</displayForm>
264
+ </name>
265
+ <name type='personal'><namePart>Crusty The Clown</namePart></name>
266
+ <name type='corporate'><namePart>Watchful Eye</namePart></name>
267
+ <name type='corporate'>
268
+ <namePart>Exciting Prints</namePart>
269
+ <role><roleTerm type='text'>lithographer</roleTerm></role>
270
+ </name>
271
+ <name type='conference'><namePart>conference</namePart></name>
272
+ </mods>"
273
+ end
274
+ let :sdb do
275
+ sdb_for_mods(@name_mods)
276
+ end
277
+ before(:each) do
278
+ @author_doc_hash = sdb.doc_hash_from_mods
279
+ end
280
+ it 'calls the appropriate methods in the stanford-mods gem to populate the fields' do
281
+ smr = sdb.smods_rec
282
+ expect(smr).to receive(:sw_main_author)
283
+ expect(smr).to receive(:sw_addl_authors)
284
+ expect(smr).to receive(:sw_person_authors).exactly(3).times
285
+ expect(smr).to receive(:sw_impersonal_authors)
286
+ expect(smr).to receive(:sw_corporate_authors)
287
+ expect(smr).to receive(:sw_meeting_authors)
288
+ expect(smr).to receive(:sw_sort_author)
289
+ sdb.doc_hash_from_mods
290
+ end
291
+ context 'search fields' do
292
+ it 'author_1xx_search' do
293
+ expect(@author_doc_hash[:author_1xx_search]).to eq('Crusty The Clown')
294
+ end
295
+ it 'author_7xx_search' do
296
+ skip 'Should this return all authors? or only 7xx authors?'
297
+ expect(@author_doc_hash[:author_7xx_search]).to match_array ['q', 'Watchful Eye', 'Exciting Prints', 'conference']
298
+ end
299
+ it 'author_8xx_search should not be populated from MODS' do
300
+ expect(@author_doc_hash[:author_8xx_search]).to be_nil
301
+ end
302
+ end
303
+ context 'facet fields' do
304
+ it 'author_person_facet' do
305
+ expect(@author_doc_hash[:author_person_facet]).to match_array ['q', 'Crusty The Clown']
306
+ end
307
+ it 'author_other_facet' do
308
+ expect(@author_doc_hash[:author_other_facet]).to match_array ['Watchful Eye', 'Exciting Prints', 'conference']
309
+ end
310
+ end
311
+ context 'display fields' do
312
+ it 'author_person_display' do
313
+ expect(@author_doc_hash[:author_person_display]).to match_array ['q', 'Crusty The Clown']
314
+ end
315
+ it 'author_person_full_display' do
316
+ expect(@author_doc_hash[:author_person_full_display]).to match_array ['q', 'Crusty The Clown']
317
+ end
318
+ it 'author_corp_display' do
319
+ expect(@author_doc_hash[:author_corp_display]).to match_array ['Watchful Eye', 'Exciting Prints']
320
+ end
321
+ it 'author_meeting_display' do
322
+ expect(@author_doc_hash[:author_meeting_display]).to match_array ['conference']
323
+ end
324
+ end
325
+ it 'author_sort' do
326
+ expect(@author_doc_hash[:author_sort]).to eq('Crusty The Clown')
327
+ end
328
+ end # author fields
329
+
330
+ context 'subject fields' do
331
+ before(:all) do
332
+ @genre = 'genre top level'
333
+ @cart_coord = '6 00 S, 71 30 E'
334
+ @s_genre = 'genre in subject'
335
+ @geo = 'Somewhere'
336
+ @geo_code = 'us'
337
+ @hier_geo_country = 'France'
338
+ @s_name = 'name in subject'
339
+ @occupation = 'worker bee'
340
+ @temporal = 'temporal'
341
+ @s_title = 'title in subject'
342
+ @topic = 'topic'
343
+ @m = "<mods #{@ns_decl}>
344
+ <genre>#{@genre}</genre>
345
+ <subject><cartographics><coordinates>#{@cart_coord}</coordinates></cartographics></subject>
346
+ <subject><genre>#{@s_genre}</genre></subject>
347
+ <subject><geographic>#{@geo}</geographic></subject>
348
+ <subject><geographicCode authority='iso3166'>#{@geo_code}</geographicCode></subject>
349
+ <subject><hierarchicalGeographic><country>#{@hier_geo_country}</country></hierarchicalGeographic></subject>
350
+ <subject><name><namePart>#{@s_name}</namePart></name></subject>
351
+ <subject><occupation>#{@occupation}</occupation></subject>
352
+ <subject><temporal>#{@temporal}</temporal></subject>
353
+ <subject><titleInfo><title>#{@s_title}</title></titleInfo></subject>
354
+ <subject><topic>#{@topic}</topic></subject>
355
+ <typeOfResource>still image</typeOfResource>
356
+ </mods>"
357
+ @m_no_subject = "<mods #{@ns_decl}><note>notit</note></mods>"
358
+ end
359
+ let :sdb do
360
+ sdb = sdb_for_mods(@m)
361
+ end
362
+ before(:each) do
363
+ @subject_doc_hash = sdb.doc_hash_from_mods
364
+ end
365
+ it 'calls the appropriate methods in stanford-mods to populate the Solr fields' do
366
+ expect(sdb.smods_rec).to receive(:topic_search)
367
+ expect(sdb.smods_rec).to receive(:geographic_search)
368
+ expect(sdb.smods_rec).to receive(:subject_other_search)
369
+ expect(sdb.smods_rec).to receive(:subject_other_subvy_search)
370
+ expect(sdb.smods_rec).to receive(:subject_all_search)
371
+ expect(sdb.smods_rec).to receive(:topic_facet)
372
+ expect(sdb.smods_rec).to receive(:geographic_facet)
373
+ expect(sdb.smods_rec).to receive(:era_facet)
374
+ sdb.doc_hash_from_mods
375
+ end
376
+ context 'search fields' do
377
+ context 'topic_search' do
378
+ it 'onlies include genre and topic' do
379
+ expect(@subject_doc_hash[:topic_search]).to match_array [@genre, @topic]
380
+ end
381
+ context 'functional tests checking results from stanford-mods methods' do
382
+ it 'is nil if there are no values in the MODS' do
383
+ sdb = sdb_for_mods(@m_no_subject)
384
+ expect(sdb.doc_hash_from_mods[:topic_search]).to be_nil
385
+ end
386
+ it 'does not be nil if there are only subject/topic elements (no <genre>)' do
387
+ m = "<mods #{@ns_decl}><subject><topic>#{@topic}</topic></subject></mods>"
388
+ sdb = sdb_for_mods(m)
389
+ expect(sdb.doc_hash_from_mods[:topic_search]).to match_array [@topic]
390
+ end
391
+ it 'does not be nil if there are only <genre> elements (no subject/topic elements)' do
392
+ m = "<mods #{@ns_decl}><genre>#{@genre}</genre></mods>"
393
+ sdb = sdb_for_mods(m)
394
+ expect(sdb.doc_hash_from_mods[:topic_search]).to match_array [@genre]
395
+ end
396
+ it 'has a separate value for each topic subelement' do
397
+ m = "<mods #{@ns_decl}>
398
+ <subject>
399
+ <topic>first</topic>
400
+ <topic>second</topic>
401
+ </subject>
402
+ <subject><topic>third</topic></subject>
403
+ </mods>"
404
+ sdb = sdb_for_mods(m)
405
+ expect(sdb.doc_hash_from_mods[:topic_search]).to match_array %w(first second third)
406
+ end
407
+ end # functional tests checking results from stanford-mods methods
408
+ end # topic_search
409
+
410
+ context 'geographic_search' do
411
+ it 'includes geographic and hierarchicalGeographic' do
412
+ expect(@subject_doc_hash[:geographic_search]).to match_array [@geo, @hier_geo_country]
413
+ end
414
+ it 'calls sw_geographic_search (from stanford-mods gem)' do
415
+ m = "<mods #{@ns_decl}><subject><geographic>#{@geo}</geographic></subject></mods>"
416
+ sdb = sdb_for_mods(m)
417
+ expect(sdb.smods_rec).to receive(:sw_geographic_search).at_least(1).times
418
+ sdb.doc_hash_from_mods
419
+ end
420
+ it "logs an info message when it encounters a geographicCode encoding it doesn't translate" do
421
+ m = "<mods #{@ns_decl}><subject><geographicCode authority='iso3166'>ca</geographicCode></subject></mods>"
422
+ sdb = sdb_for_mods(m)
423
+ expect(sdb.smods_rec.sw_logger).to receive(:info).with(/#{@fake_druid} has subject geographicCode element with untranslated encoding \(iso3166\): <geographicCode authority=.*>ca<\/geographicCode>/).at_least(1).times
424
+ sdb.doc_hash_from_mods
425
+ end
426
+ end # geographic_search
427
+
428
+ context 'subject_other_search' do
429
+ it 'includes occupation, subject names, and subject titles' do
430
+ expect(@subject_doc_hash[:subject_other_search]).to match_array [@occupation, @s_name, @s_title]
431
+ end
432
+ context 'functional tests checking results from stanford-mods methods' do
433
+ it 'is nil if there are no values in the MODS' do
434
+ sdb = sdb_for_mods(@mods_xml)
435
+ expect(sdb.doc_hash_from_mods[:subject_other_search]).to be_nil
436
+ end
437
+ it 'does not be nil if there are only subject/name elements' do
438
+ m = "<mods #{@ns_decl}><subject><name><namePart>#{@s_name}</namePart></name></subject></mods>"
439
+ sdb = sdb_for_mods(m)
440
+ expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [@s_name]
441
+ end
442
+ it 'does not be nil if there are only subject/occupation elements' do
443
+ m = "<mods #{@ns_decl}><subject><occupation>#{@occupation}</occupation></subject></mods>"
444
+ sdb = sdb_for_mods(m)
445
+ expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [@occupation]
446
+ end
447
+ it 'does not be nil if there are only subject/titleInfo elements' do
448
+ m = "<mods #{@ns_decl}><subject><titleInfo><title>#{@s_title}</title></titleInfo></subject></mods>"
449
+ sdb = sdb_for_mods(m)
450
+ expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [@s_title]
451
+ end
452
+ it 'has a separate value for each occupation subelement' do
453
+ m = "<mods #{@ns_decl}>
454
+ <subject>
455
+ <occupation>first</occupation>
456
+ <occupation>second</occupation>
457
+ </subject>
458
+ <subject><occupation>third</occupation></subject>
459
+ </mods>"
460
+ sdb = sdb_for_mods(m)
461
+ expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array %w(first second third)
462
+ end
463
+ end # functional tests checking results from stanford-mods methods
464
+ end # subject_other_search
465
+
466
+ context 'subject_other_subvy_search' do
467
+ it 'includes temporal and genre SUBelement' do
468
+ expect(@subject_doc_hash[:subject_other_subvy_search]).to match_array [@temporal, @s_genre]
469
+ end
470
+ context 'functional tests checking results from stanford-mods methods' do
471
+ it 'is nil if there are no values in the MODS' do
472
+ sdb = sdb_for_mods(@mods_xml)
473
+ expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to be_nil
474
+ end
475
+ it 'does not be nil if there are only subject/temporal elements (no subject/genre)' do
476
+ m = "<mods #{@ns_decl}><subject><temporal>#{@temporal}</temporal></subject></mods>"
477
+ sdb = sdb_for_mods(m)
478
+ expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array [@temporal]
479
+ end
480
+ it 'does not be nil if there are only subject/genre elements (no subject/temporal)' do
481
+ m = "<mods #{@ns_decl}><subject><genre>#{@s_genre}</genre></subject></mods>"
482
+ sdb = sdb_for_mods(m)
483
+ expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array [@s_genre]
484
+ end
485
+ context 'genre subelement' do
486
+ it 'has a separate value for each genre element' do
487
+ m = "<mods #{@ns_decl}>
488
+ <subject>
489
+ <genre>first</genre>
490
+ <genre>second</genre>
491
+ </subject>
492
+ <subject><genre>third</genre></subject>
493
+ </mods>"
494
+ sdb = sdb_for_mods(m)
495
+ expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array %w(first second third)
496
+ end
497
+ end # genre subelement
498
+ end # "functional tests checking results from stanford-mods methods"
499
+ end # subject_other_subvy_search
500
+
501
+ context 'subject_all_search' do
502
+ it 'contains top level <genre> element data' do
503
+ expect(@subject_doc_hash[:subject_all_search]).to include(@genre)
504
+ end
505
+ it 'does not contain cartographic sub element' do
506
+ expect(@subject_doc_hash[:subject_all_search]).not_to include(@cart_coord)
507
+ end
508
+ it 'does not include codes from hierarchicalGeographic sub element' do
509
+ expect(@subject_doc_hash[:subject_all_search]).not_to include(@geo_code)
510
+ end
511
+ it 'contains all other subject subelement data' do
512
+ expect(@subject_doc_hash[:subject_all_search]).to include(@s_genre)
513
+ expect(@subject_doc_hash[:subject_all_search]).to include(@geo)
514
+ expect(@subject_doc_hash[:subject_all_search]).to include(@hier_geo_country)
515
+ expect(@subject_doc_hash[:subject_all_search]).to include(@s_name)
516
+ expect(@subject_doc_hash[:subject_all_search]).to include(@occupation)
517
+ expect(@subject_doc_hash[:subject_all_search]).to include(@temporal)
518
+ expect(@subject_doc_hash[:subject_all_search]).to include(@s_title)
519
+ expect(@subject_doc_hash[:subject_all_search]).to include(@topic)
520
+ end
521
+ end # subject_all_search
522
+ end # search fields
523
+
524
+ context 'facet fields' do
525
+ context 'topic_facet' do
526
+ it 'includes topic subelement' do
527
+ expect(@subject_doc_hash[:topic_facet]).to include(@topic)
528
+ end
529
+ it 'includes sw_subject_names' do
530
+ expect(@subject_doc_hash[:topic_facet]).to include(@s_name)
531
+ end
532
+ it 'includes sw_subject_titles' do
533
+ expect(@subject_doc_hash[:topic_facet]).to include(@s_title)
534
+ end
535
+ it 'includes occupation subelement' do
536
+ expect(@subject_doc_hash[:topic_facet]).to include(@occupation)
537
+ end
538
+ it 'has the trailing punctuation removed' do
539
+ m = "<mods #{@ns_decl}><subject>
540
+ <topic>comma,</topic>
541
+ <occupation>semicolon;</occupation>
542
+ <titleInfo><title>backslash \\</title></titleInfo>
543
+ <name><namePart>internal, punct;uation</namePart></name>
544
+ </subject></mods>"
545
+ sdb = sdb_for_mods(m)
546
+ doc_hash = sdb.doc_hash_from_mods
547
+ expect(doc_hash[:topic_facet]).to include('comma')
548
+ expect(doc_hash[:topic_facet]).to include('semicolon')
549
+ expect(doc_hash[:topic_facet]).to include('backslash')
550
+ expect(doc_hash[:topic_facet]).to include('internal, punct;uation')
551
+ end
552
+ end # topic_facet
553
+
554
+ context 'geographic_facet' do
555
+ it 'includes geographic subelement' do
556
+ expect(@subject_doc_hash[:geographic_facet]).to include(@geo)
557
+ end
558
+ it 'is like geographic_search with the trailing punctuation (and preceding spaces) removed' do
559
+ m = "<mods #{@ns_decl}><subject>
560
+ <geographic>comma,</geographic>
561
+ <geographic>semicolon;</geographic>
562
+ <geographic>backslash \\</geographic>
563
+ <geographic>internal, punct;uation</geographic>
564
+ </subject></mods>"
565
+ sdb = sdb_for_mods(m)
566
+ doc_hash = sdb.doc_hash_from_mods
567
+ expect(doc_hash[:geographic_facet]).to include('comma')
568
+ expect(doc_hash[:geographic_facet]).to include('semicolon')
569
+ expect(doc_hash[:geographic_facet]).to include('backslash')
570
+ expect(doc_hash[:geographic_facet]).to include('internal, punct;uation')
571
+ end
572
+ end
573
+
574
+ it 'era_facet should be temporal subelement with the trailing punctuation removed' do
575
+ m = "<mods #{@ns_decl}><subject>
576
+ <temporal>comma,</temporal>
577
+ <temporal>semicolon;</temporal>
578
+ <temporal>backslash \\</temporal>
579
+ <temporal>internal, punct;uation</temporal>
580
+ </subject></mods>"
581
+ sdb = sdb_for_mods(m)
582
+ doc_hash = sdb.doc_hash_from_mods
583
+ expect(doc_hash[:era_facet]).to include('comma')
584
+ expect(doc_hash[:era_facet]).to include('semicolon')
585
+ expect(doc_hash[:era_facet]).to include('backslash')
586
+ expect(doc_hash[:era_facet]).to include('internal, punct;uation')
587
+ end
588
+ end # facet fields
589
+ end # subject fields
590
+
591
+ context 'publication date fields' do
592
+ it 'populates all date fields' do
593
+ m = "<mods #{@ns_decl}><originInfo>
594
+ <dateIssued>13th century AH / 19th CE</dateIssued>
595
+ </originInfo></mods>"
596
+ sdb = sdb_for_mods(m)
597
+ doc_hash = sdb.doc_hash_from_mods
598
+ expect(doc_hash[:pub_date]).to eq('19th century')
599
+ expect(doc_hash[:pub_date_sort]).to eq('1800')
600
+ expect(doc_hash[:publication_year_isi]).to eq('1800')
601
+ expect(doc_hash[:pub_year_tisim]).to eq('1800') # date slider
602
+ expect(doc_hash[:pub_date_display]).to eq('13th century AH / 19th CE')
603
+ expect(doc_hash[:imprint_display]).to eq('13th century AH / 19th CE')
604
+ end
605
+ it 'does not populate the date slider for BC dates' do
606
+ m = "<mods #{@ns_decl}><originInfo><dateIssued>199 B.C.</dateIssued></originInfo></mods>"
607
+ sdb = sdb_for_mods(m)
608
+ doc_hash = sdb.doc_hash_from_mods
609
+ expect(doc_hash).to_not have_key(:pub_year_tisim)
610
+ end
611
+
612
+ context 'pub_date_sort integration tests' do
613
+ let :sdb do
614
+ sdb = sdb_for_mods("<mods #{@ns_decl}> </mods>")
615
+ end
616
+ it 'works on normal dates' do
617
+ allow(sdb.smods_rec).to receive(:pub_date).and_return('1945')
618
+ expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('1945')
619
+ end
620
+ it 'works on 3 digit dates' do
621
+ allow(sdb.smods_rec).to receive(:pub_date).and_return('945')
622
+ expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('0945')
623
+ end
624
+ it 'works on century dates' do
625
+ allow(sdb.smods_rec).to receive(:pub_date).and_return('16--')
626
+ expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('1600')
627
+ end
628
+ it 'works on 3 digit century dates' do
629
+ allow(sdb.smods_rec).to receive(:pub_date).and_return('9--')
630
+ expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('0900')
631
+ end
632
+ end # pub_date_sort
633
+
634
+ context 'pub_year_tisim for date slider' do
635
+ it 'takes single dateCreated' do
636
+ m = "<mods #{@ns_decl}><originInfo>
637
+ <dateCreated>1904</dateCreated>
638
+ </originInfo></mods>"
639
+ sdb = sdb_for_mods(m)
640
+ doc_hash = sdb.doc_hash_from_mods
641
+ expect(doc_hash[:pub_year_tisim]).to eq('1904')
642
+ end
643
+ it 'correctlies parse a ranged date' do
644
+ m = "<mods #{@ns_decl}><originInfo>
645
+ <dateCreated>Text dated June 4, 1594; miniatures added by 1596</dateCreated>
646
+ </originInfo></mods>"
647
+ sdb = sdb_for_mods(m)
648
+ doc_hash = sdb.doc_hash_from_mods
649
+ expect(doc_hash[:pub_year_tisim]).to eq('1594')
650
+ end
651
+ it 'finds year in an expanded English form' do
652
+ m = "<mods #{@ns_decl}><originInfo>
653
+ <dateCreated>Aug. 3rd, 1886</dateCreated>
654
+ </originInfo></mods>"
655
+ sdb = sdb_for_mods(m)
656
+ doc_hash = sdb.doc_hash_from_mods
657
+ expect(doc_hash[:pub_year_tisim]).to eq('1886')
658
+ end
659
+ it 'removes question marks and brackets' do
660
+ m = "<mods #{@ns_decl}><originInfo>
661
+ <dateCreated>Aug. 3rd, [18]86?</dateCreated>
662
+ </originInfo></mods>"
663
+ sdb = sdb_for_mods(m)
664
+ doc_hash = sdb.doc_hash_from_mods
665
+ expect(doc_hash[:pub_year_tisim]).to eq('1886')
666
+ end
667
+ it 'ignores an s after the decade' do
668
+ m = "<mods #{@ns_decl}><originInfo>
669
+ <dateCreated>early 1890s</dateCreated>
670
+ </originInfo></mods>"
671
+ sdb = sdb_for_mods(m)
672
+ doc_hash = sdb.doc_hash_from_mods
673
+ expect(doc_hash[:pub_year_tisim]).to eq('1890')
674
+ end
675
+ it 'chooses a date ending with CE if there are multiple dates' do
676
+ m = "<mods #{@ns_decl}><originInfo>
677
+ <dateIssued>7192 AM (li-Adam) / 1684 CE</dateIssued>
678
+ </originInfo></mods>"
679
+ sdb = sdb_for_mods(m)
680
+ doc_hash = sdb.doc_hash_from_mods
681
+ expect(doc_hash[:pub_year_tisim]).to eq('1684')
682
+ end
683
+ it 'takes first year from hyphenated range (for now)' do
684
+ m = "<mods #{@ns_decl}><originInfo>
685
+ <dateIssued>1282 AH / 1865-6 CE</dateIssued>
686
+ </originInfo></mods>"
687
+ sdb = sdb_for_mods(m)
688
+ doc_hash = sdb.doc_hash_from_mods
689
+ expect(doc_hash[:pub_year_tisim]).to eq('1865')
690
+ end
691
+ end # pub_year_tisim method
692
+
693
+ context 'difficult pub dates' do
694
+ it 'should handle multiple pub dates (to be implemented - esp for date slider)'
695
+
696
+ it 'should choose the latest date??? (to be implemented - esp for sorting and date slider)'
697
+
698
+ it 'handles nnth century dates' do
699
+ m = "<mods #{@ns_decl}><originInfo>
700
+ <dateIssued>13th century AH / 19th CE</dateIssued>
701
+ </originInfo></mods>"
702
+ sdb = sdb_for_mods(m)
703
+ doc_hash = sdb.doc_hash_from_mods
704
+ expect(doc_hash[:pub_date]).to eq('19th century')
705
+ expect(doc_hash[:pub_date_sort]).to eq('1800')
706
+ expect(doc_hash[:pub_year_tisim]).to eq('1800')
707
+ expect(doc_hash[:publication_year_isi]).to eq('1800')
708
+ expect(doc_hash[:imprint_display]).to eq('13th century AH / 19th CE')
709
+ end
710
+ it 'handles multiple CE dates' do
711
+ m = "<mods #{@ns_decl}><originInfo>
712
+ <dateIssued>6 Dhu al-Hijjah 923 AH / 1517 CE -- 7 Rabi I 924 AH / 1518 CE</dateIssued>
713
+ </originInfo></mods>"
714
+ sdb = sdb_for_mods(m)
715
+ doc_hash = sdb.doc_hash_from_mods
716
+ expect(doc_hash[:pub_date_sort]).to eq('1517')
717
+ expect(doc_hash[:pub_date]).to eq('1517')
718
+ expect(doc_hash[:pub_year_tisim]).to eq('1517')
719
+ end
720
+ it 'handles specific century case from walters' do
721
+ m = "<mods #{@ns_decl}><originInfo>
722
+ <dateIssued>Late 14th or early 15th century CE</dateIssued>
723
+ </originInfo></mods>"
724
+ sdb = sdb_for_mods(m)
725
+ doc_hash = sdb.doc_hash_from_mods
726
+ expect(doc_hash[:pub_date_sort]).to eq('1400')
727
+ expect(doc_hash[:pub_year_tisim]).to eq('1400')
728
+ expect(doc_hash[:publication_year_isi]).to eq('1400')
729
+ expect(doc_hash[:pub_date]).to eq('15th century')
730
+ expect(doc_hash[:imprint_display]).to eq('Late 14th or early 15th century CE')
731
+ end
732
+ it 'works on explicit 3 digit dates' do
733
+ m = "<mods #{@ns_decl}><originInfo>
734
+ <dateIssued>966 CE</dateIssued>
735
+ </originInfo></mods>"
736
+ sdb = sdb_for_mods(m)
737
+ doc_hash = sdb.doc_hash_from_mods
738
+ expect(doc_hash[:pub_date_sort]).to eq('0966')
739
+ expect(doc_hash[:pub_date]).to eq('966')
740
+ expect(doc_hash[:pub_year_tisim]).to eq('0966')
741
+ expect(doc_hash[:publication_year_isi]).to eq('0966')
742
+ expect(doc_hash[:imprint_display]).to eq('966 CE')
743
+ end
744
+ it 'works on 3 digit century dates' do
745
+ m = "<mods #{@ns_decl}><originInfo>
746
+ <dateIssued>3rd century AH / 9th CE</dateIssued>
747
+ </originInfo></mods>"
748
+ sdb = sdb_for_mods(m)
749
+ doc_hash = sdb.doc_hash_from_mods
750
+ expect(doc_hash[:pub_date_sort]).to eq('0800')
751
+ expect(doc_hash[:pub_year_tisim]).to eq('0800')
752
+ expect(doc_hash[:pub_date]).to eq('9th century')
753
+ expect(doc_hash[:publication_year_isi]).to eq('0800')
754
+ expect(doc_hash[:imprint_display]).to eq('3rd century AH / 9th CE')
755
+ end
756
+ it 'works on 3 digit BC dates' do
757
+ m = "<mods #{@ns_decl}><originInfo>
758
+ <dateCreated>300 B.C.</dateCreated>
759
+ </originInfo></mods>"
760
+ sdb = sdb_for_mods(m)
761
+ doc_hash = sdb.doc_hash_from_mods
762
+ expect(doc_hash[:pub_date_sort]).to eq('-700')
763
+ expect(doc_hash[:pub_year_tisim]).to be_nil
764
+ expect(doc_hash[:pub_date]).to eq('300 B.C.')
765
+ expect(doc_hash[:imprint_display]).to eq('300 B.C.')
766
+ # doc_hash[:creation_year_isi].should =='-300'
767
+ end
768
+ end # difficult pub dates
769
+ end # publication date fields
770
+ end # doc_hash_from_mods
771
+
772
+ context '#format' do
773
+ it 'gets format from call to stanford-mods searchworks format method' do
774
+ m = "<mods #{@ns_decl}><typeOfResource>still image</typeOfResouce></mods>"
775
+ sdb = sdb_for_mods(m)
776
+ expect(sdb.smods_rec).to receive(:format).and_call_original
777
+ expect(sdb.format).to match_array ['Image']
778
+ end
779
+ it 'returns empty Array and log warning if there is no value' do
780
+ sdb = sdb_for_mods(@mods_xml)
781
+ expect(sdb.logger).to receive(:warn).with("#{@fake_druid} has no SearchWorks format from MODS - check <typeOfResource> and other implicated MODS elements")
782
+ expect(sdb.format).to eq([])
783
+ end
784
+ end # context #format
785
+
786
+ context '#format_main_ssim' do
787
+ it 'gets format_main_ssim from call to stanford-mods searchworks format_main method' do
788
+ m = "<mods #{@ns_decl}><typeOfResource>still image</typeOfResouce></mods>"
789
+ sdb = sdb_for_mods(m)
790
+ expect(sdb.smods_rec).to receive(:format_main).and_call_original
791
+ expect(sdb.format_main_ssim).to match_array ['Image']
792
+ end
793
+ it 'returns empty Array and log warning if there is no value' do
794
+ sdb = sdb_for_mods(@mods_xml)
795
+ expect(sdb.logger).to receive(:warn).with("#{@fake_druid} has no SearchWorks Resource Type from MODS - check <typeOfResource> and other implicated MODS elements")
796
+ expect(sdb.format_main_ssim).to eq([])
797
+ end
798
+ end # context format_main_ssim
799
+
800
+ context 'genre_ssim' do
801
+ it 'gets genre_ssim from call to stanford-mods searchworks sw_genre method' do
802
+ m = "<mods #{@ns_decl}><genre>technical report</genre></mods>"
803
+ sdb = sdb_for_mods(m)
804
+ expect(sdb.smods_rec).to receive(:sw_genre).and_call_original
805
+ expect(sdb.genre_ssim).to match_array ['Technical report']
806
+ end
807
+ it 'returns empty Array if there is no value' do
808
+ sdb = sdb_for_mods(@mods_xml)
809
+ expect(sdb.genre_ssim).to eq([])
810
+ end
811
+ end # context genre_ssim
812
+ end