gdor-indexer 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gdor-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2015-10-30 00:00:00.000000000 Z
13
+ date: 2016-01-06 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: harvestdor-indexer
@@ -32,14 +32,14 @@ dependencies:
32
32
  requirements:
33
33
  - - ">="
34
34
  - !ruby/object:Gem::Version
35
- version: '0'
35
+ version: 1.3.4
36
36
  type: :runtime
37
37
  prerelease: false
38
38
  version_requirements: !ruby/object:Gem::Requirement
39
39
  requirements:
40
40
  - - ">="
41
41
  - !ruby/object:Gem::Version
42
- version: '0'
42
+ version: 1.3.4
43
43
  - !ruby/object:Gem::Dependency
44
44
  name: nokogiri
45
45
  requirement: !ruby/object:Gem::Requirement
@@ -377,8 +377,10 @@ files:
377
377
  - lib/gdor/indexer/version.rb
378
378
  - spec/config/walters_integration_spec.yml
379
379
  - spec/spec_helper.rb
380
- - spec/unit/gdor_mods_fields_spec.rb
381
380
  - spec/unit/indexer_spec.rb
381
+ - spec/unit/mods_fields_spec.rb
382
+ - spec/unit/mods_pub_fields_spec.rb
383
+ - spec/unit/mods_subject_fields_spec.rb
382
384
  - spec/unit/public_xml_fields_spec.rb
383
385
  - spec/unit/solr_doc_builder_spec.rb
384
386
  - spec/unit/solr_doc_hash_spec.rb
@@ -410,8 +412,10 @@ summary: Gryphondor Solr indexing logic
410
412
  test_files:
411
413
  - spec/config/walters_integration_spec.yml
412
414
  - spec/spec_helper.rb
413
- - spec/unit/gdor_mods_fields_spec.rb
414
415
  - spec/unit/indexer_spec.rb
416
+ - spec/unit/mods_fields_spec.rb
417
+ - spec/unit/mods_pub_fields_spec.rb
418
+ - spec/unit/mods_subject_fields_spec.rb
415
419
  - spec/unit/public_xml_fields_spec.rb
416
420
  - spec/unit/solr_doc_builder_spec.rb
417
421
  - spec/unit/solr_doc_hash_spec.rb
@@ -1,813 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe GDor::Indexer::ModsFields do
4
- before(:all) do
5
- @fake_druid = 'oo000oo0000'
6
- @ns_decl = "xmlns='#{Mods::MODS_NS}'"
7
- @mods_xml = "<mods #{@ns_decl}><note>gdor_mods_fields testing</note></mods>"
8
- end
9
-
10
- def sdb_for_mods(m)
11
- resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
12
- allow(resource).to receive(:public_xml).and_return(nil)
13
- allow(resource).to receive(:mods).and_return(Nokogiri::XML(m))
14
- i = Harvestdor::Indexer.new
15
- i.logger.level = Logger::WARN
16
- allow(resource).to receive(:indexer).and_return(i)
17
- lgr = Logger.new(StringIO.new)
18
- lgr.level = Logger::WARN
19
- GDor::Indexer::SolrDocBuilder.new(resource, lgr)
20
- end
21
-
22
- context 'doc_hash_from_mods' do
23
- # see https://consul.stanford.edu/display/NGDE/Required+and+Recommended+Solr+Fields+for+SearchWorks+documents
24
-
25
- context 'summary_search solr field from <abstract>' do
26
- it 'is populated when the MODS has a top level <abstract> element' do
27
- m = "<mods #{@ns_decl}><abstract>blah blah</abstract></mods>"
28
- sdb = sdb_for_mods(m)
29
- expect(sdb.doc_hash_from_mods[:summary_search]).to match_array ['blah blah']
30
- end
31
- it 'has a value for each abstract element' do
32
- m = "<mods #{@ns_decl}>
33
- <abstract>one</abstract>
34
- <abstract>two</abstract>
35
- </mods>"
36
- sdb = sdb_for_mods(m)
37
- expect(sdb.doc_hash_from_mods[:summary_search]).to match_array %w(one two)
38
- end
39
- it 'does not be present when there is no top level <abstract> element' do
40
- m = "<mods #{@ns_decl}><relatedItem><abstract>blah blah</abstract></relatedItem></mods>"
41
- sdb = sdb_for_mods(m)
42
- expect(sdb.doc_hash_from_mods[:summary_search]).to be_nil
43
- end
44
- it 'does not be present if there are only empty abstract elements in the MODS' do
45
- m = "<mods #{@ns_decl}><abstract/><note>notit</note></mods>"
46
- sdb = sdb_for_mods(m)
47
- expect(sdb.doc_hash_from_mods[:summary_search]).to be_nil
48
- end
49
- it 'summary_display should not be populated - it is a copy field' do
50
- m = "<mods #{@ns_decl}><abstract>blah blah</abstract></mods>"
51
- sdb = sdb_for_mods(m)
52
- expect(sdb.doc_hash_from_mods[:summary_display]).to be_nil
53
- end
54
- end # summary_search / <abstract>
55
-
56
- it 'language: should call sw_language_facet in stanford-mods gem to populate language field' do
57
- sdb = sdb_for_mods(@mods_xml)
58
- smr = sdb.smods_rec
59
- expect(smr).to receive(:sw_language_facet)
60
- sdb.doc_hash_from_mods
61
- end
62
-
63
- context 'physical solr field from <physicalDescription><extent>' do
64
- it 'is populated when the MODS has mods/physicalDescription/extent element' do
65
- m = "<mods #{@ns_decl}><physicalDescription><extent>blah blah</extent></physicalDescription></mods>"
66
- sdb = sdb_for_mods(m)
67
- expect(sdb.doc_hash_from_mods[:physical]).to match_array ['blah blah']
68
- end
69
- it 'has a value for each extent element' do
70
- m = "<mods #{@ns_decl}>
71
- <physicalDescription>
72
- <extent>one</extent>
73
- <extent>two</extent>
74
- </physicalDescription>
75
- <physicalDescription><extent>three</extent></physicalDescription>
76
- </mods>"
77
- sdb = sdb_for_mods(m)
78
- expect(sdb.doc_hash_from_mods[:physical]).to match_array %w(one two three)
79
- end
80
- it 'does not be present when there is no top level <physicalDescription> element' do
81
- m = "<mods #{@ns_decl}><relatedItem><physicalDescription><extent>foo</extent></physicalDescription></relatedItem></mods>"
82
- sdb = sdb_for_mods(m)
83
- expect(sdb.doc_hash_from_mods[:physical]).to be_nil
84
- end
85
- it 'does not be present if there are only empty physicalDescription or extent elements in the MODS' do
86
- m = "<mods #{@ns_decl}><physicalDescription/><physicalDescription><extent/></physicalDescription><note>notit</note></mods>"
87
- sdb = sdb_for_mods(m)
88
- expect(sdb.doc_hash_from_mods[:physical]).to be_nil
89
- end
90
- end # physical field from physicalDescription/extent
91
-
92
- context 'url_suppl solr field from /mods/relatedItem/location/url' do
93
- it 'is populated when the MODS has mods/relatedItem/location/url' do
94
- m = "<mods #{@ns_decl}><relatedItem><location><url>url.org</url></location></relatedItem></mods>"
95
- sdb = sdb_for_mods(m)
96
- expect(sdb.doc_hash_from_mods[:url_suppl]).to match_array ['url.org']
97
- end
98
- it 'has a value for each mods/relatedItem/location/url element' do
99
- m = "<mods #{@ns_decl}>
100
- <relatedItem>
101
- <location><url>one</url></location>
102
- <location>
103
- <url>two</url>
104
- <url>three</url>
105
- </location>
106
- </relatedItem>
107
- <relatedItem><location><url>four</url></location></relatedItem>
108
- </mods>"
109
- sdb = sdb_for_mods(m)
110
- expect(sdb.doc_hash_from_mods[:url_suppl]).to match_array %w(one two three four)
111
- end
112
- it 'does not be populated from /mods/location/url element' do
113
- m = "<mods #{@ns_decl}><location><url>hi</url></location></mods>"
114
- sdb = sdb_for_mods(m)
115
- expect(sdb.doc_hash_from_mods[:url_suppl]).to be_nil
116
- end
117
- it 'does not be present if there are only empty relatedItem/location/url elements in the MODS' do
118
- m = "<mods #{@ns_decl}>
119
- <relatedItem><location><url/></location></relatedItem>
120
- <relatedItem><location/></relatedItem>
121
- <relatedItem/><note>notit</note></mods>"
122
- sdb = sdb_for_mods(m)
123
- expect(sdb.doc_hash_from_mods[:url_suppl]).to be_nil
124
- end
125
- end
126
-
127
- context 'toc_search solr field from <tableOfContents>' do
128
- it 'has a value for each tableOfContents element' do
129
- m = "<mods #{@ns_decl}>
130
- <tableOfContents>one</tableOfContents>
131
- <tableOfContents>two</tableOfContents>
132
- </mods>"
133
- sdb = sdb_for_mods(m)
134
- expect(sdb.doc_hash_from_mods[:toc_search]).to match_array %w(one two)
135
- end
136
- it 'does not be present when there is no top level <tableOfContents> element' do
137
- m = "<mods #{@ns_decl}><relatedItem><tableOfContents>foo</tableOfContents></relatedItem></mods>"
138
- sdb = sdb_for_mods(m)
139
- expect(sdb.doc_hash_from_mods[:toc_search]).to be_nil
140
- end
141
- it 'does not be present if there are only empty tableOfContents elements in the MODS' do
142
- m = "<mods #{@ns_decl}><tableOfContents/><note>notit</note></mods>"
143
- sdb = sdb_for_mods(m)
144
- expect(sdb.doc_hash_from_mods[:toc_search]).to be_nil
145
- end
146
- end
147
-
148
- context 'format fields' do
149
- context 'format_main_ssim' do
150
- it 'calls #format_main_ssim method' do
151
- m = "<mods #{@ns_decl}><note>nope</typeOfResource></mods>"
152
- sdb = sdb_for_mods(m)
153
- expect(sdb).to receive(:format_main_ssim)
154
- sdb.doc_hash_from_mods[:format_main_ssim]
155
- end
156
- it 'has a value when MODS data provides' do
157
- m = "<mods #{@ns_decl}><typeOfResource>software, multimedia</typeOfResource><genre>dataset</genre></mods>"
158
- sdb = sdb_for_mods(m)
159
- expect(sdb.doc_hash_from_mods[:format_main_ssim]).to match_array ['Dataset']
160
- end
161
- it 'returns empty Array if there is no value' do
162
- m = "<mods #{@ns_decl}><note>nope</typeOfResource></mods>"
163
- sdb = sdb_for_mods(m)
164
- expect(sdb.doc_hash_from_mods[:format_main_ssim]).to eq([])
165
- end
166
- end
167
- context 'format Solr field' do
168
- it 'calls #format method' do
169
- m = "<mods #{@ns_decl}><note>nope</typeOfResource></mods>"
170
- sdb = sdb_for_mods(m)
171
- expect(sdb).to receive(:format)
172
- sdb.doc_hash_from_mods[:format]
173
- end
174
- it 'has a value when MODS data provides' do
175
- m = "<mods #{@ns_decl}><typeOfResource>software, multimedia</typeOfResource></mods>"
176
- sdb = sdb_for_mods(m)
177
- expect(sdb.doc_hash_from_mods[:format]).to match_array ['Computer File']
178
- end
179
- it 'returns empty Array if there is no value' do
180
- m = "<mods #{@ns_decl}><note>nope</typeOfResource></mods>"
181
- sdb = sdb_for_mods(m)
182
- expect(sdb.doc_hash_from_mods[:format]).to eq([])
183
- end
184
- end
185
- end
186
-
187
- context 'title fields' do
188
- before(:all) do
189
- @title_mods = "<mods #{@ns_decl}>
190
- <titleInfo><title>Jerk</title><nonSort>The</nonSort><subTitle>is whom?</subTitle></titleInfo>
191
- <titleInfo><title>Joke</title></titleInfo>
192
- <titleInfo type='alternative'><title>Alternative</title></titleInfo>
193
- </mods>"
194
- end
195
- let :sdb do
196
- sdb_for_mods(@title_mods)
197
- end
198
- before(:each) do
199
- @title_doc_hash = sdb.doc_hash_from_mods
200
- end
201
- it 'calls the appropriate methods in the stanford-mods gem to populate the fields' do
202
- smr = sdb.smods_rec
203
- expect(smr).to receive(:sw_short_title).at_least(:once)
204
- expect(smr).to receive(:sw_full_title).at_least(:once)
205
- expect(smr).to receive(:sw_title_display)
206
- expect(smr).to receive(:sw_addl_titles)
207
- expect(smr).to receive(:sw_sort_title)
208
- sdb.doc_hash_from_mods
209
- end
210
- context 'search fields' do
211
- it 'title_245a_search' do
212
- expect(@title_doc_hash[:title_245a_search]).to eq('The Jerk')
213
- end
214
- it 'title_245_search' do
215
- expect(@title_doc_hash[:title_245_search]).to eq('The Jerk : is whom?')
216
- end
217
- it 'title_variant_search' do
218
- expect(@title_doc_hash[:title_variant_search]).to match_array %w(Joke Alternative)
219
- end
220
- it 'title_related_search should not be populated from MODS' do
221
- expect(@title_doc_hash[:title_related_search]).to be_nil
222
- end
223
- end
224
- context 'display fields' do
225
- it 'title_display' do
226
- expect(@title_doc_hash[:title_display]).to eq('The Jerk : is whom?')
227
- end
228
- it 'title_245a_display' do
229
- expect(@title_doc_hash[:title_245a_display]).to eq('The Jerk')
230
- end
231
- it 'title_245c_display should not be populated from MODS' do
232
- expect(@title_doc_hash[:title_245c_display]).to be_nil
233
- end
234
- it 'title_full_display' do
235
- expect(@title_doc_hash[:title_full_display]).to eq('The Jerk : is whom?')
236
- end
237
- it 'removes trailing commas in title_display' do
238
- title_mods = "<mods #{@ns_decl}>
239
- <titleInfo><title>Jerk</title><nonSort>The</nonSort><subTitle>is whom,</subTitle></titleInfo>
240
- <titleInfo><title>Joke</title></titleInfo>
241
- <titleInfo type='alternative'><title>Alternative</title></titleInfo>
242
- </mods>"
243
- sdb = sdb_for_mods(title_mods)
244
- @title_doc_hash = sdb.doc_hash_from_mods
245
- @title_doc_hash
246
- expect(@title_doc_hash[:title_display]).to eq('The Jerk : is whom')
247
- end
248
- it 'title_variant_display should not be populated - it is a copy field' do
249
- expect(@title_doc_hash[:title_variant_display]).to be_nil
250
- end
251
- end
252
- it 'title_sort' do
253
- expect(@title_doc_hash[:title_sort]).to eq('Jerk is whom')
254
- end
255
- end # title fields
256
-
257
- context 'author fields' do
258
- before(:all) do
259
- @name_mods = "<mods #{@ns_decl}>
260
- <name type='personal'>
261
- <namePart type='given'>John</namePart>
262
- <namePart type='family'>Huston</namePart>
263
- <role><roleTerm type='code' authority='marcrelator'>drt</roleTerm></role>
264
- <displayForm>q</displayForm>
265
- </name>
266
- <name type='personal'><namePart>Crusty The Clown</namePart></name>
267
- <name type='corporate'><namePart>Watchful Eye</namePart></name>
268
- <name type='corporate'>
269
- <namePart>Exciting Prints</namePart>
270
- <role><roleTerm type='text'>lithographer</roleTerm></role>
271
- </name>
272
- <name type='conference'><namePart>conference</namePart></name>
273
- </mods>"
274
- end
275
- let :sdb do
276
- sdb_for_mods(@name_mods)
277
- end
278
- before(:each) do
279
- @author_doc_hash = sdb.doc_hash_from_mods
280
- end
281
- it 'calls the appropriate methods in the stanford-mods gem to populate the fields' do
282
- smr = sdb.smods_rec
283
- expect(smr).to receive(:sw_main_author)
284
- expect(smr).to receive(:sw_addl_authors)
285
- expect(smr).to receive(:sw_person_authors).exactly(3).times
286
- expect(smr).to receive(:sw_impersonal_authors)
287
- expect(smr).to receive(:sw_corporate_authors)
288
- expect(smr).to receive(:sw_meeting_authors)
289
- expect(smr).to receive(:sw_sort_author)
290
- sdb.doc_hash_from_mods
291
- end
292
- context 'search fields' do
293
- it 'author_1xx_search' do
294
- expect(@author_doc_hash[:author_1xx_search]).to eq('Crusty The Clown')
295
- end
296
- it 'author_7xx_search' do
297
- skip 'Should this return all authors? or only 7xx authors?'
298
- expect(@author_doc_hash[:author_7xx_search]).to match_array ['q', 'Watchful Eye', 'Exciting Prints', 'conference']
299
- end
300
- it 'author_8xx_search should not be populated from MODS' do
301
- expect(@author_doc_hash[:author_8xx_search]).to be_nil
302
- end
303
- end
304
- context 'facet fields' do
305
- it 'author_person_facet' do
306
- expect(@author_doc_hash[:author_person_facet]).to match_array ['q', 'Crusty The Clown']
307
- end
308
- it 'author_other_facet' do
309
- expect(@author_doc_hash[:author_other_facet]).to match_array ['Watchful Eye', 'Exciting Prints', 'conference']
310
- end
311
- end
312
- context 'display fields' do
313
- it 'author_person_display' do
314
- expect(@author_doc_hash[:author_person_display]).to match_array ['q', 'Crusty The Clown']
315
- end
316
- it 'author_person_full_display' do
317
- expect(@author_doc_hash[:author_person_full_display]).to match_array ['q', 'Crusty The Clown']
318
- end
319
- it 'author_corp_display' do
320
- expect(@author_doc_hash[:author_corp_display]).to match_array ['Watchful Eye', 'Exciting Prints']
321
- end
322
- it 'author_meeting_display' do
323
- expect(@author_doc_hash[:author_meeting_display]).to match_array ['conference']
324
- end
325
- end
326
- it 'author_sort' do
327
- expect(@author_doc_hash[:author_sort]).to eq('Crusty The Clown')
328
- end
329
- end # author fields
330
-
331
- context 'subject fields' do
332
- before(:all) do
333
- @genre = 'genre top level'
334
- @cart_coord = '6 00 S, 71 30 E'
335
- @s_genre = 'genre in subject'
336
- @geo = 'Somewhere'
337
- @geo_code = 'us'
338
- @hier_geo_country = 'France'
339
- @s_name = 'name in subject'
340
- @occupation = 'worker bee'
341
- @temporal = 'temporal'
342
- @s_title = 'title in subject'
343
- @topic = 'topic'
344
- @m = "<mods #{@ns_decl}>
345
- <genre>#{@genre}</genre>
346
- <subject><cartographics><coordinates>#{@cart_coord}</coordinates></cartographics></subject>
347
- <subject><genre>#{@s_genre}</genre></subject>
348
- <subject><geographic>#{@geo}</geographic></subject>
349
- <subject><geographicCode authority='iso3166'>#{@geo_code}</geographicCode></subject>
350
- <subject><hierarchicalGeographic><country>#{@hier_geo_country}</country></hierarchicalGeographic></subject>
351
- <subject><name><namePart>#{@s_name}</namePart></name></subject>
352
- <subject><occupation>#{@occupation}</occupation></subject>
353
- <subject><temporal>#{@temporal}</temporal></subject>
354
- <subject><titleInfo><title>#{@s_title}</title></titleInfo></subject>
355
- <subject><topic>#{@topic}</topic></subject>
356
- <typeOfResource>still image</typeOfResource>
357
- </mods>"
358
- @m_no_subject = "<mods #{@ns_decl}><note>notit</note></mods>"
359
- end
360
- let :sdb do
361
- sdb = sdb_for_mods(@m)
362
- end
363
- before(:each) do
364
- @subject_doc_hash = sdb.doc_hash_from_mods
365
- end
366
- it 'calls the appropriate methods in stanford-mods to populate the Solr fields' do
367
- expect(sdb.smods_rec).to receive(:topic_search)
368
- expect(sdb.smods_rec).to receive(:geographic_search)
369
- expect(sdb.smods_rec).to receive(:subject_other_search)
370
- expect(sdb.smods_rec).to receive(:subject_other_subvy_search)
371
- expect(sdb.smods_rec).to receive(:subject_all_search)
372
- expect(sdb.smods_rec).to receive(:topic_facet)
373
- expect(sdb.smods_rec).to receive(:geographic_facet)
374
- expect(sdb.smods_rec).to receive(:era_facet)
375
- sdb.doc_hash_from_mods
376
- end
377
- context 'search fields' do
378
- context 'topic_search' do
379
- it 'onlies include genre and topic' do
380
- expect(@subject_doc_hash[:topic_search]).to match_array [@genre, @topic]
381
- end
382
- context 'functional tests checking results from stanford-mods methods' do
383
- it 'is nil if there are no values in the MODS' do
384
- sdb = sdb_for_mods(@m_no_subject)
385
- expect(sdb.doc_hash_from_mods[:topic_search]).to be_nil
386
- end
387
- it 'does not be nil if there are only subject/topic elements (no <genre>)' do
388
- m = "<mods #{@ns_decl}><subject><topic>#{@topic}</topic></subject></mods>"
389
- sdb = sdb_for_mods(m)
390
- expect(sdb.doc_hash_from_mods[:topic_search]).to match_array [@topic]
391
- end
392
- it 'does not be nil if there are only <genre> elements (no subject/topic elements)' do
393
- m = "<mods #{@ns_decl}><genre>#{@genre}</genre></mods>"
394
- sdb = sdb_for_mods(m)
395
- expect(sdb.doc_hash_from_mods[:topic_search]).to match_array [@genre]
396
- end
397
- it 'has a separate value for each topic subelement' do
398
- m = "<mods #{@ns_decl}>
399
- <subject>
400
- <topic>first</topic>
401
- <topic>second</topic>
402
- </subject>
403
- <subject><topic>third</topic></subject>
404
- </mods>"
405
- sdb = sdb_for_mods(m)
406
- expect(sdb.doc_hash_from_mods[:topic_search]).to match_array %w(first second third)
407
- end
408
- end # functional tests checking results from stanford-mods methods
409
- end # topic_search
410
-
411
- context 'geographic_search' do
412
- it 'includes geographic and hierarchicalGeographic' do
413
- expect(@subject_doc_hash[:geographic_search]).to match_array [@geo, @hier_geo_country]
414
- end
415
- it 'calls sw_geographic_search (from stanford-mods gem)' do
416
- m = "<mods #{@ns_decl}><subject><geographic>#{@geo}</geographic></subject></mods>"
417
- sdb = sdb_for_mods(m)
418
- expect(sdb.smods_rec).to receive(:sw_geographic_search).at_least(1).times
419
- sdb.doc_hash_from_mods
420
- end
421
- it "logs an info message when it encounters a geographicCode encoding it doesn't translate" do
422
- m = "<mods #{@ns_decl}><subject><geographicCode authority='iso3166'>ca</geographicCode></subject></mods>"
423
- sdb = sdb_for_mods(m)
424
- expect(sdb.smods_rec.sw_logger).to receive(:info).with(/#{@fake_druid} has subject geographicCode element with untranslated encoding \(iso3166\): <geographicCode authority=.*>ca<\/geographicCode>/).at_least(1).times
425
- sdb.doc_hash_from_mods
426
- end
427
- end # geographic_search
428
-
429
- context 'subject_other_search' do
430
- it 'includes occupation, subject names, and subject titles' do
431
- expect(@subject_doc_hash[:subject_other_search]).to match_array [@occupation, @s_name, @s_title]
432
- end
433
- context 'functional tests checking results from stanford-mods methods' do
434
- it 'is nil if there are no values in the MODS' do
435
- sdb = sdb_for_mods(@mods_xml)
436
- expect(sdb.doc_hash_from_mods[:subject_other_search]).to be_nil
437
- end
438
- it 'does not be nil if there are only subject/name elements' do
439
- m = "<mods #{@ns_decl}><subject><name><namePart>#{@s_name}</namePart></name></subject></mods>"
440
- sdb = sdb_for_mods(m)
441
- expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [@s_name]
442
- end
443
- it 'does not be nil if there are only subject/occupation elements' do
444
- m = "<mods #{@ns_decl}><subject><occupation>#{@occupation}</occupation></subject></mods>"
445
- sdb = sdb_for_mods(m)
446
- expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [@occupation]
447
- end
448
- it 'does not be nil if there are only subject/titleInfo elements' do
449
- m = "<mods #{@ns_decl}><subject><titleInfo><title>#{@s_title}</title></titleInfo></subject></mods>"
450
- sdb = sdb_for_mods(m)
451
- expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [@s_title]
452
- end
453
- it 'has a separate value for each occupation subelement' do
454
- m = "<mods #{@ns_decl}>
455
- <subject>
456
- <occupation>first</occupation>
457
- <occupation>second</occupation>
458
- </subject>
459
- <subject><occupation>third</occupation></subject>
460
- </mods>"
461
- sdb = sdb_for_mods(m)
462
- expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array %w(first second third)
463
- end
464
- end # functional tests checking results from stanford-mods methods
465
- end # subject_other_search
466
-
467
- context 'subject_other_subvy_search' do
468
- it 'includes temporal and genre SUBelement' do
469
- expect(@subject_doc_hash[:subject_other_subvy_search]).to match_array [@temporal, @s_genre]
470
- end
471
- context 'functional tests checking results from stanford-mods methods' do
472
- it 'is nil if there are no values in the MODS' do
473
- sdb = sdb_for_mods(@mods_xml)
474
- expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to be_nil
475
- end
476
- it 'does not be nil if there are only subject/temporal elements (no subject/genre)' do
477
- m = "<mods #{@ns_decl}><subject><temporal>#{@temporal}</temporal></subject></mods>"
478
- sdb = sdb_for_mods(m)
479
- expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array [@temporal]
480
- end
481
- it 'does not be nil if there are only subject/genre elements (no subject/temporal)' do
482
- m = "<mods #{@ns_decl}><subject><genre>#{@s_genre}</genre></subject></mods>"
483
- sdb = sdb_for_mods(m)
484
- expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array [@s_genre]
485
- end
486
- context 'genre subelement' do
487
- it 'has a separate value for each genre element' do
488
- m = "<mods #{@ns_decl}>
489
- <subject>
490
- <genre>first</genre>
491
- <genre>second</genre>
492
- </subject>
493
- <subject><genre>third</genre></subject>
494
- </mods>"
495
- sdb = sdb_for_mods(m)
496
- expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array %w(first second third)
497
- end
498
- end # genre subelement
499
- end # "functional tests checking results from stanford-mods methods"
500
- end # subject_other_subvy_search
501
-
502
- context 'subject_all_search' do
503
- it 'contains top level <genre> element data' do
504
- expect(@subject_doc_hash[:subject_all_search]).to include(@genre)
505
- end
506
- it 'does not contain cartographic sub element' do
507
- expect(@subject_doc_hash[:subject_all_search]).not_to include(@cart_coord)
508
- end
509
- it 'does not include codes from hierarchicalGeographic sub element' do
510
- expect(@subject_doc_hash[:subject_all_search]).not_to include(@geo_code)
511
- end
512
- it 'contains all other subject subelement data' do
513
- expect(@subject_doc_hash[:subject_all_search]).to include(@s_genre)
514
- expect(@subject_doc_hash[:subject_all_search]).to include(@geo)
515
- expect(@subject_doc_hash[:subject_all_search]).to include(@hier_geo_country)
516
- expect(@subject_doc_hash[:subject_all_search]).to include(@s_name)
517
- expect(@subject_doc_hash[:subject_all_search]).to include(@occupation)
518
- expect(@subject_doc_hash[:subject_all_search]).to include(@temporal)
519
- expect(@subject_doc_hash[:subject_all_search]).to include(@s_title)
520
- expect(@subject_doc_hash[:subject_all_search]).to include(@topic)
521
- end
522
- end # subject_all_search
523
- end # search fields
524
-
525
- context 'facet fields' do
526
- context 'topic_facet' do
527
- it 'includes topic subelement' do
528
- expect(@subject_doc_hash[:topic_facet]).to include(@topic)
529
- end
530
- it 'includes sw_subject_names' do
531
- expect(@subject_doc_hash[:topic_facet]).to include(@s_name)
532
- end
533
- it 'includes sw_subject_titles' do
534
- expect(@subject_doc_hash[:topic_facet]).to include(@s_title)
535
- end
536
- it 'includes occupation subelement' do
537
- expect(@subject_doc_hash[:topic_facet]).to include(@occupation)
538
- end
539
- it 'has the trailing punctuation removed' do
540
- m = "<mods #{@ns_decl}><subject>
541
- <topic>comma,</topic>
542
- <occupation>semicolon;</occupation>
543
- <titleInfo><title>backslash \\</title></titleInfo>
544
- <name><namePart>internal, punct;uation</namePart></name>
545
- </subject></mods>"
546
- sdb = sdb_for_mods(m)
547
- doc_hash = sdb.doc_hash_from_mods
548
- expect(doc_hash[:topic_facet]).to include('comma')
549
- expect(doc_hash[:topic_facet]).to include('semicolon')
550
- expect(doc_hash[:topic_facet]).to include('backslash')
551
- expect(doc_hash[:topic_facet]).to include('internal, punct;uation')
552
- end
553
- end # topic_facet
554
-
555
- context 'geographic_facet' do
556
- it 'includes geographic subelement' do
557
- expect(@subject_doc_hash[:geographic_facet]).to include(@geo)
558
- end
559
- it 'is like geographic_search with the trailing punctuation (and preceding spaces) removed' do
560
- m = "<mods #{@ns_decl}><subject>
561
- <geographic>comma,</geographic>
562
- <geographic>semicolon;</geographic>
563
- <geographic>backslash \\</geographic>
564
- <geographic>internal, punct;uation</geographic>
565
- </subject></mods>"
566
- sdb = sdb_for_mods(m)
567
- doc_hash = sdb.doc_hash_from_mods
568
- expect(doc_hash[:geographic_facet]).to include('comma')
569
- expect(doc_hash[:geographic_facet]).to include('semicolon')
570
- expect(doc_hash[:geographic_facet]).to include('backslash')
571
- expect(doc_hash[:geographic_facet]).to include('internal, punct;uation')
572
- end
573
- end
574
-
575
- it 'era_facet should be temporal subelement with the trailing punctuation removed' do
576
- m = "<mods #{@ns_decl}><subject>
577
- <temporal>comma,</temporal>
578
- <temporal>semicolon;</temporal>
579
- <temporal>backslash \\</temporal>
580
- <temporal>internal, punct;uation</temporal>
581
- </subject></mods>"
582
- sdb = sdb_for_mods(m)
583
- doc_hash = sdb.doc_hash_from_mods
584
- expect(doc_hash[:era_facet]).to include('comma')
585
- expect(doc_hash[:era_facet]).to include('semicolon')
586
- expect(doc_hash[:era_facet]).to include('backslash')
587
- expect(doc_hash[:era_facet]).to include('internal, punct;uation')
588
- end
589
- end # facet fields
590
- end # subject fields
591
-
592
- context 'publication date fields' do
593
- it 'populates all date fields' do
594
- m = "<mods #{@ns_decl}><originInfo>
595
- <dateIssued>13th century AH / 19th CE</dateIssued>
596
- </originInfo></mods>"
597
- sdb = sdb_for_mods(m)
598
- doc_hash = sdb.doc_hash_from_mods
599
- expect(doc_hash[:pub_date]).to eq('19th century')
600
- expect(doc_hash[:pub_date_sort]).to eq('1800')
601
- expect(doc_hash[:publication_year_isi]).to eq('1800')
602
- expect(doc_hash[:pub_year_tisim]).to eq('1800') # date slider
603
- expect(doc_hash[:pub_date_display]).to eq('13th century AH / 19th CE')
604
- expect(doc_hash[:imprint_display]).to eq('13th century AH / 19th CE')
605
- end
606
- it 'does not populate the date slider for BC dates' do
607
- m = "<mods #{@ns_decl}><originInfo><dateIssued>199 B.C.</dateIssued></originInfo></mods>"
608
- sdb = sdb_for_mods(m)
609
- doc_hash = sdb.doc_hash_from_mods
610
- expect(doc_hash).to_not have_key(:pub_year_tisim)
611
- end
612
-
613
- context 'pub_date_sort integration tests' do
614
- let :sdb do
615
- sdb = sdb_for_mods("<mods #{@ns_decl}> </mods>")
616
- end
617
- it 'works on normal dates' do
618
- allow(sdb.smods_rec).to receive(:pub_date).and_return('1945')
619
- expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('1945')
620
- end
621
- it 'works on 3 digit dates' do
622
- allow(sdb.smods_rec).to receive(:pub_date).and_return('945')
623
- expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('0945')
624
- end
625
- it 'works on century dates' do
626
- allow(sdb.smods_rec).to receive(:pub_date).and_return('16--')
627
- expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('1600')
628
- end
629
- it 'works on 3 digit century dates' do
630
- allow(sdb.smods_rec).to receive(:pub_date).and_return('9--')
631
- expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('0900')
632
- end
633
- end # pub_date_sort
634
-
635
- context 'pub_year_tisim for date slider' do
636
- it 'takes single dateCreated' do
637
- m = "<mods #{@ns_decl}><originInfo>
638
- <dateCreated>1904</dateCreated>
639
- </originInfo></mods>"
640
- sdb = sdb_for_mods(m)
641
- doc_hash = sdb.doc_hash_from_mods
642
- expect(doc_hash[:pub_year_tisim]).to eq('1904')
643
- end
644
- it 'correctlies parse a ranged date' do
645
- m = "<mods #{@ns_decl}><originInfo>
646
- <dateCreated>Text dated June 4, 1594; miniatures added by 1596</dateCreated>
647
- </originInfo></mods>"
648
- sdb = sdb_for_mods(m)
649
- doc_hash = sdb.doc_hash_from_mods
650
- expect(doc_hash[:pub_year_tisim]).to eq('1594')
651
- end
652
- it 'finds year in an expanded English form' do
653
- m = "<mods #{@ns_decl}><originInfo>
654
- <dateCreated>Aug. 3rd, 1886</dateCreated>
655
- </originInfo></mods>"
656
- sdb = sdb_for_mods(m)
657
- doc_hash = sdb.doc_hash_from_mods
658
- expect(doc_hash[:pub_year_tisim]).to eq('1886')
659
- end
660
- it 'removes question marks and brackets' do
661
- m = "<mods #{@ns_decl}><originInfo>
662
- <dateCreated>Aug. 3rd, [18]86?</dateCreated>
663
- </originInfo></mods>"
664
- sdb = sdb_for_mods(m)
665
- doc_hash = sdb.doc_hash_from_mods
666
- expect(doc_hash[:pub_year_tisim]).to eq('1886')
667
- end
668
- it 'ignores an s after the decade' do
669
- m = "<mods #{@ns_decl}><originInfo>
670
- <dateCreated>early 1890s</dateCreated>
671
- </originInfo></mods>"
672
- sdb = sdb_for_mods(m)
673
- doc_hash = sdb.doc_hash_from_mods
674
- expect(doc_hash[:pub_year_tisim]).to eq('1890')
675
- end
676
- it 'chooses a date ending with CE if there are multiple dates' do
677
- m = "<mods #{@ns_decl}><originInfo>
678
- <dateIssued>7192 AM (li-Adam) / 1684 CE</dateIssued>
679
- </originInfo></mods>"
680
- sdb = sdb_for_mods(m)
681
- doc_hash = sdb.doc_hash_from_mods
682
- expect(doc_hash[:pub_year_tisim]).to eq('1684')
683
- end
684
- it 'takes first year from hyphenated range (for now)' do
685
- m = "<mods #{@ns_decl}><originInfo>
686
- <dateIssued>1282 AH / 1865-6 CE</dateIssued>
687
- </originInfo></mods>"
688
- sdb = sdb_for_mods(m)
689
- doc_hash = sdb.doc_hash_from_mods
690
- expect(doc_hash[:pub_year_tisim]).to eq('1865')
691
- end
692
- end # pub_year_tisim method
693
-
694
- context 'difficult pub dates' do
695
- it 'should handle multiple pub dates (to be implemented - esp for date slider)'
696
-
697
- it 'should choose the latest date??? (to be implemented - esp for sorting and date slider)'
698
-
699
- it 'handles nnth century dates' do
700
- m = "<mods #{@ns_decl}><originInfo>
701
- <dateIssued>13th century AH / 19th CE</dateIssued>
702
- </originInfo></mods>"
703
- sdb = sdb_for_mods(m)
704
- doc_hash = sdb.doc_hash_from_mods
705
- expect(doc_hash[:pub_date]).to eq('19th century')
706
- expect(doc_hash[:pub_date_sort]).to eq('1800')
707
- expect(doc_hash[:pub_year_tisim]).to eq('1800')
708
- expect(doc_hash[:publication_year_isi]).to eq('1800')
709
- expect(doc_hash[:imprint_display]).to eq('13th century AH / 19th CE')
710
- end
711
- it 'handles multiple CE dates' do
712
- m = "<mods #{@ns_decl}><originInfo>
713
- <dateIssued>6 Dhu al-Hijjah 923 AH / 1517 CE -- 7 Rabi I 924 AH / 1518 CE</dateIssued>
714
- </originInfo></mods>"
715
- sdb = sdb_for_mods(m)
716
- doc_hash = sdb.doc_hash_from_mods
717
- expect(doc_hash[:pub_date_sort]).to eq('1517')
718
- expect(doc_hash[:pub_date]).to eq('1517')
719
- expect(doc_hash[:pub_year_tisim]).to eq('1517')
720
- end
721
- it 'handles specific century case from walters' do
722
- m = "<mods #{@ns_decl}><originInfo>
723
- <dateIssued>Late 14th or early 15th century CE</dateIssued>
724
- </originInfo></mods>"
725
- sdb = sdb_for_mods(m)
726
- doc_hash = sdb.doc_hash_from_mods
727
- expect(doc_hash[:pub_date_sort]).to eq('1400')
728
- expect(doc_hash[:pub_year_tisim]).to eq('1400')
729
- expect(doc_hash[:publication_year_isi]).to eq('1400')
730
- expect(doc_hash[:pub_date]).to eq('15th century')
731
- expect(doc_hash[:imprint_display]).to eq('Late 14th or early 15th century CE')
732
- end
733
- it 'works on explicit 3 digit dates' do
734
- m = "<mods #{@ns_decl}><originInfo>
735
- <dateIssued>966 CE</dateIssued>
736
- </originInfo></mods>"
737
- sdb = sdb_for_mods(m)
738
- doc_hash = sdb.doc_hash_from_mods
739
- expect(doc_hash[:pub_date_sort]).to eq('0966')
740
- expect(doc_hash[:pub_date]).to eq('966')
741
- expect(doc_hash[:pub_year_tisim]).to eq('0966')
742
- expect(doc_hash[:publication_year_isi]).to eq('0966')
743
- expect(doc_hash[:imprint_display]).to eq('966 CE')
744
- end
745
- it 'works on 3 digit century dates' do
746
- m = "<mods #{@ns_decl}><originInfo>
747
- <dateIssued>3rd century AH / 9th CE</dateIssued>
748
- </originInfo></mods>"
749
- sdb = sdb_for_mods(m)
750
- doc_hash = sdb.doc_hash_from_mods
751
- expect(doc_hash[:pub_date_sort]).to eq('0800')
752
- expect(doc_hash[:pub_year_tisim]).to eq('0800')
753
- expect(doc_hash[:pub_date]).to eq('9th century')
754
- expect(doc_hash[:publication_year_isi]).to eq('0800')
755
- expect(doc_hash[:imprint_display]).to eq('3rd century AH / 9th CE')
756
- end
757
- it 'works on 3 digit BC dates' do
758
- m = "<mods #{@ns_decl}><originInfo>
759
- <dateCreated>300 B.C.</dateCreated>
760
- </originInfo></mods>"
761
- sdb = sdb_for_mods(m)
762
- doc_hash = sdb.doc_hash_from_mods
763
- expect(doc_hash[:pub_date_sort]).to eq('-700')
764
- expect(doc_hash[:pub_year_tisim]).to be_nil
765
- expect(doc_hash[:pub_date]).to eq('300 B.C.')
766
- expect(doc_hash[:imprint_display]).to eq('300 B.C.')
767
- # doc_hash[:creation_year_isi].should =='-300'
768
- end
769
- end # difficult pub dates
770
- end # publication date fields
771
- end # doc_hash_from_mods
772
-
773
- context '#format' do
774
- it 'gets format from call to stanford-mods searchworks format method' do
775
- m = "<mods #{@ns_decl}><typeOfResource>still image</typeOfResouce></mods>"
776
- sdb = sdb_for_mods(m)
777
- expect(sdb.smods_rec).to receive(:format).and_call_original
778
- expect(sdb.format).to match_array ['Image']
779
- end
780
- it 'returns empty Array and log warning if there is no value' do
781
- sdb = sdb_for_mods(@mods_xml)
782
- expect(sdb.logger).to receive(:warn).with("#{@fake_druid} has no SearchWorks format from MODS - check <typeOfResource> and other implicated MODS elements")
783
- expect(sdb.format).to eq([])
784
- end
785
- end # context #format
786
-
787
- context '#format_main_ssim' do
788
- it 'gets format_main_ssim from call to stanford-mods searchworks format_main method' do
789
- m = "<mods #{@ns_decl}><typeOfResource>still image</typeOfResouce></mods>"
790
- sdb = sdb_for_mods(m)
791
- expect(sdb.smods_rec).to receive(:format_main).and_call_original
792
- expect(sdb.format_main_ssim).to match_array ['Image']
793
- end
794
- it 'returns empty Array and log warning if there is no value' do
795
- sdb = sdb_for_mods(@mods_xml)
796
- expect(sdb.logger).to receive(:warn).with("#{@fake_druid} has no SearchWorks Resource Type from MODS - check <typeOfResource> and other implicated MODS elements")
797
- expect(sdb.format_main_ssim).to eq([])
798
- end
799
- end # context format_main_ssim
800
-
801
- context 'genre_ssim' do
802
- it 'gets genre_ssim from call to stanford-mods searchworks sw_genre method' do
803
- m = "<mods #{@ns_decl}><genre>technical report</genre></mods>"
804
- sdb = sdb_for_mods(m)
805
- expect(sdb.smods_rec).to receive(:sw_genre).and_call_original
806
- expect(sdb.genre_ssim).to match_array ['Technical report']
807
- end
808
- it 'returns empty Array if there is no value' do
809
- sdb = sdb_for_mods(@mods_xml)
810
- expect(sdb.genre_ssim).to eq([])
811
- end
812
- end # context genre_ssim
813
- end