gdor-indexer 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gdor-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2015-10-30 00:00:00.000000000 Z
13
+ date: 2016-01-06 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: harvestdor-indexer
@@ -32,14 +32,14 @@ dependencies:
32
32
  requirements:
33
33
  - - ">="
34
34
  - !ruby/object:Gem::Version
35
- version: '0'
35
+ version: 1.3.4
36
36
  type: :runtime
37
37
  prerelease: false
38
38
  version_requirements: !ruby/object:Gem::Requirement
39
39
  requirements:
40
40
  - - ">="
41
41
  - !ruby/object:Gem::Version
42
- version: '0'
42
+ version: 1.3.4
43
43
  - !ruby/object:Gem::Dependency
44
44
  name: nokogiri
45
45
  requirement: !ruby/object:Gem::Requirement
@@ -377,8 +377,10 @@ files:
377
377
  - lib/gdor/indexer/version.rb
378
378
  - spec/config/walters_integration_spec.yml
379
379
  - spec/spec_helper.rb
380
- - spec/unit/gdor_mods_fields_spec.rb
381
380
  - spec/unit/indexer_spec.rb
381
+ - spec/unit/mods_fields_spec.rb
382
+ - spec/unit/mods_pub_fields_spec.rb
383
+ - spec/unit/mods_subject_fields_spec.rb
382
384
  - spec/unit/public_xml_fields_spec.rb
383
385
  - spec/unit/solr_doc_builder_spec.rb
384
386
  - spec/unit/solr_doc_hash_spec.rb
@@ -410,8 +412,10 @@ summary: Gryphondor Solr indexing logic
410
412
  test_files:
411
413
  - spec/config/walters_integration_spec.yml
412
414
  - spec/spec_helper.rb
413
- - spec/unit/gdor_mods_fields_spec.rb
414
415
  - spec/unit/indexer_spec.rb
416
+ - spec/unit/mods_fields_spec.rb
417
+ - spec/unit/mods_pub_fields_spec.rb
418
+ - spec/unit/mods_subject_fields_spec.rb
415
419
  - spec/unit/public_xml_fields_spec.rb
416
420
  - spec/unit/solr_doc_builder_spec.rb
417
421
  - spec/unit/solr_doc_hash_spec.rb
@@ -1,813 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe GDor::Indexer::ModsFields do
4
- before(:all) do
5
- @fake_druid = 'oo000oo0000'
6
- @ns_decl = "xmlns='#{Mods::MODS_NS}'"
7
- @mods_xml = "<mods #{@ns_decl}><note>gdor_mods_fields testing</note></mods>"
8
- end
9
-
10
- def sdb_for_mods(m)
11
- resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
12
- allow(resource).to receive(:public_xml).and_return(nil)
13
- allow(resource).to receive(:mods).and_return(Nokogiri::XML(m))
14
- i = Harvestdor::Indexer.new
15
- i.logger.level = Logger::WARN
16
- allow(resource).to receive(:indexer).and_return(i)
17
- lgr = Logger.new(StringIO.new)
18
- lgr.level = Logger::WARN
19
- GDor::Indexer::SolrDocBuilder.new(resource, lgr)
20
- end
21
-
22
- context 'doc_hash_from_mods' do
23
- # see https://consul.stanford.edu/display/NGDE/Required+and+Recommended+Solr+Fields+for+SearchWorks+documents
24
-
25
- context 'summary_search solr field from <abstract>' do
26
- it 'is populated when the MODS has a top level <abstract> element' do
27
- m = "<mods #{@ns_decl}><abstract>blah blah</abstract></mods>"
28
- sdb = sdb_for_mods(m)
29
- expect(sdb.doc_hash_from_mods[:summary_search]).to match_array ['blah blah']
30
- end
31
- it 'has a value for each abstract element' do
32
- m = "<mods #{@ns_decl}>
33
- <abstract>one</abstract>
34
- <abstract>two</abstract>
35
- </mods>"
36
- sdb = sdb_for_mods(m)
37
- expect(sdb.doc_hash_from_mods[:summary_search]).to match_array %w(one two)
38
- end
39
- it 'does not be present when there is no top level <abstract> element' do
40
- m = "<mods #{@ns_decl}><relatedItem><abstract>blah blah</abstract></relatedItem></mods>"
41
- sdb = sdb_for_mods(m)
42
- expect(sdb.doc_hash_from_mods[:summary_search]).to be_nil
43
- end
44
- it 'does not be present if there are only empty abstract elements in the MODS' do
45
- m = "<mods #{@ns_decl}><abstract/><note>notit</note></mods>"
46
- sdb = sdb_for_mods(m)
47
- expect(sdb.doc_hash_from_mods[:summary_search]).to be_nil
48
- end
49
- it 'summary_display should not be populated - it is a copy field' do
50
- m = "<mods #{@ns_decl}><abstract>blah blah</abstract></mods>"
51
- sdb = sdb_for_mods(m)
52
- expect(sdb.doc_hash_from_mods[:summary_display]).to be_nil
53
- end
54
- end # summary_search / <abstract>
55
-
56
- it 'language: should call sw_language_facet in stanford-mods gem to populate language field' do
57
- sdb = sdb_for_mods(@mods_xml)
58
- smr = sdb.smods_rec
59
- expect(smr).to receive(:sw_language_facet)
60
- sdb.doc_hash_from_mods
61
- end
62
-
63
- context 'physical solr field from <physicalDescription><extent>' do
64
- it 'is populated when the MODS has mods/physicalDescription/extent element' do
65
- m = "<mods #{@ns_decl}><physicalDescription><extent>blah blah</extent></physicalDescription></mods>"
66
- sdb = sdb_for_mods(m)
67
- expect(sdb.doc_hash_from_mods[:physical]).to match_array ['blah blah']
68
- end
69
- it 'has a value for each extent element' do
70
- m = "<mods #{@ns_decl}>
71
- <physicalDescription>
72
- <extent>one</extent>
73
- <extent>two</extent>
74
- </physicalDescription>
75
- <physicalDescription><extent>three</extent></physicalDescription>
76
- </mods>"
77
- sdb = sdb_for_mods(m)
78
- expect(sdb.doc_hash_from_mods[:physical]).to match_array %w(one two three)
79
- end
80
- it 'does not be present when there is no top level <physicalDescription> element' do
81
- m = "<mods #{@ns_decl}><relatedItem><physicalDescription><extent>foo</extent></physicalDescription></relatedItem></mods>"
82
- sdb = sdb_for_mods(m)
83
- expect(sdb.doc_hash_from_mods[:physical]).to be_nil
84
- end
85
- it 'does not be present if there are only empty physicalDescription or extent elements in the MODS' do
86
- m = "<mods #{@ns_decl}><physicalDescription/><physicalDescription><extent/></physicalDescription><note>notit</note></mods>"
87
- sdb = sdb_for_mods(m)
88
- expect(sdb.doc_hash_from_mods[:physical]).to be_nil
89
- end
90
- end # physical field from physicalDescription/extent
91
-
92
- context 'url_suppl solr field from /mods/relatedItem/location/url' do
93
- it 'is populated when the MODS has mods/relatedItem/location/url' do
94
- m = "<mods #{@ns_decl}><relatedItem><location><url>url.org</url></location></relatedItem></mods>"
95
- sdb = sdb_for_mods(m)
96
- expect(sdb.doc_hash_from_mods[:url_suppl]).to match_array ['url.org']
97
- end
98
- it 'has a value for each mods/relatedItem/location/url element' do
99
- m = "<mods #{@ns_decl}>
100
- <relatedItem>
101
- <location><url>one</url></location>
102
- <location>
103
- <url>two</url>
104
- <url>three</url>
105
- </location>
106
- </relatedItem>
107
- <relatedItem><location><url>four</url></location></relatedItem>
108
- </mods>"
109
- sdb = sdb_for_mods(m)
110
- expect(sdb.doc_hash_from_mods[:url_suppl]).to match_array %w(one two three four)
111
- end
112
- it 'does not be populated from /mods/location/url element' do
113
- m = "<mods #{@ns_decl}><location><url>hi</url></location></mods>"
114
- sdb = sdb_for_mods(m)
115
- expect(sdb.doc_hash_from_mods[:url_suppl]).to be_nil
116
- end
117
- it 'does not be present if there are only empty relatedItem/location/url elements in the MODS' do
118
- m = "<mods #{@ns_decl}>
119
- <relatedItem><location><url/></location></relatedItem>
120
- <relatedItem><location/></relatedItem>
121
- <relatedItem/><note>notit</note></mods>"
122
- sdb = sdb_for_mods(m)
123
- expect(sdb.doc_hash_from_mods[:url_suppl]).to be_nil
124
- end
125
- end
126
-
127
- context 'toc_search solr field from <tableOfContents>' do
128
- it 'has a value for each tableOfContents element' do
129
- m = "<mods #{@ns_decl}>
130
- <tableOfContents>one</tableOfContents>
131
- <tableOfContents>two</tableOfContents>
132
- </mods>"
133
- sdb = sdb_for_mods(m)
134
- expect(sdb.doc_hash_from_mods[:toc_search]).to match_array %w(one two)
135
- end
136
- it 'does not be present when there is no top level <tableOfContents> element' do
137
- m = "<mods #{@ns_decl}><relatedItem><tableOfContents>foo</tableOfContents></relatedItem></mods>"
138
- sdb = sdb_for_mods(m)
139
- expect(sdb.doc_hash_from_mods[:toc_search]).to be_nil
140
- end
141
- it 'does not be present if there are only empty tableOfContents elements in the MODS' do
142
- m = "<mods #{@ns_decl}><tableOfContents/><note>notit</note></mods>"
143
- sdb = sdb_for_mods(m)
144
- expect(sdb.doc_hash_from_mods[:toc_search]).to be_nil
145
- end
146
- end
147
-
148
- context 'format fields' do
149
- context 'format_main_ssim' do
150
- it 'calls #format_main_ssim method' do
151
- m = "<mods #{@ns_decl}><note>nope</typeOfResource></mods>"
152
- sdb = sdb_for_mods(m)
153
- expect(sdb).to receive(:format_main_ssim)
154
- sdb.doc_hash_from_mods[:format_main_ssim]
155
- end
156
- it 'has a value when MODS data provides' do
157
- m = "<mods #{@ns_decl}><typeOfResource>software, multimedia</typeOfResource><genre>dataset</genre></mods>"
158
- sdb = sdb_for_mods(m)
159
- expect(sdb.doc_hash_from_mods[:format_main_ssim]).to match_array ['Dataset']
160
- end
161
- it 'returns empty Array if there is no value' do
162
- m = "<mods #{@ns_decl}><note>nope</typeOfResource></mods>"
163
- sdb = sdb_for_mods(m)
164
- expect(sdb.doc_hash_from_mods[:format_main_ssim]).to eq([])
165
- end
166
- end
167
- context 'format Solr field' do
168
- it 'calls #format method' do
169
- m = "<mods #{@ns_decl}><note>nope</typeOfResource></mods>"
170
- sdb = sdb_for_mods(m)
171
- expect(sdb).to receive(:format)
172
- sdb.doc_hash_from_mods[:format]
173
- end
174
- it 'has a value when MODS data provides' do
175
- m = "<mods #{@ns_decl}><typeOfResource>software, multimedia</typeOfResource></mods>"
176
- sdb = sdb_for_mods(m)
177
- expect(sdb.doc_hash_from_mods[:format]).to match_array ['Computer File']
178
- end
179
- it 'returns empty Array if there is no value' do
180
- m = "<mods #{@ns_decl}><note>nope</typeOfResource></mods>"
181
- sdb = sdb_for_mods(m)
182
- expect(sdb.doc_hash_from_mods[:format]).to eq([])
183
- end
184
- end
185
- end
186
-
187
- context 'title fields' do
188
- before(:all) do
189
- @title_mods = "<mods #{@ns_decl}>
190
- <titleInfo><title>Jerk</title><nonSort>The</nonSort><subTitle>is whom?</subTitle></titleInfo>
191
- <titleInfo><title>Joke</title></titleInfo>
192
- <titleInfo type='alternative'><title>Alternative</title></titleInfo>
193
- </mods>"
194
- end
195
- let :sdb do
196
- sdb_for_mods(@title_mods)
197
- end
198
- before(:each) do
199
- @title_doc_hash = sdb.doc_hash_from_mods
200
- end
201
- it 'calls the appropriate methods in the stanford-mods gem to populate the fields' do
202
- smr = sdb.smods_rec
203
- expect(smr).to receive(:sw_short_title).at_least(:once)
204
- expect(smr).to receive(:sw_full_title).at_least(:once)
205
- expect(smr).to receive(:sw_title_display)
206
- expect(smr).to receive(:sw_addl_titles)
207
- expect(smr).to receive(:sw_sort_title)
208
- sdb.doc_hash_from_mods
209
- end
210
- context 'search fields' do
211
- it 'title_245a_search' do
212
- expect(@title_doc_hash[:title_245a_search]).to eq('The Jerk')
213
- end
214
- it 'title_245_search' do
215
- expect(@title_doc_hash[:title_245_search]).to eq('The Jerk : is whom?')
216
- end
217
- it 'title_variant_search' do
218
- expect(@title_doc_hash[:title_variant_search]).to match_array %w(Joke Alternative)
219
- end
220
- it 'title_related_search should not be populated from MODS' do
221
- expect(@title_doc_hash[:title_related_search]).to be_nil
222
- end
223
- end
224
- context 'display fields' do
225
- it 'title_display' do
226
- expect(@title_doc_hash[:title_display]).to eq('The Jerk : is whom?')
227
- end
228
- it 'title_245a_display' do
229
- expect(@title_doc_hash[:title_245a_display]).to eq('The Jerk')
230
- end
231
- it 'title_245c_display should not be populated from MODS' do
232
- expect(@title_doc_hash[:title_245c_display]).to be_nil
233
- end
234
- it 'title_full_display' do
235
- expect(@title_doc_hash[:title_full_display]).to eq('The Jerk : is whom?')
236
- end
237
- it 'removes trailing commas in title_display' do
238
- title_mods = "<mods #{@ns_decl}>
239
- <titleInfo><title>Jerk</title><nonSort>The</nonSort><subTitle>is whom,</subTitle></titleInfo>
240
- <titleInfo><title>Joke</title></titleInfo>
241
- <titleInfo type='alternative'><title>Alternative</title></titleInfo>
242
- </mods>"
243
- sdb = sdb_for_mods(title_mods)
244
- @title_doc_hash = sdb.doc_hash_from_mods
245
- @title_doc_hash
246
- expect(@title_doc_hash[:title_display]).to eq('The Jerk : is whom')
247
- end
248
- it 'title_variant_display should not be populated - it is a copy field' do
249
- expect(@title_doc_hash[:title_variant_display]).to be_nil
250
- end
251
- end
252
- it 'title_sort' do
253
- expect(@title_doc_hash[:title_sort]).to eq('Jerk is whom')
254
- end
255
- end # title fields
256
-
257
- context 'author fields' do
258
- before(:all) do
259
- @name_mods = "<mods #{@ns_decl}>
260
- <name type='personal'>
261
- <namePart type='given'>John</namePart>
262
- <namePart type='family'>Huston</namePart>
263
- <role><roleTerm type='code' authority='marcrelator'>drt</roleTerm></role>
264
- <displayForm>q</displayForm>
265
- </name>
266
- <name type='personal'><namePart>Crusty The Clown</namePart></name>
267
- <name type='corporate'><namePart>Watchful Eye</namePart></name>
268
- <name type='corporate'>
269
- <namePart>Exciting Prints</namePart>
270
- <role><roleTerm type='text'>lithographer</roleTerm></role>
271
- </name>
272
- <name type='conference'><namePart>conference</namePart></name>
273
- </mods>"
274
- end
275
- let :sdb do
276
- sdb_for_mods(@name_mods)
277
- end
278
- before(:each) do
279
- @author_doc_hash = sdb.doc_hash_from_mods
280
- end
281
- it 'calls the appropriate methods in the stanford-mods gem to populate the fields' do
282
- smr = sdb.smods_rec
283
- expect(smr).to receive(:sw_main_author)
284
- expect(smr).to receive(:sw_addl_authors)
285
- expect(smr).to receive(:sw_person_authors).exactly(3).times
286
- expect(smr).to receive(:sw_impersonal_authors)
287
- expect(smr).to receive(:sw_corporate_authors)
288
- expect(smr).to receive(:sw_meeting_authors)
289
- expect(smr).to receive(:sw_sort_author)
290
- sdb.doc_hash_from_mods
291
- end
292
- context 'search fields' do
293
- it 'author_1xx_search' do
294
- expect(@author_doc_hash[:author_1xx_search]).to eq('Crusty The Clown')
295
- end
296
- it 'author_7xx_search' do
297
- skip 'Should this return all authors? or only 7xx authors?'
298
- expect(@author_doc_hash[:author_7xx_search]).to match_array ['q', 'Watchful Eye', 'Exciting Prints', 'conference']
299
- end
300
- it 'author_8xx_search should not be populated from MODS' do
301
- expect(@author_doc_hash[:author_8xx_search]).to be_nil
302
- end
303
- end
304
- context 'facet fields' do
305
- it 'author_person_facet' do
306
- expect(@author_doc_hash[:author_person_facet]).to match_array ['q', 'Crusty The Clown']
307
- end
308
- it 'author_other_facet' do
309
- expect(@author_doc_hash[:author_other_facet]).to match_array ['Watchful Eye', 'Exciting Prints', 'conference']
310
- end
311
- end
312
- context 'display fields' do
313
- it 'author_person_display' do
314
- expect(@author_doc_hash[:author_person_display]).to match_array ['q', 'Crusty The Clown']
315
- end
316
- it 'author_person_full_display' do
317
- expect(@author_doc_hash[:author_person_full_display]).to match_array ['q', 'Crusty The Clown']
318
- end
319
- it 'author_corp_display' do
320
- expect(@author_doc_hash[:author_corp_display]).to match_array ['Watchful Eye', 'Exciting Prints']
321
- end
322
- it 'author_meeting_display' do
323
- expect(@author_doc_hash[:author_meeting_display]).to match_array ['conference']
324
- end
325
- end
326
- it 'author_sort' do
327
- expect(@author_doc_hash[:author_sort]).to eq('Crusty The Clown')
328
- end
329
- end # author fields
330
-
331
- context 'subject fields' do
332
- before(:all) do
333
- @genre = 'genre top level'
334
- @cart_coord = '6 00 S, 71 30 E'
335
- @s_genre = 'genre in subject'
336
- @geo = 'Somewhere'
337
- @geo_code = 'us'
338
- @hier_geo_country = 'France'
339
- @s_name = 'name in subject'
340
- @occupation = 'worker bee'
341
- @temporal = 'temporal'
342
- @s_title = 'title in subject'
343
- @topic = 'topic'
344
- @m = "<mods #{@ns_decl}>
345
- <genre>#{@genre}</genre>
346
- <subject><cartographics><coordinates>#{@cart_coord}</coordinates></cartographics></subject>
347
- <subject><genre>#{@s_genre}</genre></subject>
348
- <subject><geographic>#{@geo}</geographic></subject>
349
- <subject><geographicCode authority='iso3166'>#{@geo_code}</geographicCode></subject>
350
- <subject><hierarchicalGeographic><country>#{@hier_geo_country}</country></hierarchicalGeographic></subject>
351
- <subject><name><namePart>#{@s_name}</namePart></name></subject>
352
- <subject><occupation>#{@occupation}</occupation></subject>
353
- <subject><temporal>#{@temporal}</temporal></subject>
354
- <subject><titleInfo><title>#{@s_title}</title></titleInfo></subject>
355
- <subject><topic>#{@topic}</topic></subject>
356
- <typeOfResource>still image</typeOfResource>
357
- </mods>"
358
- @m_no_subject = "<mods #{@ns_decl}><note>notit</note></mods>"
359
- end
360
- let :sdb do
361
- sdb = sdb_for_mods(@m)
362
- end
363
- before(:each) do
364
- @subject_doc_hash = sdb.doc_hash_from_mods
365
- end
366
- it 'calls the appropriate methods in stanford-mods to populate the Solr fields' do
367
- expect(sdb.smods_rec).to receive(:topic_search)
368
- expect(sdb.smods_rec).to receive(:geographic_search)
369
- expect(sdb.smods_rec).to receive(:subject_other_search)
370
- expect(sdb.smods_rec).to receive(:subject_other_subvy_search)
371
- expect(sdb.smods_rec).to receive(:subject_all_search)
372
- expect(sdb.smods_rec).to receive(:topic_facet)
373
- expect(sdb.smods_rec).to receive(:geographic_facet)
374
- expect(sdb.smods_rec).to receive(:era_facet)
375
- sdb.doc_hash_from_mods
376
- end
377
- context 'search fields' do
378
- context 'topic_search' do
379
- it 'onlies include genre and topic' do
380
- expect(@subject_doc_hash[:topic_search]).to match_array [@genre, @topic]
381
- end
382
- context 'functional tests checking results from stanford-mods methods' do
383
- it 'is nil if there are no values in the MODS' do
384
- sdb = sdb_for_mods(@m_no_subject)
385
- expect(sdb.doc_hash_from_mods[:topic_search]).to be_nil
386
- end
387
- it 'does not be nil if there are only subject/topic elements (no <genre>)' do
388
- m = "<mods #{@ns_decl}><subject><topic>#{@topic}</topic></subject></mods>"
389
- sdb = sdb_for_mods(m)
390
- expect(sdb.doc_hash_from_mods[:topic_search]).to match_array [@topic]
391
- end
392
- it 'does not be nil if there are only <genre> elements (no subject/topic elements)' do
393
- m = "<mods #{@ns_decl}><genre>#{@genre}</genre></mods>"
394
- sdb = sdb_for_mods(m)
395
- expect(sdb.doc_hash_from_mods[:topic_search]).to match_array [@genre]
396
- end
397
- it 'has a separate value for each topic subelement' do
398
- m = "<mods #{@ns_decl}>
399
- <subject>
400
- <topic>first</topic>
401
- <topic>second</topic>
402
- </subject>
403
- <subject><topic>third</topic></subject>
404
- </mods>"
405
- sdb = sdb_for_mods(m)
406
- expect(sdb.doc_hash_from_mods[:topic_search]).to match_array %w(first second third)
407
- end
408
- end # functional tests checking results from stanford-mods methods
409
- end # topic_search
410
-
411
- context 'geographic_search' do
412
- it 'includes geographic and hierarchicalGeographic' do
413
- expect(@subject_doc_hash[:geographic_search]).to match_array [@geo, @hier_geo_country]
414
- end
415
- it 'calls sw_geographic_search (from stanford-mods gem)' do
416
- m = "<mods #{@ns_decl}><subject><geographic>#{@geo}</geographic></subject></mods>"
417
- sdb = sdb_for_mods(m)
418
- expect(sdb.smods_rec).to receive(:sw_geographic_search).at_least(1).times
419
- sdb.doc_hash_from_mods
420
- end
421
- it "logs an info message when it encounters a geographicCode encoding it doesn't translate" do
422
- m = "<mods #{@ns_decl}><subject><geographicCode authority='iso3166'>ca</geographicCode></subject></mods>"
423
- sdb = sdb_for_mods(m)
424
- expect(sdb.smods_rec.sw_logger).to receive(:info).with(/#{@fake_druid} has subject geographicCode element with untranslated encoding \(iso3166\): <geographicCode authority=.*>ca<\/geographicCode>/).at_least(1).times
425
- sdb.doc_hash_from_mods
426
- end
427
- end # geographic_search
428
-
429
- context 'subject_other_search' do
430
- it 'includes occupation, subject names, and subject titles' do
431
- expect(@subject_doc_hash[:subject_other_search]).to match_array [@occupation, @s_name, @s_title]
432
- end
433
- context 'functional tests checking results from stanford-mods methods' do
434
- it 'is nil if there are no values in the MODS' do
435
- sdb = sdb_for_mods(@mods_xml)
436
- expect(sdb.doc_hash_from_mods[:subject_other_search]).to be_nil
437
- end
438
- it 'does not be nil if there are only subject/name elements' do
439
- m = "<mods #{@ns_decl}><subject><name><namePart>#{@s_name}</namePart></name></subject></mods>"
440
- sdb = sdb_for_mods(m)
441
- expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [@s_name]
442
- end
443
- it 'does not be nil if there are only subject/occupation elements' do
444
- m = "<mods #{@ns_decl}><subject><occupation>#{@occupation}</occupation></subject></mods>"
445
- sdb = sdb_for_mods(m)
446
- expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [@occupation]
447
- end
448
- it 'does not be nil if there are only subject/titleInfo elements' do
449
- m = "<mods #{@ns_decl}><subject><titleInfo><title>#{@s_title}</title></titleInfo></subject></mods>"
450
- sdb = sdb_for_mods(m)
451
- expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [@s_title]
452
- end
453
- it 'has a separate value for each occupation subelement' do
454
- m = "<mods #{@ns_decl}>
455
- <subject>
456
- <occupation>first</occupation>
457
- <occupation>second</occupation>
458
- </subject>
459
- <subject><occupation>third</occupation></subject>
460
- </mods>"
461
- sdb = sdb_for_mods(m)
462
- expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array %w(first second third)
463
- end
464
- end # functional tests checking results from stanford-mods methods
465
- end # subject_other_search
466
-
467
- context 'subject_other_subvy_search' do
468
- it 'includes temporal and genre SUBelement' do
469
- expect(@subject_doc_hash[:subject_other_subvy_search]).to match_array [@temporal, @s_genre]
470
- end
471
- context 'functional tests checking results from stanford-mods methods' do
472
- it 'is nil if there are no values in the MODS' do
473
- sdb = sdb_for_mods(@mods_xml)
474
- expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to be_nil
475
- end
476
- it 'does not be nil if there are only subject/temporal elements (no subject/genre)' do
477
- m = "<mods #{@ns_decl}><subject><temporal>#{@temporal}</temporal></subject></mods>"
478
- sdb = sdb_for_mods(m)
479
- expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array [@temporal]
480
- end
481
- it 'does not be nil if there are only subject/genre elements (no subject/temporal)' do
482
- m = "<mods #{@ns_decl}><subject><genre>#{@s_genre}</genre></subject></mods>"
483
- sdb = sdb_for_mods(m)
484
- expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array [@s_genre]
485
- end
486
- context 'genre subelement' do
487
- it 'has a separate value for each genre element' do
488
- m = "<mods #{@ns_decl}>
489
- <subject>
490
- <genre>first</genre>
491
- <genre>second</genre>
492
- </subject>
493
- <subject><genre>third</genre></subject>
494
- </mods>"
495
- sdb = sdb_for_mods(m)
496
- expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array %w(first second third)
497
- end
498
- end # genre subelement
499
- end # "functional tests checking results from stanford-mods methods"
500
- end # subject_other_subvy_search
501
-
502
- context 'subject_all_search' do
503
- it 'contains top level <genre> element data' do
504
- expect(@subject_doc_hash[:subject_all_search]).to include(@genre)
505
- end
506
- it 'does not contain cartographic sub element' do
507
- expect(@subject_doc_hash[:subject_all_search]).not_to include(@cart_coord)
508
- end
509
- it 'does not include codes from hierarchicalGeographic sub element' do
510
- expect(@subject_doc_hash[:subject_all_search]).not_to include(@geo_code)
511
- end
512
- it 'contains all other subject subelement data' do
513
- expect(@subject_doc_hash[:subject_all_search]).to include(@s_genre)
514
- expect(@subject_doc_hash[:subject_all_search]).to include(@geo)
515
- expect(@subject_doc_hash[:subject_all_search]).to include(@hier_geo_country)
516
- expect(@subject_doc_hash[:subject_all_search]).to include(@s_name)
517
- expect(@subject_doc_hash[:subject_all_search]).to include(@occupation)
518
- expect(@subject_doc_hash[:subject_all_search]).to include(@temporal)
519
- expect(@subject_doc_hash[:subject_all_search]).to include(@s_title)
520
- expect(@subject_doc_hash[:subject_all_search]).to include(@topic)
521
- end
522
- end # subject_all_search
523
- end # search fields
524
-
525
- context 'facet fields' do
526
- context 'topic_facet' do
527
- it 'includes topic subelement' do
528
- expect(@subject_doc_hash[:topic_facet]).to include(@topic)
529
- end
530
- it 'includes sw_subject_names' do
531
- expect(@subject_doc_hash[:topic_facet]).to include(@s_name)
532
- end
533
- it 'includes sw_subject_titles' do
534
- expect(@subject_doc_hash[:topic_facet]).to include(@s_title)
535
- end
536
- it 'includes occupation subelement' do
537
- expect(@subject_doc_hash[:topic_facet]).to include(@occupation)
538
- end
539
- it 'has the trailing punctuation removed' do
540
- m = "<mods #{@ns_decl}><subject>
541
- <topic>comma,</topic>
542
- <occupation>semicolon;</occupation>
543
- <titleInfo><title>backslash \\</title></titleInfo>
544
- <name><namePart>internal, punct;uation</namePart></name>
545
- </subject></mods>"
546
- sdb = sdb_for_mods(m)
547
- doc_hash = sdb.doc_hash_from_mods
548
- expect(doc_hash[:topic_facet]).to include('comma')
549
- expect(doc_hash[:topic_facet]).to include('semicolon')
550
- expect(doc_hash[:topic_facet]).to include('backslash')
551
- expect(doc_hash[:topic_facet]).to include('internal, punct;uation')
552
- end
553
- end # topic_facet
554
-
555
- context 'geographic_facet' do
556
- it 'includes geographic subelement' do
557
- expect(@subject_doc_hash[:geographic_facet]).to include(@geo)
558
- end
559
- it 'is like geographic_search with the trailing punctuation (and preceding spaces) removed' do
560
- m = "<mods #{@ns_decl}><subject>
561
- <geographic>comma,</geographic>
562
- <geographic>semicolon;</geographic>
563
- <geographic>backslash \\</geographic>
564
- <geographic>internal, punct;uation</geographic>
565
- </subject></mods>"
566
- sdb = sdb_for_mods(m)
567
- doc_hash = sdb.doc_hash_from_mods
568
- expect(doc_hash[:geographic_facet]).to include('comma')
569
- expect(doc_hash[:geographic_facet]).to include('semicolon')
570
- expect(doc_hash[:geographic_facet]).to include('backslash')
571
- expect(doc_hash[:geographic_facet]).to include('internal, punct;uation')
572
- end
573
- end
574
-
575
- it 'era_facet should be temporal subelement with the trailing punctuation removed' do
576
- m = "<mods #{@ns_decl}><subject>
577
- <temporal>comma,</temporal>
578
- <temporal>semicolon;</temporal>
579
- <temporal>backslash \\</temporal>
580
- <temporal>internal, punct;uation</temporal>
581
- </subject></mods>"
582
- sdb = sdb_for_mods(m)
583
- doc_hash = sdb.doc_hash_from_mods
584
- expect(doc_hash[:era_facet]).to include('comma')
585
- expect(doc_hash[:era_facet]).to include('semicolon')
586
- expect(doc_hash[:era_facet]).to include('backslash')
587
- expect(doc_hash[:era_facet]).to include('internal, punct;uation')
588
- end
589
- end # facet fields
590
- end # subject fields
591
-
592
- context 'publication date fields' do
593
- it 'populates all date fields' do
594
- m = "<mods #{@ns_decl}><originInfo>
595
- <dateIssued>13th century AH / 19th CE</dateIssued>
596
- </originInfo></mods>"
597
- sdb = sdb_for_mods(m)
598
- doc_hash = sdb.doc_hash_from_mods
599
- expect(doc_hash[:pub_date]).to eq('19th century')
600
- expect(doc_hash[:pub_date_sort]).to eq('1800')
601
- expect(doc_hash[:publication_year_isi]).to eq('1800')
602
- expect(doc_hash[:pub_year_tisim]).to eq('1800') # date slider
603
- expect(doc_hash[:pub_date_display]).to eq('13th century AH / 19th CE')
604
- expect(doc_hash[:imprint_display]).to eq('13th century AH / 19th CE')
605
- end
606
- it 'does not populate the date slider for BC dates' do
607
- m = "<mods #{@ns_decl}><originInfo><dateIssued>199 B.C.</dateIssued></originInfo></mods>"
608
- sdb = sdb_for_mods(m)
609
- doc_hash = sdb.doc_hash_from_mods
610
- expect(doc_hash).to_not have_key(:pub_year_tisim)
611
- end
612
-
613
- context 'pub_date_sort integration tests' do
614
- let :sdb do
615
- sdb = sdb_for_mods("<mods #{@ns_decl}> </mods>")
616
- end
617
- it 'works on normal dates' do
618
- allow(sdb.smods_rec).to receive(:pub_date).and_return('1945')
619
- expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('1945')
620
- end
621
- it 'works on 3 digit dates' do
622
- allow(sdb.smods_rec).to receive(:pub_date).and_return('945')
623
- expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('0945')
624
- end
625
- it 'works on century dates' do
626
- allow(sdb.smods_rec).to receive(:pub_date).and_return('16--')
627
- expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('1600')
628
- end
629
- it 'works on 3 digit century dates' do
630
- allow(sdb.smods_rec).to receive(:pub_date).and_return('9--')
631
- expect(sdb.doc_hash_from_mods[:pub_date_sort]).to eq('0900')
632
- end
633
- end # pub_date_sort
634
-
635
- context 'pub_year_tisim for date slider' do
636
- it 'takes single dateCreated' do
637
- m = "<mods #{@ns_decl}><originInfo>
638
- <dateCreated>1904</dateCreated>
639
- </originInfo></mods>"
640
- sdb = sdb_for_mods(m)
641
- doc_hash = sdb.doc_hash_from_mods
642
- expect(doc_hash[:pub_year_tisim]).to eq('1904')
643
- end
644
- it 'correctlies parse a ranged date' do
645
- m = "<mods #{@ns_decl}><originInfo>
646
- <dateCreated>Text dated June 4, 1594; miniatures added by 1596</dateCreated>
647
- </originInfo></mods>"
648
- sdb = sdb_for_mods(m)
649
- doc_hash = sdb.doc_hash_from_mods
650
- expect(doc_hash[:pub_year_tisim]).to eq('1594')
651
- end
652
- it 'finds year in an expanded English form' do
653
- m = "<mods #{@ns_decl}><originInfo>
654
- <dateCreated>Aug. 3rd, 1886</dateCreated>
655
- </originInfo></mods>"
656
- sdb = sdb_for_mods(m)
657
- doc_hash = sdb.doc_hash_from_mods
658
- expect(doc_hash[:pub_year_tisim]).to eq('1886')
659
- end
660
- it 'removes question marks and brackets' do
661
- m = "<mods #{@ns_decl}><originInfo>
662
- <dateCreated>Aug. 3rd, [18]86?</dateCreated>
663
- </originInfo></mods>"
664
- sdb = sdb_for_mods(m)
665
- doc_hash = sdb.doc_hash_from_mods
666
- expect(doc_hash[:pub_year_tisim]).to eq('1886')
667
- end
668
- it 'ignores an s after the decade' do
669
- m = "<mods #{@ns_decl}><originInfo>
670
- <dateCreated>early 1890s</dateCreated>
671
- </originInfo></mods>"
672
- sdb = sdb_for_mods(m)
673
- doc_hash = sdb.doc_hash_from_mods
674
- expect(doc_hash[:pub_year_tisim]).to eq('1890')
675
- end
676
- it 'chooses a date ending with CE if there are multiple dates' do
677
- m = "<mods #{@ns_decl}><originInfo>
678
- <dateIssued>7192 AM (li-Adam) / 1684 CE</dateIssued>
679
- </originInfo></mods>"
680
- sdb = sdb_for_mods(m)
681
- doc_hash = sdb.doc_hash_from_mods
682
- expect(doc_hash[:pub_year_tisim]).to eq('1684')
683
- end
684
- it 'takes first year from hyphenated range (for now)' do
685
- m = "<mods #{@ns_decl}><originInfo>
686
- <dateIssued>1282 AH / 1865-6 CE</dateIssued>
687
- </originInfo></mods>"
688
- sdb = sdb_for_mods(m)
689
- doc_hash = sdb.doc_hash_from_mods
690
- expect(doc_hash[:pub_year_tisim]).to eq('1865')
691
- end
692
- end # pub_year_tisim method
693
-
694
- context 'difficult pub dates' do
695
- it 'should handle multiple pub dates (to be implemented - esp for date slider)'
696
-
697
- it 'should choose the latest date??? (to be implemented - esp for sorting and date slider)'
698
-
699
- it 'handles nnth century dates' do
700
- m = "<mods #{@ns_decl}><originInfo>
701
- <dateIssued>13th century AH / 19th CE</dateIssued>
702
- </originInfo></mods>"
703
- sdb = sdb_for_mods(m)
704
- doc_hash = sdb.doc_hash_from_mods
705
- expect(doc_hash[:pub_date]).to eq('19th century')
706
- expect(doc_hash[:pub_date_sort]).to eq('1800')
707
- expect(doc_hash[:pub_year_tisim]).to eq('1800')
708
- expect(doc_hash[:publication_year_isi]).to eq('1800')
709
- expect(doc_hash[:imprint_display]).to eq('13th century AH / 19th CE')
710
- end
711
- it 'handles multiple CE dates' do
712
- m = "<mods #{@ns_decl}><originInfo>
713
- <dateIssued>6 Dhu al-Hijjah 923 AH / 1517 CE -- 7 Rabi I 924 AH / 1518 CE</dateIssued>
714
- </originInfo></mods>"
715
- sdb = sdb_for_mods(m)
716
- doc_hash = sdb.doc_hash_from_mods
717
- expect(doc_hash[:pub_date_sort]).to eq('1517')
718
- expect(doc_hash[:pub_date]).to eq('1517')
719
- expect(doc_hash[:pub_year_tisim]).to eq('1517')
720
- end
721
- it 'handles specific century case from walters' do
722
- m = "<mods #{@ns_decl}><originInfo>
723
- <dateIssued>Late 14th or early 15th century CE</dateIssued>
724
- </originInfo></mods>"
725
- sdb = sdb_for_mods(m)
726
- doc_hash = sdb.doc_hash_from_mods
727
- expect(doc_hash[:pub_date_sort]).to eq('1400')
728
- expect(doc_hash[:pub_year_tisim]).to eq('1400')
729
- expect(doc_hash[:publication_year_isi]).to eq('1400')
730
- expect(doc_hash[:pub_date]).to eq('15th century')
731
- expect(doc_hash[:imprint_display]).to eq('Late 14th or early 15th century CE')
732
- end
733
- it 'works on explicit 3 digit dates' do
734
- m = "<mods #{@ns_decl}><originInfo>
735
- <dateIssued>966 CE</dateIssued>
736
- </originInfo></mods>"
737
- sdb = sdb_for_mods(m)
738
- doc_hash = sdb.doc_hash_from_mods
739
- expect(doc_hash[:pub_date_sort]).to eq('0966')
740
- expect(doc_hash[:pub_date]).to eq('966')
741
- expect(doc_hash[:pub_year_tisim]).to eq('0966')
742
- expect(doc_hash[:publication_year_isi]).to eq('0966')
743
- expect(doc_hash[:imprint_display]).to eq('966 CE')
744
- end
745
- it 'works on 3 digit century dates' do
746
- m = "<mods #{@ns_decl}><originInfo>
747
- <dateIssued>3rd century AH / 9th CE</dateIssued>
748
- </originInfo></mods>"
749
- sdb = sdb_for_mods(m)
750
- doc_hash = sdb.doc_hash_from_mods
751
- expect(doc_hash[:pub_date_sort]).to eq('0800')
752
- expect(doc_hash[:pub_year_tisim]).to eq('0800')
753
- expect(doc_hash[:pub_date]).to eq('9th century')
754
- expect(doc_hash[:publication_year_isi]).to eq('0800')
755
- expect(doc_hash[:imprint_display]).to eq('3rd century AH / 9th CE')
756
- end
757
- it 'works on 3 digit BC dates' do
758
- m = "<mods #{@ns_decl}><originInfo>
759
- <dateCreated>300 B.C.</dateCreated>
760
- </originInfo></mods>"
761
- sdb = sdb_for_mods(m)
762
- doc_hash = sdb.doc_hash_from_mods
763
- expect(doc_hash[:pub_date_sort]).to eq('-700')
764
- expect(doc_hash[:pub_year_tisim]).to be_nil
765
- expect(doc_hash[:pub_date]).to eq('300 B.C.')
766
- expect(doc_hash[:imprint_display]).to eq('300 B.C.')
767
- # doc_hash[:creation_year_isi].should =='-300'
768
- end
769
- end # difficult pub dates
770
- end # publication date fields
771
- end # doc_hash_from_mods
772
-
773
- context '#format' do
774
- it 'gets format from call to stanford-mods searchworks format method' do
775
- m = "<mods #{@ns_decl}><typeOfResource>still image</typeOfResouce></mods>"
776
- sdb = sdb_for_mods(m)
777
- expect(sdb.smods_rec).to receive(:format).and_call_original
778
- expect(sdb.format).to match_array ['Image']
779
- end
780
- it 'returns empty Array and log warning if there is no value' do
781
- sdb = sdb_for_mods(@mods_xml)
782
- expect(sdb.logger).to receive(:warn).with("#{@fake_druid} has no SearchWorks format from MODS - check <typeOfResource> and other implicated MODS elements")
783
- expect(sdb.format).to eq([])
784
- end
785
- end # context #format
786
-
787
- context '#format_main_ssim' do
788
- it 'gets format_main_ssim from call to stanford-mods searchworks format_main method' do
789
- m = "<mods #{@ns_decl}><typeOfResource>still image</typeOfResouce></mods>"
790
- sdb = sdb_for_mods(m)
791
- expect(sdb.smods_rec).to receive(:format_main).and_call_original
792
- expect(sdb.format_main_ssim).to match_array ['Image']
793
- end
794
- it 'returns empty Array and log warning if there is no value' do
795
- sdb = sdb_for_mods(@mods_xml)
796
- expect(sdb.logger).to receive(:warn).with("#{@fake_druid} has no SearchWorks Resource Type from MODS - check <typeOfResource> and other implicated MODS elements")
797
- expect(sdb.format_main_ssim).to eq([])
798
- end
799
- end # context format_main_ssim
800
-
801
- context 'genre_ssim' do
802
- it 'gets genre_ssim from call to stanford-mods searchworks sw_genre method' do
803
- m = "<mods #{@ns_decl}><genre>technical report</genre></mods>"
804
- sdb = sdb_for_mods(m)
805
- expect(sdb.smods_rec).to receive(:sw_genre).and_call_original
806
- expect(sdb.genre_ssim).to match_array ['Technical report']
807
- end
808
- it 'returns empty Array if there is no value' do
809
- sdb = sdb_for_mods(@mods_xml)
810
- expect(sdb.genre_ssim).to eq([])
811
- end
812
- end # context genre_ssim
813
- end