gdor-indexer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,128 @@
1
+ require 'spec_helper'
2
+
3
+ describe GDor::Indexer::SolrDocBuilder do
4
# Fixture values shared by the examples in this group, assigned once
# before any of them run.
before :all do
  @fake_druid = 'oo000oo0000'
  @ns_decl = %(xmlns='#{Mods::MODS_NS}')
  @mods_xml = %(<mods #{@ns_decl}><note>SolrDocBuilder test</note></mods>)
  @ng_mods_xml = Nokogiri::XML(@mods_xml)
end
10
+
11
# Logger backed by an in-memory StringIO rather than a file or STDOUT.
let(:logger) { Logger.new(StringIO.new) }
14
+
15
# Builds a SolrDocBuilder around a Harvestdor resource whose `mods` and
# `public_xml` readers are stubbed to return the given XML, parsed by Nokogiri.
#
# @param mods [String] MODS XML returned (parsed) by the resource's #mods
# @param pub_xml [String, nil] public XML returned (parsed) by the resource's #public_xml
# @return [GDor::Indexer::SolrDocBuilder] builder wired to the stubbed resource and the spec logger
def sdb_for_data(mods, pub_xml)
  stubbed_resource = Harvestdor::Indexer::Resource.new(double, @fake_druid).tap do |res|
    allow(res).to receive(:mods).and_return(Nokogiri::XML(mods))
    allow(res).to receive(:public_xml).and_return(Nokogiri::XML(pub_xml))
  end
  GDor::Indexer::SolrDocBuilder.new(stubbed_resource, logger)
end
21
+
22
+ # NOTE:
23
+ # "Doubles, stubs, and message expectations are all cleaned out after each example."
24
+ # per https://www.relishapp.com/rspec/rspec-mocks/docs/scope
25
+
26
context 'doc_hash' do
  before :all do
    content_md = "<contentMetadata type='image' objectId='#{@fake_druid}'></contentMetadata>"
    @pub_xml = "<publicObject id='druid#{@fake_druid}'>#{content_md}</publicObject>"
  end

  # Solr document hash built from the shared MODS and the public XML above;
  # evaluated lazily, once per example.
  let(:doc_hash) { sdb_for_data(@mods_xml, @pub_xml).doc_hash }

  it 'id field should be set to druid for non-merged record' do
    expect(doc_hash[:id]).to eq(@fake_druid)
  end

  it 'does not have the gdor fields set in indexer.rb' do
    [:druid, :access_facet, :url_fulltext, :display_type, :file_id].each do |gdor_field|
      expect(doc_hash).not_to have_key(gdor_field)
    end
  end

  it 'has the full MODS in the modsxml field for non-merged record' do
    expect(doc_hash[:modsxml]).to be_equivalent_to @mods_xml
  end
end # doc hash
51
+
52
context '#catkey' do
  before(:all) do
    @identity_md_start = "<publicObject><identityMetadata objectId='#{@fake_druid}'>"
    @identity_md_end = '</identityMetadata></publicObject>'
    @empty_id_md = "#{@identity_md_start}#{@identity_md_end}"
    @barcode_id_md = "#{@identity_md_start}<otherId name=\"barcode\">666</otherId>#{@identity_md_end}"
  end

  # Wraps an XML fragment in a namespaced <mods><recordInfo> element.
  #
  # @param inner_xml [String] child element(s) to place inside recordInfo
  # @return [String] MODS XML document as a String
  def mods_with_record_info(inner_xml)
    "<mods #{@ns_decl}><recordInfo>\n#{inner_xml}\n</recordInfo></mods>"
  end

  # MODS whose recordInfo carries a SIRSI record identifier.
  let(:sirsi_mods) do
    mods_with_record_info('<recordIdentifier source="SIRSI">a6780453</recordIdentifier>')
  end

  it 'is nil if there is no indication of catkey in identityMetadata' do
    sdb = sdb_for_data(@mods_xml, @empty_id_md)
    expect(sdb.catkey).to be_nil
  end

  it 'takes a catkey in identityMetadata/otherId with name attribute of catkey' do
    pub_xml = "#{@identity_md_start}<otherId name=\"catkey\">12345</otherId>#{@identity_md_end}"
    sdb = sdb_for_data(@mods_xml, pub_xml)
    expect(sdb.catkey).to eq('12345')
  end

  it 'is nil if there is no indication of catkey in identityMetadata even if there is a catkey in the mods' do
    # The MODS containing the SIRSI catkey must be the one handed to the
    # builder; previously the catkey-less default MODS was passed, so this
    # example never exercised the case it describes.
    sdb = sdb_for_data(sirsi_mods, @empty_id_md)
    expect(sdb.catkey).to be_nil
  end

  it 'logs an error when there is identityMetadata/otherId with name attribute of barcode but there is no catkey in mods' do
    sdb = sdb_for_data(@mods_xml, @barcode_id_md)
    expect(logger).to receive(:error).with(/#{@fake_druid} has barcode .* in identityMetadata but no SIRSI catkey in mods/)
    sdb.catkey
  end

  context 'catkey from mods' do
    it 'looks for catkey in mods if identityMetadata/otherId with name attribute of barcode is found' do
      sdb = sdb_for_data(@mods_xml, @barcode_id_md)
      smr = sdb.smods_rec
      expect(smr).to receive(:record_info).and_call_original # this is as close as I can figure to @smods_rec.record_info.recordIdentifier
      sdb.catkey
    end

    it 'is nil if there is no catkey in the mods' do
      m = mods_with_record_info('<descriptionStandard>dacs</descriptionStandard>')
      sdb = sdb_for_data(m, @barcode_id_md)

      expect(sdb.catkey).to be_nil
    end

    it 'populated when source attribute is SIRSI' do
      sdb = sdb_for_data(sirsi_mods, @barcode_id_md)
      expect(sdb.catkey).not_to be_nil
    end

    it 'not populated when source attribute is not SIRSI' do
      m = mods_with_record_info('<recordIdentifier source="FOO">a6780453</recordIdentifier>')
      sdb = sdb_for_data(m, @barcode_id_md)
      expect(sdb.catkey).to be_nil
    end

    it 'removes the a at the beginning of the catkey' do
      sdb = sdb_for_data(sirsi_mods, @barcode_id_md)
      expect(sdb.catkey).to eq('6780453')
    end
  end
end # #catkey
119
+
120
context 'using Harvestdor::Client' do
  context '#smods_rec (called in initialize method)' do
    # No public XML is needed here, so nil is passed for it.
    it 'returns Stanford::Mods::Record object' do
      smods_rec = sdb_for_data(@mods_xml, nil).smods_rec
      expect(smods_rec).to be_an_instance_of(Stanford::Mods::Record)
    end
  end
end # context using Harvestdor::Client
128
+ end
@@ -0,0 +1,399 @@
1
+ require 'spec_helper'
2
+
3
+ describe GDor::Indexer::SolrDocHash do
4
# Examples for SolrDocHash#field_present?(field, expected = nil), exercised
# through RSpec's dynamic predicate matcher `be_field_present`.
context '#field_present?' do
  context 'actual field value is boolean true' do
    subject { described_class.new(a: true) }

    it 'true if expected value is nil' do
      expect(subject).to be_field_present(:a)
    end
    it 'false if expected value is String' do
      expect(subject).not_to be_field_present(:a, 'true')
    end
    it 'false if expected value is Regex' do
      # Field name and expected value are separate arguments; passing a
      # single Hash (a: /true/) looked up a non-existent key and made this
      # example pass without testing the Regexp branch.
      expect(subject).not_to be_field_present(:a, /true/)
    end
  end

  context 'expected value is nil' do
    subject { described_class.new({}) }

    it 'false if the field is not in the doc_hash' do
      expect(subject).not_to be_field_present(:any)
    end
    it 'false if hash[field] is nil' do
      subject[:foo] = nil
      expect(subject).not_to be_field_present(:foo)
    end
    it 'false if hash[field] is an empty String' do
      subject[:foo] = ''
      expect(subject).not_to be_field_present(:foo)
    end
    it 'true if hash[field] is a non-empty String' do
      subject[:foo] = 'bar'
      expect(subject).to be_field_present(:foo)
    end
    it 'false if hash[field] is an empty Array' do
      subject[:foo] = []
      expect(subject).not_to be_field_present(:foo)
    end
    it 'false if hash[field] is an Array with only empty String values' do
      subject[:foo] = ['', '']
      expect(subject).not_to be_field_present(:foo)
    end
    it 'false if hash[field] is an Array with only nil String values' do
      subject[:foo] = [nil]
      expect(subject).not_to be_field_present(:foo)
    end
    it 'true if hash[field] is a non-empty Array' do
      subject[:foo] = ['a']
      expect(subject).to be_field_present(:foo)
    end
    it 'false if doc_hash[field] is not a String or Array' do
      subject[:foo] = {}
      expect(subject).not_to be_field_present(:foo)
    end
  end

  context 'expected value is a String' do
    subject { described_class.new({}) }

    it 'true if hash[field] is a String and matches' do
      subject[:foo] = 'a'
      expect(subject).to be_field_present(:foo, 'a')
    end
    it "false if hash[field] is a String and doesn't match" do
      subject[:foo] = 'a'
      expect(subject).not_to be_field_present(:foo, 'b')
    end
    it 'true if hash[field] is an Array with a value that matches' do
      subject[:foo] = %w(a b)
      expect(subject).to be_field_present(:foo, 'a')
    end
    it 'false if hash[field] is an Array with no value that matches' do
      subject[:foo] = %w(a b)
      expect(subject).not_to be_field_present(:foo, 'c')
    end
    it 'false if hash[field] is not a String or Array' do
      subject[:foo] = {}
      expect(subject).not_to be_field_present(:foo, 'a')
    end
  end

  context 'expected value is Regex' do
    subject { described_class.new({}) }

    it 'true if hash[field] is a String and matches' do
      subject[:foo] = 'aba'
      expect(subject).to be_field_present(:foo, /b/)
    end
    it "false if hash[field] is a String and doesn't match" do
      subject[:foo] = 'aaaaa'
      expect(subject).not_to be_field_present(:foo, /b/)
    end
    it 'true if hash[field] is an Array with a value that matches' do
      subject[:foo] = %w(a b)
      expect(subject).to be_field_present(:foo, /b/)
    end
    it 'false if hash[field] is an Array with no value that matches' do
      subject[:foo] = %w(a b)
      expect(subject).not_to be_field_present(:foo, /c/)
    end
    it 'false if hash[field] is not a String or Array' do
      subject[:foo] = {}
      expect(subject).not_to be_field_present(:foo, /a/)
    end
  end
end # field_present?
109
+
110
# Examples for SolrDocHash#combine, grouped by the state of the original
# value for the key and, within each group, by the kind of new value.
context '#combine' do
  context 'orig has no key' do
    subject { described_class.new({}) }

    it 'result has no key if new value is nil' do
      expect(subject.combine(foo: nil)).to eq({})
    end
    it 'result has no key if new value is empty String' do
      expect(subject.combine(foo: '')).to eq({})
    end
    it 'result has new value if new value is non-empty String' do
      expect(subject.combine(foo: 'bar')).to eq(foo: 'bar')
    end
    it 'result has no key if new value is empty Array' do
      expect(subject.combine(foo: [])).to eq({})
    end
    it 'result has new value if new value is non-empty Array' do
      expect(subject.combine(foo: ['bar'])).to eq(foo: ['bar'])
    end
    it 'result has no key if new value is not String or Array' do
      expect(subject.combine(foo: {})).to eq({})
    end
  end # orig has no key

  context 'orig value is nil' do
    subject { described_class.new(foo: nil) }

    it 'result has no key if new value is nil' do
      expect(subject.combine(foo: nil)).to eq({})
    end
    it 'result has no key if new value is empty String' do
      expect(subject.combine(foo: '')).to eq({})
    end
    it 'result has new value if new value is non-empty String' do
      expect(subject.combine(foo: 'bar')).to eq(foo: 'bar')
    end
    it 'result has no key if new value is empty Array' do
      expect(subject.combine(foo: [])).to eq({})
    end
    it 'result has new value if new value is non-empty Array' do
      expect(subject.combine(foo: ['bar'])).to eq(foo: ['bar'])
    end
    it 'result has no key if new value is not String or Array' do
      expect(subject.combine(foo: {})).to eq({})
    end
  end # orig value is nil

  context 'orig value is empty String' do
    subject { described_class.new(foo: '') }

    it 'result has no key if new value is nil' do
      expect(subject.combine(foo: nil)).to eq({})
    end
    it 'result has no key if new value is empty String' do
      expect(subject.combine(foo: '')).to eq({})
    end
    it 'result has new value if new value is non-empty String' do
      expect(subject.combine(foo: 'bar')).to eq(foo: 'bar')
    end
    it 'result has no key if new value is empty Array' do
      expect(subject.combine(foo: [])).to eq({})
    end
    it 'result has new value if new value is non-empty Array' do
      expect(subject.combine(foo: ['bar'])).to eq(foo: ['bar'])
    end
    it 'result has no key if new value is not String or Array' do
      expect(subject.combine(foo: {})).to eq({})
    end
  end # orig value is empty String

  context 'orig value is non-empty String' do
    subject { described_class.new(foo: 'a') }

    it 'result is orig value if new value is nil' do
      expect(subject.combine(foo: nil)).to eq(foo: 'a')
    end
    it 'result is orig value if new value is empty String' do
      expect(subject.combine(foo: '')).to eq(foo: 'a')
    end
    it 'result is Array of old and new values if new value is non-empty String' do
      expect(subject.combine(foo: 'bar')).to eq(foo: %w(a bar))
    end
    it 'result is orig value if new value is empty Array' do
      expect(subject.combine(foo: [])).to eq(foo: 'a')
    end
    it 'result is Array of old and new values if new value is non-empty Array' do
      expect(subject.combine(foo: %w(bar ness))).to eq(foo: %w(a bar ness))
    end
    # Description corrected: the expectation is that the non-String value is
    # appended, not that the original value is kept unchanged.
    it 'result is Array of old and new values if new value is not String or Array' do
      expect(subject.combine(foo: :bar)).to eq(foo: ['a', :bar])
    end
  end # orig value is non-empty String

  context 'orig value is empty Array' do
    subject { described_class.new(foo: []) }

    it 'result has no key if new value is nil' do
      expect(subject.combine(foo: nil)).to eq({})
    end
    it 'result has no key if new value is empty String' do
      expect(subject.combine(foo: '')).to eq({})
    end
    it 'result is new value if new value is non-empty String' do
      expect(subject.combine(foo: 'bar')).to eq(foo: 'bar')
    end
    it 'result has no key if new value is empty Array' do
      expect(subject.combine(foo: [])).to eq({})
    end
    it 'result is new values if new value is non-empty Array' do
      expect(subject.combine(foo: %w(bar ness))).to eq(foo: %w(bar ness))
    end
    it 'result has no key if new value is not String or Array' do
      expect(subject.combine(foo: {})).to eq({})
    end
  end # orig value is empty Array

  context 'orig value is non-empty Array' do
    subject { described_class.new(foo: %w(a b)) }

    it 'result is orig value if new value is nil' do
      expect(subject.combine(foo: nil)).to eq(foo: %w(a b))
    end
    it 'result is orig value if new value is empty String' do
      expect(subject.combine(foo: '')).to eq(foo: %w(a b))
    end
    it 'result is Array of old and new values if new value is non-empty String' do
      expect(subject.combine(foo: 'bar')).to eq(foo: %w(a b bar))
    end
    it 'result is orig value if new value is empty Array' do
      expect(subject.combine(foo: [])).to eq(foo: %w(a b))
    end
    it 'result is Array of old and new values if new value is non-empty Array' do
      expect(subject.combine(foo: %w(bar ness))).to eq(foo: %w(a b bar ness))
    end
    # Description corrected: the new value is appended to the original Array.
    it 'result is Array of old and new values if new value is not String or Array' do
      expect(subject.combine(foo: :bar)).to eq(foo: ['a', 'b', :bar])
    end
  end # orig value is non-empty Array
end # combine
251
+
252
# Examples for SolrDocHash#validate_item: each example builds a hash that is
# wrong in one item-level field and checks the first validation message.
context '#validate_item' do
  let(:collection_druid) { 'xyz' }
  let(:mock_config) { Confstruct::Configuration.new }

  before do
    # Stub the gdor-field validation on every instance so only the
    # item-level checks contribute messages. Uses the expect/allow syntax
    # already used elsewhere in this spec instead of the deprecated
    # any_instance.stub.
    allow_any_instance_of(described_class).to receive(:validate_gdor_fields).and_return([])
  end

  it 'calls validate_gdor_fields' do
    hash = described_class.new({})
    expect(hash).to receive(:validate_gdor_fields).and_return([])
    hash.validate_item(mock_config)
  end
  it 'has a value if collection is wrong' do
    hash = described_class.new(collection: 'junk',
                               collection_with_title: "#{collection_druid}-|-asdasdf",
                               file_id: ['anything'])
    expect(hash).to receive(:validate_gdor_fields).and_return([])
    expect(hash.validate_item(mock_config).first).to match(/collection /)
  end
  it 'has a value if collection_with_title is missing' do
    hash = described_class.new(collection: collection_druid,
                               collection_with_title: nil,
                               file_id: ['anything'])
    expect(hash.validate_item(mock_config).first).to match(/collection_with_title /)
  end
  it 'has a value if collection_with_title is missing the title' do
    hash = described_class.new(collection: collection_druid,
                               collection_with_title: "#{collection_druid}-|-",
                               file_id: ['anything'])
    expect(hash.validate_item(mock_config).first).to match(/collection_with_title /)
  end
  it 'has a value if file_id field is missing' do
    hash = described_class.new(collection: collection_druid,
                               collection_with_title: "#{collection_druid}-|-asdasdf",
                               file_id: nil)
    expect(hash.validate_item(mock_config).first).to match(/file_id/)
  end
  it 'does not have a value if gdor_fields and item fields are ok' do
    hash = described_class.new(collection: collection_druid,
                               collection_with_title: "#{collection_druid}-|-asdasdf",
                               file_id: ['anything'])
    expect(hash.validate_item(mock_config)).to eq([])
  end
end # validate_item
297
+
298
# Examples for SolrDocHash#validate_collection: each example omits or
# corrupts one collection-level field and checks the first validation message.
context '#validate_collection' do
  let(:mock_config) { Confstruct::Configuration.new }

  before do
    # Stub the gdor-field validation on every instance so only the
    # collection-level checks contribute messages. Uses the expect/allow
    # syntax already used elsewhere in this spec instead of the deprecated
    # any_instance.stub.
    allow_any_instance_of(described_class).to receive(:validate_gdor_fields).and_return([])
  end

  it 'calls validate_gdor_fields' do
    hash = described_class.new({})
    expect(hash).to receive(:validate_gdor_fields).and_return([])
    hash.validate_collection(mock_config)
  end
  it 'has a value if collection_type is missing' do
    hash = described_class.new(format_main_ssim: 'Archive/Manuscript')

    expect(hash.validate_collection(mock_config).first).to match(/collection_type/)
  end
  it "has a value if collection_type is not 'Digital Collection'" do
    hash = described_class.new(collection_type: 'lalalalala', format_main_ssim: 'Archive/Manuscript')
    expect(hash.validate_collection(mock_config).first).to match(/collection_type/)
  end
  it 'has a value if format_main_ssim is missing' do
    hash = described_class.new(collection_type: 'Digital Collection')
    expect(hash.validate_collection(mock_config).first).to match(/format_main_ssim/)
  end
  it "has a value if format_main_ssim doesn't include 'Archive/Manuscript'" do
    hash = described_class.new(format_main_ssim: 'lalalalala', collection_type: 'Digital Collection')
    expect(hash.validate_collection(mock_config).first).to match(/format_main_ssim/)
  end
  it 'does not have a value if gdor_fields, collection_type and format_main_ssim are ok' do
    hash = described_class.new(collection_type: 'Digital Collection', format_main_ssim: 'Archive/Manuscript')
    expect(hash.validate_collection(mock_config)).to eq([])
  end
end # validate_collection
332
+
333
# Examples for SolrDocHash#validate_gdor_fields: start from a complete set of
# gdor fields and override one field per example.
context '#validate_gdor_fields' do
  let(:druid) { 'druid' }
  let(:mock_config) do
    Confstruct::Configuration.new do
      harvestdor do
        purl 'https://some.uri'
      end
    end
  end
  let(:purl_url) { mock_config.harvestdor.purl }

  # Field values that the "no problems" example expects to validate cleanly.
  let(:valid_fields) do
    {
      access_facet: 'Online',
      druid: druid,
      url_fulltext: "#{purl_url}/#{druid}",
      display_type: 'image',
      building_facet: 'Stanford Digital Repository'
    }
  end

  it 'returns an empty Array when there are no problems' do
    messages = described_class.new(valid_fields).validate_gdor_fields(mock_config)
    expect(messages).to eq([])
  end
  it 'has a value for each missing field' do
    messages = described_class.new({}).validate_gdor_fields(mock_config)
    expect(messages.length).to eq(5)
  end
  it 'has a value for an unrecognized display_type' do
    hash = described_class.new(valid_fields.merge(display_type: 'zzzz'))
    expect(hash.validate_gdor_fields(mock_config).first).to match(/display_type/)
  end
  it "has a value for access_facet other than 'Online'" do
    hash = described_class.new(valid_fields.merge(access_facet: 'BAD'))
    expect(hash.validate_gdor_fields(mock_config).first).to match(/access_facet/)
  end
  it "has a value for building_facet other than 'Stanford Digital Repository'" do
    hash = described_class.new(valid_fields.merge(building_facet: 'WRONG'))
    expect(hash.validate_gdor_fields(mock_config).first).to match(/building_facet/)
  end
end # validate_gdor_fields
381
+
382
# Examples for SolrDocHash#validate_mods. The context label now matches the
# method actually called below (it previously read '#validation_mods').
context '#validate_mods' do
  let(:mock_config) { {} }

  it 'has no validation messages for a complete record' do
    hash = described_class.new(modsxml: 'whatever',
                               title_display: 'title',
                               pub_year_tisim: 'some year',
                               author_person_display: 'author',
                               format_main_ssim: 'Image',
                               format: 'Image',
                               language: 'English')
    expect(hash.validate_mods(mock_config).length).to eq(0)
  end
  it 'has validation messages for each missing field' do
    # Only :id is set, so every field checked by validate_mods is absent.
    hash = described_class.new(id: 'whatever')
    expect(hash.validate_mods(mock_config).length).to eq(7)
  end
end # validate_mods
399
+ end