gdor-indexer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,128 @@
1
+ require 'spec_helper'
2
+
3
+ describe GDor::Indexer::SolrDocBuilder do
4
+ before(:all) do
5
+ @fake_druid = 'oo000oo0000'
6
+ @ns_decl = "xmlns='#{Mods::MODS_NS}'"
7
+ @mods_xml = "<mods #{@ns_decl}><note>SolrDocBuilder test</note></mods>"
8
+ @ng_mods_xml = Nokogiri::XML(@mods_xml)
9
+ end
10
+
11
+ let :logger do
12
+ Logger.new(StringIO.new)
13
+ end
14
+
15
+ def sdb_for_data(mods, pub_xml) # test helper: returns a SolrDocBuilder for a Resource stubbed with the given MODS and public XML strings
16
+ resource = Harvestdor::Indexer::Resource.new(double, @fake_druid) # a bare double stands in for the first constructor argument
17
+ allow(resource).to receive(:mods).and_return(Nokogiri::XML(mods)) # stub #mods with the parsed MODS string
18
+ allow(resource).to receive(:public_xml).and_return(Nokogiri::XML(pub_xml)) # stub #public_xml with the parsed public XML string
19
+ GDor::Indexer::SolrDocBuilder.new(resource, logger)
20
+ end
21
+
22
+ # NOTE:
23
+ # "Doubles, stubs, and message expectations are all cleaned out after each example."
24
+ # per https://www.relishapp.com/rspec/rspec-mocks/docs/scope
25
+
26
+ context 'doc_hash' do
27
+ before(:all) do
28
+ cmd_xml = "<contentMetadata type='image' objectId='#{@fake_druid}'></contentMetadata>"
29
+ @pub_xml = "<publicObject id='druid#{@fake_druid}'>#{cmd_xml}</publicObject>"
30
+ end
31
+ let :doc_hash do
32
+ sdb_for_data(@mods_xml, @pub_xml).doc_hash
33
+ end
34
+ before(:each) do
35
+ @doc_hash = doc_hash
36
+ end
37
+ it 'id field should be set to druid for non-merged record' do
38
+ expect(@doc_hash[:id]).to eq(@fake_druid)
39
+ end
40
+ it 'does not have the gdor fields set in indexer.rb' do
41
+ expect(@doc_hash).to_not have_key(:druid)
42
+ expect(@doc_hash).to_not have_key(:access_facet)
43
+ expect(@doc_hash).to_not have_key(:url_fulltext)
44
+ expect(@doc_hash).to_not have_key(:display_type)
45
+ expect(@doc_hash).to_not have_key(:file_id)
46
+ end
47
+ it 'has the full MODS in the modsxml field for non-merged record' do
48
+ expect(@doc_hash[:modsxml]).to be_equivalent_to @mods_xml
49
+ end
50
+ end # doc hash
51
+
52
+ context '#catkey' do
53
+ before(:all) do
54
+ @identity_md_start = "<publicObject><identityMetadata objectId='#{@fake_druid}'>"
55
+ @identity_md_end = '</identityMetadata></publicObject>'
56
+ @empty_id_md = "#{@identity_md_start}#{@identity_md_end}"
57
+ @barcode_id_md = "#{@identity_md_start}<otherId name=\"barcode\">666</otherId>#{@identity_md_end}"
58
+ end
59
+ it 'is nil if there is no indication of catkey in identityMetadata' do
60
+ sdb = sdb_for_data(@mods_xml, @empty_id_md)
61
+ expect(sdb.catkey).to be_nil
62
+ end
63
+ it 'takes a catkey in identityMetadata/otherId with name attribute of catkey' do
64
+ pub_xml = "#{@identity_md_start}<otherId name=\"catkey\">12345</otherId>#{@identity_md_end}"
65
+ sdb = sdb_for_data(@mods_xml, pub_xml)
66
+ expect(sdb.catkey).to eq('12345')
67
+ end
68
+ it 'is nil if there is no indication of catkey in identityMetadata even if there is a catkey in the mods' do
69
+ m = "<mods #{@ns_decl}><recordInfo>
70
+ <recordIdentifier source=\"SIRSI\">a6780453</recordIdentifier>
71
+ </recordInfo></mods>"
72
+ sdb = sdb_for_data(@mods_xml, @empty_id_md)
73
+ expect(sdb.catkey).to be_nil
74
+ end
75
+ it 'logs an error when there is identityMetadata/otherId with name attribute of barcode but there is no catkey in mods' do
76
+ sdb = sdb_for_data(@mods_xml, @barcode_id_md)
77
+ expect(logger).to receive(:error).with(/#{@fake_druid} has barcode .* in identityMetadata but no SIRSI catkey in mods/)
78
+ sdb.catkey
79
+ end
80
+
81
+ context 'catkey from mods' do
82
+ it 'looks for catkey in mods if identityMetadata/otherId with name attribute of barcode is found' do
83
+ sdb = sdb_for_data(@mods_xml, @barcode_id_md)
84
+ smr = sdb.smods_rec
85
+ expect(smr).to receive(:record_info).and_call_original # this is as close as I can figure to @smods_rec.record_info.recordIdentifier
86
+ sdb.catkey
87
+ end
88
+ it 'is nil if there is no catkey in the mods' do
89
+ m = "<mods #{@ns_decl}><recordInfo>
90
+ <descriptionStandard>dacs</descriptionStandard>
91
+ </recordInfo></mods>"
92
+ sdb = sdb_for_data(m, @barcode_id_md)
93
+
94
+ expect(sdb.catkey).to be_nil
95
+ end
96
+ it 'populated when source attribute is SIRSI' do
97
+ m = "<mods #{@ns_decl}><recordInfo>
98
+ <recordIdentifier source=\"SIRSI\">a6780453</recordIdentifier>
99
+ </recordInfo></mods>"
100
+ sdb = sdb_for_data(m, @barcode_id_md)
101
+ expect(sdb.catkey).not_to be_nil
102
+ end
103
+ it 'not populated when source attribute is not SIRSI' do
104
+ m = "<mods #{@ns_decl}><recordInfo>
105
+ <recordIdentifier source=\"FOO\">a6780453</recordIdentifier>
106
+ </recordInfo></mods>"
107
+ sdb = sdb_for_data(m, @barcode_id_md)
108
+ expect(sdb.catkey).to be_nil
109
+ end
110
+ it 'removes the a at the beginning of the catkey' do
111
+ m = "<mods #{@ns_decl}><recordInfo>
112
+ <recordIdentifier source=\"SIRSI\">a6780453</recordIdentifier>
113
+ </recordInfo></mods>"
114
+ sdb = sdb_for_data(m, @barcode_id_md)
115
+ expect(sdb.catkey).to eq('6780453')
116
+ end
117
+ end
118
+ end # #catkey
119
+
120
+ context 'using Harvestdor::Client' do
121
+ context '#smods_rec (called in initialize method)' do
122
+ it 'returns Stanford::Mods::Record object' do
123
+ sdb = sdb_for_data(@mods_xml, nil)
124
+ expect(sdb.smods_rec).to be_an_instance_of(Stanford::Mods::Record)
125
+ end
126
+ end
127
+ end # context using Harvestdor::Client
128
+ end
@@ -0,0 +1,399 @@
1
+ require 'spec_helper'
2
+
3
+ describe GDor::Indexer::SolrDocHash do
4
+ context '#field_present?' do
5
+ context 'actual field value is boolean true' do
6
+ subject do
7
+ described_class.new(a: true)
8
+ end
9
+ it 'true if expected value is nil' do
10
+ expect(subject).to be_field_present(:a)
11
+ end
12
+ it 'false if expected value is String' do
13
+ expect(subject).not_to be_field_present(:a, 'true')
14
+ end
15
+ it 'false if expected value is Regex' do
16
+ expect(subject).not_to be_field_present(a: /true/)
17
+ end
18
+ end
19
+
20
+ context 'expected value is nil' do
21
+ subject { described_class.new({}) }
22
+ it 'false if the field is not in the doc_hash' do
23
+ expect(subject).not_to be_field_present(:any)
24
+ end
25
+ it 'false if hash[field] is nil' do
26
+ subject[:foo] = nil
27
+ expect(subject).not_to be_field_present(:foo)
28
+ end
29
+ it 'false if hash[field] is an empty String' do
30
+ subject[:foo] = ''
31
+ expect(subject).not_to be_field_present(:foo)
32
+ end
33
+ it 'true if hash[field] is a non-empty String' do
34
+ subject[:foo] = 'bar'
35
+ expect(subject).to be_field_present(:foo)
36
+ end
37
+ it 'false if hash[field] is an empty Array' do
38
+ subject[:foo] = []
39
+ expect(subject).not_to be_field_present(:foo)
40
+ end
41
+ it 'false if hash[field] is an Array with only empty String values' do
42
+ subject[:foo] = ['', '']
43
+ expect(subject).not_to be_field_present(:foo)
44
+ end
45
+ it 'false if hash[field] is an Array with only nil String values' do
46
+ subject[:foo] = [nil]
47
+ expect(subject).not_to be_field_present(:foo)
48
+ end
49
+ it 'true if hash[field] is a non-empty Array' do
50
+ subject[:foo] = ['a']
51
+ expect(subject).to be_field_present(:foo)
52
+ end
53
+ it 'false if doc_hash[field] is not a String or Array' do
54
+ subject[:foo] = {}
55
+ expect(subject).not_to be_field_present(:foo)
56
+ end
57
+ end
58
+
59
+ context 'expected value is a String' do
60
+ subject { described_class.new({}) }
61
+
62
+ it 'true if hash[field] is a String and matches' do
63
+ subject[:foo] = 'a'
64
+ expect(subject).to be_field_present(:foo, 'a')
65
+ end
66
+ it "false if hash[field] is a String and doesn't match" do
67
+ subject[:foo] = 'a'
68
+ expect(subject).not_to be_field_present(:foo, 'b')
69
+ end
70
+ it 'true if hash[field] is an Array with a value that matches' do
71
+ subject[:foo] = %w(a b)
72
+ expect(subject).to be_field_present(:foo, 'a')
73
+ end
74
+ it 'false if hash[field] is an Array with no value that matches' do
75
+ subject[:foo] = %w(a b)
76
+ expect(subject).not_to be_field_present(:foo, 'c')
77
+ end
78
+ it 'false if hash[field] is not a String or Array' do
79
+ subject[:foo] = {}
80
+ expect(subject).not_to be_field_present(:foo, 'a')
81
+ end
82
+ end
83
+
84
+ context 'expected value is Regex' do
85
+ subject { described_class.new({}) }
86
+
87
+ it 'true if hash[field] is a String and matches' do
88
+ subject[:foo] = 'aba'
89
+ expect(subject).to be_field_present(:foo, /b/)
90
+ end
91
+ it "false if hash[field] is a String and doesn't match" do
92
+ subject[:foo] = 'aaaaa'
93
+ expect(subject).not_to be_field_present(:foo, /b/)
94
+ end
95
+ it 'true if hash[field] is an Array with a value that matches' do
96
+ subject[:foo] = %w(a b)
97
+ expect(subject).to be_field_present(:foo, /b/)
98
+ end
99
+ it 'false if hash[field] is an Array with no value that matches' do
100
+ subject[:foo] = %w(a b)
101
+ expect(subject).not_to be_field_present(:foo, /c/)
102
+ end
103
+ it 'false if hash[field] is not a String or Array' do
104
+ subject[:foo] = {}
105
+ expect(subject).not_to be_field_present(:foo, /a/)
106
+ end
107
+ end
108
+ end # field_present?
109
+
110
+ context '#combine' do
111
+ context 'orig has no key' do
112
+ subject do
113
+ described_class.new({})
114
+ end
115
+
116
+ it 'result has no key if new value is nil' do
117
+ expect(subject.combine(foo: nil)).to eq({})
118
+ end
119
+ it 'result has no key if new value is empty String' do
120
+ expect(subject.combine(foo: '')).to eq({})
121
+ end
122
+ it 'result has new value if new value is non-empty String' do
123
+ expect(subject.combine(foo: 'bar')).to eq(foo: 'bar')
124
+ end
125
+ it 'result has no key if new value is empty Array' do
126
+ expect(subject.combine(foo: [])).to eq({})
127
+ end
128
+ it 'result has new value new value is non-empty Array' do
129
+ expect(subject.combine(foo: ['bar'])).to eq(foo: ['bar'])
130
+ end
131
+ it 'result has no key if new value is not String or Array' do
132
+ expect(subject.combine(foo: {})).to eq({})
133
+ end
134
+ end # orig has no key
135
+ context 'orig value is nil' do
136
+ subject do
137
+ described_class.new(foo: nil)
138
+ end
139
+ it 'result has no key if new value is nil' do
140
+ expect(subject.combine(foo: nil)).to eq({})
141
+ end
142
+ it 'result has no key if new value is empty String' do
143
+ expect(subject.combine(foo: '')).to eq({})
144
+ end
145
+ it 'result has new value if new value is non-empty String' do
146
+ expect(subject.combine(foo: 'bar')).to eq(foo: 'bar')
147
+ end
148
+ it 'result has no key if new value is empty Array' do
149
+ expect(subject.combine(foo: [])).to eq({})
150
+ end
151
+ it 'result has new value if new value is non-empty Array' do
152
+ expect(subject.combine(foo: ['bar'])).to eq(foo: ['bar'])
153
+ end
154
+ it 'result has no key if new value is not String or Array' do
155
+ expect(subject.combine(foo: {})).to eq({})
156
+ end
157
+ end # orig value is nil
158
+ context 'orig value is empty String' do
159
+ subject do
160
+ described_class.new(foo: '')
161
+ end
162
+ it 'result has no key if new value is nil' do
163
+ expect(subject.combine(foo: nil)).to eq({})
164
+ end
165
+ it 'result has no key if new value is empty String' do
166
+ expect(subject.combine(foo: '')).to eq({})
167
+ end
168
+ it 'result has new value if new value is non-empty String' do
169
+ expect(subject.combine(foo: 'bar')).to eq(foo: 'bar')
170
+ end
171
+ it 'result has no key if new value is empty Array' do
172
+ expect(subject.combine(foo: [])).to eq({})
173
+ end
174
+ it 'result has new value if new value is non-empty Array' do
175
+ expect(subject.combine(foo: ['bar'])).to eq(foo: ['bar'])
176
+ end
177
+ it 'result has no key if new value is not String or Array' do
178
+ expect(subject.combine(foo: {})).to eq({})
179
+ end
180
+ end # orig value is empty String
181
+ context 'orig value is non-empty String' do
182
+ subject do
183
+ described_class.new(foo: 'a')
184
+ end
185
+ it 'result is orig value if new value is nil' do
186
+ expect(subject.combine(foo: nil)).to eq(foo: 'a')
187
+ end
188
+ it 'result is orig value if new value is empty String' do
189
+ expect(subject.combine(foo: '')).to eq(foo: 'a')
190
+ end
191
+ it 'result is Array of old and new values if new value is non-empty String' do
192
+ expect(subject.combine(foo: 'bar')).to eq(foo: %w(a bar))
193
+ end
194
+ it 'result is orig value if new value is empty Array' do
195
+ expect(subject.combine(foo: [])).to eq(foo: 'a')
196
+ end
197
+ it 'result Array of old and new values if new value is non-empty Array' do
198
+ expect(subject.combine(foo: %w(bar ness))).to eq(foo: %w(a bar ness))
199
+ end
200
+ it 'result is orig value if new value is not String or Array' do
201
+ expect(subject.combine(foo: :bar)).to eq(foo: ['a', :bar])
202
+ end
203
+ end # orig value is String
204
+ context 'orig value is empty Array' do
205
+ subject do
206
+ described_class.new(foo: [])
207
+ end
208
+ it 'result has no key if new value is nil' do
209
+ expect(subject.combine(foo: nil)).to eq({})
210
+ end
211
+ it 'result has no key if new value is empty String' do
212
+ expect(subject.combine(foo: '')).to eq({})
213
+ end
214
+ it 'result is new value if new value is non-empty String' do
215
+ expect(subject.combine(foo: 'bar')).to eq(foo: 'bar')
216
+ end
217
+ it 'result has no key if new value is empty Array' do
218
+ expect(subject.combine(foo: [])).to eq({})
219
+ end
220
+ it 'result is new values if new value is non-empty Array' do
221
+ expect(subject.combine(foo: %w(bar ness))).to eq(foo: %w(bar ness))
222
+ end
223
+ it 'result has no key if new value is not String or Array' do
224
+ expect(subject.combine(foo: {})).to eq({})
225
+ end
226
+ end # orig value is empty Array
227
+ context 'orig value is non-empty Array' do
228
+ subject do
229
+ described_class.new(foo: %w(a b))
230
+ end
231
+ it 'result is orig value if new value is nil' do
232
+ expect(subject.combine(foo: nil)).to eq(foo: %w(a b))
233
+ end
234
+ it 'result is orig value if new value is empty String' do
235
+ expect(subject.combine(foo: '')).to eq(foo: %w(a b))
236
+ end
237
+ it 'result is Array of old and new values if new value is non-empty String' do
238
+ expect(subject.combine(foo: 'bar')).to eq(foo: %w(a b bar))
239
+ end
240
+ it 'result is orig value if new value is empty Array' do
241
+ expect(subject.combine(foo: [])).to eq(foo: %w(a b))
242
+ end
243
+ it 'result Array of old and new values if new value is non-empty Array' do
244
+ expect(subject.combine(foo: %w(bar ness))).to eq(foo: %w(a b bar ness))
245
+ end
246
+ it 'result is orig value if new value is not String or Array' do
247
+ expect(subject.combine(foo: :bar)).to eq(foo: ['a', 'b', :bar])
248
+ end
249
+ end # orig value is non-empty Array
250
+ end # combine
251
+
252
+ context '#validate_item' do
253
+ let(:collection_druid) { 'xyz' }
254
+ let(:mock_config) { Confstruct::Configuration.new }
255
+
256
+ before do
257
+ described_class.any_instance.stub(validate_gdor_fields: [])
258
+ end
259
+
260
+ it 'calls validate_gdor_fields' do
261
+ hash = described_class.new({})
262
+ expect(hash).to receive(:validate_gdor_fields).and_return([])
263
+ hash.validate_item(mock_config)
264
+ end
265
+ it 'has a value if collection is wrong' do
266
+ hash = described_class.new(collection: 'junk',
267
+ collection_with_title: "#{collection_druid}-|-asdasdf",
268
+ file_id: ['anything'])
269
+ expect(hash).to receive(:validate_gdor_fields).and_return([])
270
+ expect(hash.validate_item(mock_config).first).to match(/collection /)
271
+ end
272
+ it 'has a value if collection_with_title is missing' do
273
+ hash = described_class.new(collection: collection_druid,
274
+ collection_with_title: nil,
275
+ file_id: ['anything'])
276
+ expect(hash.validate_item(mock_config).first).to match(/collection_with_title /)
277
+ end
278
+ it 'has a value if collection_with_title is missing the title' do
279
+ hash = described_class.new(collection: collection_druid,
280
+ collection_with_title: "#{collection_druid}-|-",
281
+ file_id: ['anything'])
282
+ expect(hash.validate_item(mock_config).first).to match(/collection_with_title /)
283
+ end
284
+ it 'has a value if file_id field is missing' do
285
+ hash = described_class.new(collection: collection_druid,
286
+ collection_with_title: "#{collection_druid}-|-asdasdf",
287
+ file_id: nil)
288
+ expect(hash.validate_item(mock_config).first).to match(/file_id/)
289
+ end
290
+ it 'does not have a value if gdor_fields and item fields are ok' do
291
+ hash = described_class.new(collection: collection_druid,
292
+ collection_with_title: "#{collection_druid}-|-asdasdf",
293
+ file_id: ['anything'])
294
+ expect(hash.validate_item(mock_config)).to eq([])
295
+ end
296
+ end # validate_item
297
+
298
+ context '#validate_collection' do
299
+ let(:mock_config) { Confstruct::Configuration.new }
300
+
301
+ before do
302
+ described_class.any_instance.stub(validate_gdor_fields: [])
303
+ end
304
+
305
+ it 'calls validate_gdor_fields' do
306
+ hash = described_class.new({})
307
+ expect(hash).to receive(:validate_gdor_fields).and_return([])
308
+ hash.validate_collection(mock_config)
309
+ end
310
+ it 'has a value if collection_type is missing' do
311
+ hash = described_class.new(format_main_ssim: 'Archive/Manuscript')
312
+
313
+ expect(hash.validate_collection(mock_config).first).to match(/collection_type/)
314
+ end
315
+ it "has a value if collection_type is not 'Digital Collection'" do
316
+ hash = described_class.new(collection_type: 'lalalalala', format_main_ssim: 'Archive/Manuscript')
317
+ expect(hash.validate_collection(mock_config).first).to match(/collection_type/)
318
+ end
319
+ it 'has a value if format_main_ssim is missing' do
320
+ hash = described_class.new(collection_type: 'Digital Collection')
321
+ expect(hash.validate_collection(mock_config).first).to match(/format_main_ssim/)
322
+ end
323
+ it "has a value if format_main_ssim doesn't include 'Archive/Manuscript'" do
324
+ hash = described_class.new(format_main_ssim: 'lalalalala', collection_type: 'Digital Collection')
325
+ expect(hash.validate_collection(mock_config).first).to match(/format_main_ssim/)
326
+ end
327
+ it 'does not have a value if gdor_fields, collection_type and format_main_ssim are ok' do
328
+ hash = described_class.new(collection_type: 'Digital Collection', format_main_ssim: 'Archive/Manuscript')
329
+ expect(hash.validate_collection(mock_config)).to eq([])
330
+ end
331
+ end # validate_collection
332
+
333
+ context '#validate_gdor_fields' do
334
+ let(:druid) { 'druid' }
335
+ let(:purl_url) { mock_config.harvestdor.purl }
336
+ let(:mock_config) do
337
+ Confstruct::Configuration.new do
338
+ harvestdor do
339
+ purl 'https://some.uri'
340
+ end
341
+ end
342
+ end
343
+
344
+ it 'returns an empty Array when there are no problems' do
345
+ hash = described_class.new(access_facet: 'Online',
346
+ druid: druid,
347
+ url_fulltext: "#{purl_url}/#{druid}",
348
+ display_type: 'image',
349
+ building_facet: 'Stanford Digital Repository')
350
+ expect(hash.validate_gdor_fields(mock_config)).to eq([])
351
+ end
352
+ it 'has a value for each missing field' do
353
+ hash = described_class.new({})
354
+ expect(hash.validate_gdor_fields(mock_config).length).to eq(5)
355
+ end
356
+ it 'has a value for an unrecognized display_type' do
357
+ hash = described_class.new(access_facet: 'Online',
358
+ druid: druid,
359
+ url_fulltext: "#{purl_url}/#{druid}",
360
+ display_type: 'zzzz',
361
+ building_facet: 'Stanford Digital Repository')
362
+ expect(hash.validate_gdor_fields(mock_config).first).to match(/display_type/)
363
+ end
364
+ it "has a value for access_facet other than 'Online'" do
365
+ hash = described_class.new(access_facet: 'BAD',
366
+ druid: druid,
367
+ url_fulltext: "#{purl_url}/#{druid}",
368
+ display_type: 'image',
369
+ building_facet: 'Stanford Digital Repository')
370
+ expect(hash.validate_gdor_fields(mock_config).first).to match(/access_facet/)
371
+ end
372
+ it "has a value for building_facet other than 'Stanford Digital Repository'" do
373
+ hash = described_class.new(access_facet: 'Online',
374
+ druid: druid,
375
+ url_fulltext: "#{purl_url}/#{druid}",
376
+ display_type: 'image',
377
+ building_facet: 'WRONG')
378
+ expect(hash.validate_gdor_fields(mock_config).first).to match(/building_facet/)
379
+ end
380
+ end # validate_gdor_fields
381
+
382
+ context '#validation_mods' do
383
+ let(:mock_config) { {} }
384
+ it 'has no validation messages for a complete record' do
385
+ hash = described_class.new(modsxml: 'whatever',
386
+ title_display: 'title',
387
+ pub_year_tisim: 'some year',
388
+ author_person_display: 'author',
389
+ format_main_ssim: 'Image',
390
+ format: 'Image',
391
+ language: 'English')
392
+ expect(hash.validate_mods(mock_config).length).to eq(0)
393
+ end
394
+ it 'has validation messages for each missing field' do
395
+ hash = described_class.new(id: 'whatever')
396
+ expect(hash.validate_mods(mock_config).length).to eq(7)
397
+ end
398
+ end
399
+ end