gdor-indexer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,411 @@
1
+ require 'spec_helper'
2
+
3
+ describe GDor::Indexer do
4
+ before(:all) do
5
+ @config_yml_path = File.join(File.dirname(__FILE__), '..', 'config', 'walters_integration_spec.yml')
6
+ require 'yaml'
7
+ @yaml = YAML.load_file(@config_yml_path)
8
+ @ns_decl = "xmlns='#{Mods::MODS_NS}'"
9
+ @fake_druid = 'oo000oo0000'
10
+ @coll_druid_from_test_config = 'ww121ss5000'
11
+ @mods_xml = "<mods #{@ns_decl}><note>Indexer test</note></mods>"
12
+ @ng_mods_xml = Nokogiri::XML("<mods #{@ns_decl}><note>Indexer test</note></mods>")
13
+ @pub_xml = "<publicObject id='druid#{@fake_druid}'></publicObject>"
14
+ @ng_pub_xml = Nokogiri::XML("<publicObject id='druid#{@fake_druid}'></publicObject>")
15
+ end
16
+ before(:each) do
17
+ @indexer = described_class.new(@config_yml_path) do |config|
18
+ config.whitelist = ['druid:ww121ss5000']
19
+ end
20
+ allow(@indexer.solr_client).to receive(:add)
21
+ end
22
+
23
+ let :resource do
24
+ r = Harvestdor::Indexer::Resource.new(double, @fake_druid)
25
+ allow(r).to receive(:collections).and_return []
26
+ allow(r).to receive(:mods).and_return Nokogiri::XML(@mods_xml)
27
+ allow(r).to receive(:public_xml).and_return Nokogiri::XML(@pub_xml)
28
+ allow(r).to receive(:public_xml?).and_return true
29
+ allow(r).to receive(:content_metadata).and_return nil
30
+ allow(r).to receive(:collection?).and_return false
31
+ r
32
+ end
33
+
34
+ let :collection do
35
+ r = Harvestdor::Indexer::Resource.new(double, @coll_druid_from_test_config)
36
+ allow(r).to receive(:collections).and_return []
37
+ allow(r).to receive(:mods).and_return Nokogiri::XML(@mods_xml)
38
+ allow(r).to receive(:public_xml).and_return Nokogiri::XML(@pub_xml)
39
+ allow(r).to receive(:public_xml?).and_return true
40
+ allow(r).to receive(:content_metadata).and_return nil
41
+ allow(r).to receive(:identity_md_obj_label).and_return ''
42
+ allow(r).to receive(:collection?).and_return true
43
+ r
44
+ end
45
+
46
+ context 'logging' do
47
+ it 'writes the log file to the directory indicated by log_dir' do
48
+ @indexer.logger.info('walters_integration_spec logging test message')
49
+ expect(File).to exist(File.join(@yaml['harvestdor']['log_dir'], @yaml['harvestdor']['log_name']))
50
+ end
51
+ end
52
+
53
+ describe '#harvest_and_index' do
54
+ before :each do
55
+ allow(@indexer.harvestdor).to receive(:each_resource)
56
+ allow(@indexer).to receive(:solr_client).and_return(double(commit!: nil))
57
+ allow(@indexer).to receive(:log_results)
58
+ allow(@indexer).to receive(:email_results)
59
+ end
60
+ it 'logs and email results' do
61
+ expect(@indexer).to receive(:log_results)
62
+ expect(@indexer).to receive(:email_results)
63
+
64
+ @indexer.harvest_and_index
65
+ end
66
+ it 'indexs each resource' do
67
+ allow(@indexer).to receive(:harvestdor).and_return(Class.new do
68
+ def initialize(*items)
69
+ @items = items
70
+ end
71
+
72
+ def each_resource(_opts = {})
73
+ @items.each { |x| yield x }
74
+ end
75
+
76
+ def logger
77
+ Logger.new(STDERR)
78
+ end
79
+ end.new(collection, resource))
80
+
81
+ expect(@indexer).to receive(:index).with(collection)
82
+ expect(@indexer).to receive(:index).with(resource)
83
+
84
+ @indexer.harvest_and_index
85
+ end
86
+ it 'sends a solr commit' do
87
+ expect(@indexer.solr_client).to receive(:commit!)
88
+ @indexer.harvest_and_index
89
+ end
90
+ it 'does not commit if nocommit is set' do
91
+ expect(@indexer.solr_client).to_not receive(:commit!)
92
+ @indexer.harvest_and_index(true)
93
+ end
94
+ end
95
+
96
+ describe '#index' do
97
+ it 'indexs collections as collections' do
98
+ expect(@indexer).to receive(:collection_solr_document).with(collection)
99
+ @indexer.index collection
100
+ end
101
+
102
+ it 'indexs other resources as items' do
103
+ expect(@indexer).to receive(:item_solr_document).with(resource)
104
+ @indexer.index resource
105
+ end
106
+ end
107
+
108
+ describe '#index_with_exception_handling' do
109
+ it 'captures, logs, and re-raises any exception thrown by the indexing process' do
110
+ expect(@indexer).to receive(:index).with(resource).and_raise 'xyz' # a String argument raises RuntimeError
111
+ expect(@indexer.logger).to receive(:error)
112
+ expect { @indexer.index_with_exception_handling(resource) }.to raise_error RuntimeError
113
+ expect(@indexer.druids_failed_to_ix).to include resource.druid # the failed druid is recorded
114
+ end
115
+ end
116
+
117
+ context '#item_solr_document' do
118
+ context 'unmerged' do
119
+ it 'calls Harvestdor::Indexer.solr_add' do
120
+ doc_hash = @indexer.item_solr_document(resource)
121
+ expect(doc_hash).to include id: @fake_druid
122
+ end
123
+ it 'calls validate_item' do
124
+ expect_any_instance_of(GDor::Indexer::SolrDocHash).to receive(:validate_item).and_return([])
125
+ @indexer.item_solr_document resource
126
+ end
127
+ it 'calls GDor::Indexer::SolrDocBuilder.validate_mods' do
128
+ allow_any_instance_of(GDor::Indexer::SolrDocHash).to receive(:validate_item).and_return([])
129
+ expect_any_instance_of(GDor::Indexer::SolrDocHash).to receive(:validate_mods).and_return([])
130
+ @indexer.item_solr_document resource
131
+ end
132
+ it 'calls add_coll_info' do
133
+ expect(@indexer).to receive(:add_coll_info)
134
+ @indexer.item_solr_document resource
135
+ end
136
+ it 'has fields populated from the collection record' do
137
+ sdb = double
138
+ allow(sdb).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new)
139
+ allow(sdb).to receive(:display_type)
140
+ allow(sdb).to receive(:file_ids)
141
+ allow(sdb.doc_hash).to receive(:validate_mods).and_return([])
142
+ allow(GDor::Indexer::SolrDocBuilder).to receive(:new).and_return(sdb)
143
+ allow(resource).to receive(:collections).and_return([double(druid: 'foo', bare_druid: 'foo', identity_md_obj_label: 'bar')])
144
+ doc_hash = @indexer.item_solr_document resource
145
+ expect(doc_hash).to include druid: @fake_druid, collection: ['foo'], collection_with_title: ['foo-|-bar']
146
+ end
147
+ it 'has fields populated from the MODS' do
148
+ title = 'fake title in mods'
149
+ ng_mods = Nokogiri::XML("<mods #{@ns_decl}><titleInfo><title>#{title}</title></titleInfo></mods>")
150
+ allow(resource).to receive(:mods).and_return(ng_mods)
151
+ doc_hash = @indexer.item_solr_document resource
152
+ expect(doc_hash).to include id: @fake_druid, title_display: title
153
+ end
154
+ it 'populates url_fulltext field with purl page url' do
155
+ doc_hash = @indexer.item_solr_document resource
156
+ expect(doc_hash).to include id: @fake_druid, url_fulltext: "#{@yaml['harvestdor']['purl']}/#{@fake_druid}"
157
+ end
158
+ it 'populates druid and access_facet fields' do
159
+ doc_hash = @indexer.item_solr_document resource
160
+ expect(doc_hash).to include id: @fake_druid, druid: @fake_druid, access_facet: 'Online'
161
+ end
162
+ it 'populates display_type field by calling display_type method' do
163
+ expect_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:display_type).and_return('foo')
164
+ doc_hash = @indexer.item_solr_document resource
165
+ expect(doc_hash).to include id: @fake_druid, display_type: 'foo'
166
+ end
167
+ it 'populates file_id field by calling file_ids method' do
168
+ expect_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:file_ids).at_least(1).times.and_return(['foo'])
169
+ doc_hash = @indexer.item_solr_document resource
170
+ expect(doc_hash).to include id: @fake_druid, file_id: ['foo']
171
+ end
172
+ it 'populates building_facet field with Stanford Digital Repository' do
173
+ doc_hash = @indexer.item_solr_document resource
174
+ expect(doc_hash).to include id: @fake_druid, building_facet: 'Stanford Digital Repository'
175
+ end
176
+ end # unmerged item
177
+ end # item_solr_document
178
+
179
+ context '#collection_solr_document' do
180
+ context 'unmerged' do
181
+ it 'calls validate_collection' do
182
+ doc_hash = GDor::Indexer::SolrDocHash.new
183
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(doc_hash) # speed up the test
184
+ expect(doc_hash).to receive(:validate_collection).and_return([])
185
+ doc_hash = @indexer.collection_solr_document collection
186
+ end
187
+ it 'calls GDor::Indexer::SolrDocBuilder.validate_mods' do
188
+ doc_hash = GDor::Indexer::SolrDocHash.new
189
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(doc_hash) # speed up the test
190
+ expect(doc_hash).to receive(:validate_mods).and_return([])
191
+ doc_hash = @indexer.collection_solr_document collection
192
+ end
193
+ it 'populates druid and access_facet fields' do
194
+ doc_hash = @indexer.collection_solr_document collection
195
+ expect(doc_hash).to include druid: @coll_druid_from_test_config, access_facet: 'Online'
196
+ end
197
+ it 'populates url_fulltext field with purl page url' do
198
+ doc_hash = @indexer.collection_solr_document collection
199
+ expect(doc_hash).to include url_fulltext: "#{@yaml['harvestdor']['purl']}/#{@coll_druid_from_test_config}"
200
+ end
201
+ it "collection_type should be 'Digital Collection'" do
202
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new) # speed up the test
203
+
204
+ doc_hash = @indexer.collection_solr_document collection
205
+ expect(doc_hash).to include collection_type: 'Digital Collection'
206
+ end
207
+ context 'add format_main_ssim Archive/Manuscript' do
208
+ it 'no other values' do
209
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new)
210
+
211
+ doc_hash = @indexer.collection_solr_document collection
212
+ expect(doc_hash).to include format_main_ssim: 'Archive/Manuscript'
213
+ end
214
+ it 'other values present' do
215
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new({ format_main_ssim: %w(Image Video) }))
216
+
217
+ doc_hash = @indexer.collection_solr_document collection
218
+ expect(doc_hash).to include format_main_ssim: ['Image', 'Video', 'Archive/Manuscript']
219
+ end
220
+ it 'already has values Archive/Manuscript' do
221
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new({ format_main_ssim: 'Archive/Manuscript' }))
222
+
223
+ doc_hash = @indexer.collection_solr_document collection
224
+ expect(doc_hash).to include format_main_ssim: ['Archive/Manuscript']
225
+ end
226
+ end
227
+ it 'populates building_facet field with Stanford Digital Repository' do
228
+ doc_hash = @indexer.collection_solr_document collection
229
+ expect(doc_hash).to include building_facet: 'Stanford Digital Repository'
230
+ end
231
+ end # unmerged collection
232
+ end # index_coll_obj_per_config
233
+
234
+ context '#add_coll_info and supporting methods' do
235
+ before(:each) do
236
+ @coll_druids_array = [collection]
237
+ end
238
+
239
+ it 'adds no collection field values to doc_hash if there are none' do
240
+ doc_hash = GDor::Indexer::SolrDocHash.new({})
241
+ @indexer.add_coll_info(doc_hash, nil)
242
+ expect(doc_hash[:collection]).to be_nil
243
+ expect(doc_hash[:collection_with_title]).to be_nil
244
+ expect(doc_hash[:display_type]).to be_nil
245
+ end
246
+
247
+ context 'collection field' do
248
+ it 'is added field to doc hash' do
249
+ doc_hash = GDor::Indexer::SolrDocHash.new({})
250
+ @indexer.add_coll_info(doc_hash, @coll_druids_array)
251
+ expect(doc_hash[:collection]).to match_array [@coll_druid_from_test_config]
252
+ end
253
+ it 'adds two values to doc_hash when object belongs to two collections' do
254
+ coll_druid1 = 'oo111oo2222'
255
+ coll_druid2 = 'oo333oo4444'
256
+ doc_hash = GDor::Indexer::SolrDocHash.new({})
257
+ @indexer.add_coll_info(doc_hash, [double(druid: coll_druid1, bare_druid: coll_druid1, public_xml: @ng_pub_xml, identity_md_obj_label: ''), double(druid: coll_druid2, bare_druid: coll_druid2, public_xml: @ng_pub_xml, identity_md_obj_label: '')])
258
+ expect(doc_hash[:collection]).to match_array [coll_druid1, coll_druid2]
259
+ end
260
+ end
261
+
262
+ context 'collection_with_title field' do
263
+ it 'is added to doc_hash' do
264
+ coll_druid = 'oo000oo1234'
265
+ doc_hash = GDor::Indexer::SolrDocHash.new({})
266
+ @indexer.add_coll_info(doc_hash, [double(druid: coll_druid, bare_druid: coll_druid, public_xml: @ng_pub_xml, identity_md_obj_label: 'zzz')])
267
+ expect(doc_hash[:collection_with_title]).to match_array ["#{coll_druid}-|-zzz"]
268
+ end
269
+ it 'adds two values to doc_hash when object belongs to two collections' do
270
+ coll_druid1 = 'oo111oo2222'
271
+ coll_druid2 = 'oo333oo4444'
272
+ doc_hash = GDor::Indexer::SolrDocHash.new({})
273
+ @indexer.add_coll_info(doc_hash, [double(druid: coll_druid1, bare_druid: coll_druid1, public_xml: @ng_pub_xml, identity_md_obj_label: 'foo'), double(druid: coll_druid2, bare_druid: coll_druid2, public_xml: @ng_pub_xml, identity_md_obj_label: 'bar')])
274
+ expect(doc_hash[:collection_with_title]).to match_array ["#{coll_druid1}-|-foo", "#{coll_druid2}-|-bar"]
275
+ end
276
+ # other tests show it uses druid when coll rec isn't merged
277
+ end
278
+
279
+ context '#coll_display_types_from_items' do
280
+ before(:each) do
281
+ @indexer.coll_display_types_from_items(collection)
282
+ end
283
+ it 'gets single item display_type for single collection (and no dups)' do
284
+ allow(@indexer).to receive(:identity_md_obj_label)
285
+ doc_hash = GDor::Indexer::SolrDocHash.new({ display_type: 'image' })
286
+ @indexer.add_coll_info(doc_hash, @coll_druids_array)
287
+ doc_hash = GDor::Indexer::SolrDocHash.new({ display_type: 'image' })
288
+ @indexer.add_coll_info(doc_hash, @coll_druids_array)
289
+ expect(@indexer.coll_display_types_from_items(collection)).to match_array ['image']
290
+ end
291
+ it 'gets multiple formats from multiple items for single collection' do
292
+ allow(@indexer).to receive(:identity_md_obj_label)
293
+ doc_hash = GDor::Indexer::SolrDocHash.new({ display_type: 'image' })
294
+ @indexer.add_coll_info(doc_hash, @coll_druids_array)
295
+ doc_hash = GDor::Indexer::SolrDocHash.new({ display_type: 'file' })
296
+ @indexer.add_coll_info(doc_hash, @coll_druids_array)
297
+ expect(@indexer.coll_display_types_from_items(collection)).to match_array %w(image file)
298
+ end
299
+ end # coll_display_types_from_items
300
+ end # add_coll_info
301
+
302
+ context '#num_found_in_solr' do
303
+ before :each do
304
+ @unmerged_collection_response = { 'response' => { 'numFound' => '1', 'docs' => [{ 'id' => 'dm212rn7381', 'url_fulltext' => ['https://purl.stanford.edu/dm212rn7381'] }] } }
305
+ @item_response = { 'response' => { 'numFound' => '265', 'docs' => [{ 'id' => 'dm212rn7381' }] } }
306
+ end
307
+
308
+ it 'counts the items and the collection object in the solr index after indexing' do
309
+ allow(@indexer.solr_client.client).to receive(:get) do |_wt, params|
310
+ if params[:params][:fq].include?('id:"dm212rn7381"')
311
+ @unmerged_collection_response
312
+ else
313
+ @item_response
314
+ end
315
+ end
316
+ expect(@indexer.num_found_in_solr(collection: 'dm212rn7381')).to eq(266)
317
+ end
318
+ end # num_found_in_solr
319
+
320
+ context '#email_report_body' do
321
+ before :each do
322
+ @indexer.config.notification = 'notification-list@example.com'
323
+ allow(@indexer).to receive(:num_found_in_solr).and_return(500)
324
+ allow(@indexer.harvestdor).to receive(:resources).and_return([collection])
325
+ allow(collection).to receive(:items).and_return([1, 2, 3])
326
+ allow(collection).to receive(:identity_md_obj_label).and_return('testcoll title')
327
+ end
328
+
329
+ subject do
330
+ @indexer.email_report_body
331
+ end
332
+
333
+ it 'email body includes coll id' do
334
+ expect(subject).to match /testcoll indexed coll record is: ww121ss5000/
335
+ end
336
+
337
+ it 'email body includes coll title' do
338
+ expect(subject).to match /coll title: testcoll title/
339
+ end
340
+
341
+ it 'email body includes failed to index druids' do
342
+ @indexer.instance_variable_set(:@druids_failed_to_ix, %w(a b))
343
+ expect(subject).to match /records that may have failed to index \(merged recs as druids, not ckeys\): \na\nb\n\n/
344
+ end
345
+
346
+ it 'email body include validation messages' do
347
+ @indexer.instance_variable_set(:@validation_messages, ['this is a validation message'])
348
+ expect(subject).to match /this is a validation message/
349
+ end
350
+
351
+ it 'email includes reference to full log' do
352
+ expect(subject).to match /full log is at gdor_indexer\/shared\/spec\/test_logs\/testcoll\.log/
353
+ end
354
+ end
355
+
356
+ describe '#email_results' do
357
+ before :each do
358
+ @indexer.config.notification = 'notification-list@example.com'
359
+ allow(@indexer).to receive(:send_email)
360
+ allow(@indexer).to receive(:email_report_body).and_return('Report Body')
361
+ end
362
+
363
+ it 'has an appropriate subject' do
364
+ expect(@indexer).to receive(:send_email) do |_to, opts|
365
+ expect(opts[:subject]).to match /is finished/
366
+ end
367
+
368
+ @indexer.email_results
369
+ end
370
+
371
+ it 'sends the email to the notification list' do
372
+ expect(@indexer).to receive(:send_email) do |to, _opts|
373
+ expect(to).to eq @indexer.config.notification
374
+ end
375
+
376
+ @indexer.email_results
377
+ end
378
+
379
+ it 'has the report body' do
380
+ expect(@indexer).to receive(:send_email) do |_to, opts|
381
+ expect(opts[:body]).to eq 'Report Body'
382
+ end
383
+
384
+ @indexer.email_results
385
+ end
386
+ end
387
+
388
+ describe '#send_email' do
389
+ it 'sends an email to the right list' do
390
+ expect_any_instance_of(Mail::Message).to receive(:deliver!) do |mail|
391
+ expect(mail.to).to match_array ['notification-list@example.com']
392
+ end
393
+ @indexer.send_email 'notification-list@example.com', {}
394
+ end
395
+
396
+ it 'has the appropriate options set' do
397
+ expect_any_instance_of(Mail::Message).to receive(:deliver!) do |mail|
398
+ expect(mail.subject).to eq 'Subject'
399
+ expect(mail.from).to match_array ['rspec']
400
+ expect(mail.body).to eq 'Body'
401
+ end
402
+ @indexer.send_email 'notification-list@example.com', { from: 'rspec', subject: 'Subject', body: 'Body' }
403
+ end
404
+ end
405
+
406
+ # context "skip heartbeat" do
407
+ # it "allows me to use a fake url for dor-fetcher-client" do
408
+ # expect {GDor::Indexer.new(@config_yml_path)}.not_to raise_error
409
+ # end
410
+ # end
411
+ end
@@ -0,0 +1,286 @@
1
+ require 'spec_helper'
2
+
3
+ describe GDor::Indexer::PublicXmlFields do
4
+ before(:all) do
5
+ @fake_druid = 'oo000oo0000'
6
+ @ns_decl = "xmlns='#{Mods::MODS_NS}'"
7
+ @mods_xml = "<mods #{@ns_decl}><note>public_xml_fields tests</note></mods>"
8
+ @empty_pub_xml = "<publicObject id='druid:#{@fake_druid}'></publicObject>"
9
+ end
10
+
11
+ let :logger do
12
+ Logger.new(StringIO.new)
13
+ end
14
+
15
+ def sdb_for_pub_xml(m)
16
+ resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
17
+ allow(resource).to receive(:public_xml).and_return(Nokogiri::XML(m))
18
+ allow(resource).to receive(:mods).and_return(@mods_xml)
19
+ GDor::Indexer::SolrDocBuilder.new(resource, logger)
20
+ end
21
+
22
+ def sdb_for_content_md(m)
23
+ resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
24
+ allow(resource).to receive(:content_metadata).and_return(Nokogiri::XML(m).root)
25
+ allow(resource).to receive(:public_xml).and_return(@empty_pub_xml)
26
+ allow(resource).to receive(:mods).and_return(@mods_xml)
27
+ GDor::Indexer::SolrDocBuilder.new(resource, logger)
28
+ end
29
+
30
+ # NOTE:
31
+ # "Doubles, stubs, and message expectations are all cleaned out after each example."
32
+ # per https://www.relishapp.com/rspec/rspec-mocks/docs/scope
33
+
34
+ context 'contentMetadata fields and methods' do
35
+ before(:all) do
36
+ @content_md_start = "<contentMetadata objectId='#{@fake_druid}'>"
37
+ @content_md_end = '</contentMetadata>'
38
+ @cntnt_md_type = 'image'
39
+ @cntnt_md_xml = "<contentMetadata type='#{@cntnt_md_type}' objectId='#{@fake_druid}'>#{@content_md_end}"
40
+ @pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@cntnt_md_xml}</publicObject>"
41
+ @ng_pub_xml = Nokogiri::XML(@pub_xml)
42
+ end
43
+
44
+ context 'dor_content_type' do
45
+ it 'is the value of the type attribute on <contentMetadata> element' do
46
+ val = 'foo'
47
+ cntnt_md = "<contentMetadata type='#{val}'>#{@content_md_end}"
48
+ sdb = sdb_for_content_md(cntnt_md)
49
+ expect(sdb.send(:dor_content_type)).to eq(val)
50
+ end
51
+ it 'logs an error message if there is no content type' do
52
+ cntnt_md = "#{@content_md_start}#{@content_md_end}"
53
+ sdb = sdb_for_content_md(cntnt_md)
54
+ expect(sdb.logger).to receive(:error).with("#{@fake_druid} has no DOR content type (<contentMetadata> element may be missing type attribute)")
55
+ sdb.send(:dor_content_type)
56
+ end
57
+ end
58
+
59
+ context 'display_type' do
60
+ let :sdb do
61
+ sdb_for_pub_xml @empty_pub_xml
62
+ end
63
+
64
+ it "'image' for dor_content_type 'image'" do
65
+ allow(sdb).to receive(:dor_content_type).and_return('image')
66
+ expect(sdb.display_type).to eq('image')
67
+ end
68
+ it "'image' for dor_content_type 'manuscript'" do
69
+ allow(sdb).to receive(:dor_content_type).and_return('manuscript')
70
+ expect(sdb.display_type).to eq('image')
71
+ end
72
+ it "'image' for dor_content_type 'map'" do
73
+ allow(sdb).to receive(:dor_content_type).and_return('map')
74
+ expect(sdb.display_type).to eq('image')
75
+ end
76
+ it "'file' for dor_content_type 'media'" do
77
+ allow(sdb).to receive(:dor_content_type).and_return('media')
78
+ expect(sdb.display_type).to eq('file')
79
+ end
80
+ it "'book' for dor_content_type 'book'" do
81
+ allow(sdb).to receive(:dor_content_type).and_return('book')
82
+ expect(sdb.display_type).to eq('book')
83
+ end
84
+ it "'file' for unrecognized dor_content_type" do
85
+ allow(sdb).to receive(:dor_content_type).and_return('foo')
86
+ expect(sdb.display_type).to eq('file')
87
+ end
88
+ end # display_type
89
+
90
+ context '#file_ids' do
91
+ context 'file display_type' do
92
+ context 'contentMetadata type=file, resource type=file' do
93
+ it 'is id attrib of file element in single resource element with type=file' do
94
+ m = '<contentMetadata type="file" objectId="xh812jt9999">
95
+ <resource type="file" sequence="1" id="xh812jt9999_1">
96
+ <label>John A. Blume Earthquake Engineering Center Technical Report 180</label>
97
+ <file id="TR180_Shahi.pdf" mimetype="application/pdf" size="4949212" />
98
+ </resource></contentMetadata>'
99
+ sdb = sdb_for_content_md(m)
100
+ expect(sdb.file_ids).to match_array ['TR180_Shahi.pdf']
101
+ end
102
+ it 'is id attrib of file elements in multiple resource elements with type=file' do
103
+ m = '<contentMetadata objectId="jt108hm9275" type="file">
104
+ <resource id="jt108hm9275_1" sequence="1" type="file">
105
+ <label>Access to Energy newsletter, 1973-1994</label>
106
+ <file id="ATE.PDF" mimetype="application/pdf" size="16297305" />
107
+ </resource>
108
+ <resource id="jt108hm9275_8" sequence="8" type="file">
109
+ <label>Computer Forum Festschrift for Edward Feigenbaum, 2006 (part 6)</label>
110
+ <file id="SC0524_2013-047_b8_811.mp4" mimetype="video/mp4" size="860912776" />
111
+ </resource>
112
+ <resource id="jt108hm9275_9" sequence="9" type="file">
113
+ <label>Stanford AI Lab (SAILDART) files</label>
114
+ <file id="SAILDART.zip" mimetype="application/zip" size="472230479" />
115
+ </resource>
116
+ <resource id="jt108hm9275_10" sequence="10" type="file">
117
+ <label>WTDS Interview: Douglas C. Engelbart, 2006 Apr 13</label>
118
+ <file id="DougEngelbart041306.wav" mimetype="audio/x-wav" size="273705910" />
119
+ </resource></contentMetadata>'
120
+ sdb = sdb_for_content_md(m)
121
+ expect(sdb.file_ids).to match_array ['ATE.PDF', 'SC0524_2013-047_b8_811.mp4', 'SAILDART.zip', 'DougEngelbart041306.wav']
122
+ end
123
+ end # contentMetadata type=file, resource type=file
124
+ it 'contentMetadata type=geo, resource type=object' do
125
+ m = '<contentMetadata objectId="druid:qk786js7484" type="geo">
126
+ <resource id="druid:qk786js7484_1" sequence="1" type="object">
127
+ <label>Data</label>
128
+ <file id="data.zip" mimetype="application/zip" role="master" size="10776648" />
129
+ </resource>
130
+ <resource id="druid:qk786js7484_2" sequence="2" type="preview">
131
+ <label>Preview</label>
132
+ <file id="preview.jpg" mimetype="image/jpeg" role="master" size="140661">
133
+ <imageData height="846" width="919"/>
134
+ </file>
135
+ </resource></contentMetadata>'
136
+ sdb = sdb_for_content_md(m)
137
+ expect(sdb.file_ids).to match_array ['data.zip', 'preview.jpg']
138
+ end
139
+
140
+ # FIXME: non-file resource types
141
+ end # file display_type
142
+ context 'image display_type' do
143
+ context 'contentMetadata type=image' do
144
+ it 'resource type=image should be id attrib of file elements' do
145
+ m = '<contentMetadata objectId="rg759wj0953" type="image">
146
+ <resource id="rg759wj0953_1" sequence="1" type="image">
147
+ <label>Image 1</label>
148
+ <file id="rg759wj0953_00_0003.jp2" mimetype="image/jp2" size="13248250">
149
+ <imageData width="6254" height="11236"/>
150
+ </file>
151
+ </resource>
152
+ <resource id="rg759wj0953_2" sequence="2" type="image">
153
+ <label>Image 2</label>
154
+ <file id="rg759wj0953_00_00_0001.jp2" mimetype="image/jp2" size="8484503">
155
+ <imageData width="7266" height="6188"/>
156
+ </file>
157
+ </resource></contentMetadata>'
158
+ sdb = sdb_for_content_md m
159
+ expect(sdb.file_ids).to match_array ['rg759wj0953_00_0003.jp2', 'rg759wj0953_00_00_0001.jp2']
160
+ end
161
+ it 'resource type=object should be ignored' do
162
+ m = '<contentMetadata objectId="ny981gz0831" type="image">
163
+ <resource id="ny981gz0831_1" sequence="1" type="object">
164
+ <label>File 1</label>
165
+ <file id="da39a3ee5e6b4b0d3255bfef95601890afd80709.dderr" mimetype="application/x-symlink" size="26634" />
166
+ <file id="da39a3ee5e6b4b0d3255bfef95601890afd80709.img" mimetype="application/x-symlink" size="368640" />
167
+ <file id="da39a3ee5e6b4b0d3255bfef95601890afd80709.img.sha" mimetype="application/x-symlink" size="173" />
168
+ </resource></contentMetadata>'
169
+ sdb = sdb_for_content_md(m)
170
+ expect(sdb.file_ids).to be_nil
171
+ end
172
+ end # contentMetadata type=image
173
+ context 'contentMetadata type=map, resource type=image' do
174
+ it 'is id attrib of file elements' do
175
+ m = '<contentMetadata objectId="druid:rf935xg1061" type="map">
176
+ <resource id="0001" sequence="1" type="image">
177
+ <file id="rf935xg1061_00_0001.jp2" mimetype="image/jp2" size="20204910">
178
+ <imageData height="7248" width="14787"/>
179
+ </file>
180
+ </resource>
181
+ <resource id="0002" sequence="2" type="image">
182
+ <file id="rf935xg1061_00_0002.jp2" mimetype="image/jp2" size="20209446">
183
+ <imageData height="7248" width="14787"/>
184
+ </file>
185
+ </resource></contentMetadata>'
186
+ sdb = sdb_for_content_md(m)
187
+ expect(sdb.file_ids).to match_array ['rf935xg1061_00_0001.jp2', 'rf935xg1061_00_0002.jp2']
188
+ end
189
+ end # contentMetadata type=map, resource type=image
190
+ context 'contentMetadata type=manuscript' do
191
+ it 'resource type=image' do
192
+ m = '<contentMetadata objectId="druid:my191bb7431" type="manuscript">
193
+ <resource id="manuscript-image-1" sequence="1" type="image">
194
+ <label>Front Outer Board</label>
195
+ <file format="JPEG2000" id="T0000001.jp2" mimetype="image/jp2" size="7553958">
196
+ <imageData height="4578" width="3442"/>
197
+ </file>
198
+ </resource>
199
+ <resource id="manuscript-image-343" sequence="343" type="image">
200
+ <label>Spine</label>
201
+ <file format="JPEG2000" id="T0000343.jp2" mimetype="image/jp2" size="1929355">
202
+ <imageData height="4611" width="986"/>
203
+ </file>
204
+ </resource>
205
+ </contentMetadata>'
206
+ sdb = sdb_for_content_md(m)
207
+ expect(sdb.file_ids).to match_array ['T0000001.jp2', 'T0000343.jp2']
208
+ end
209
+ it 'resource type=page should be ignored' do
210
+ m = '<contentMetadata objectId="druid:Bodley342" type="manuscript">
211
+ <resource type="page" sequence="1" id="image-1">
212
+ <label>1</label>
213
+ <file mimetype="image/jp2" format="JPEG2000" size="1319924" id="asn0001-M.jp2">
214
+ <imageData height="3466" width="2405"/>
215
+ </file>
216
+ </resource>
217
+ <resource type="page" sequence="453" id="image-453">
218
+ <label>453</label>
219
+ <file mimetype="image/jp2" format="JPEG2000" size="1457066" id="asn0452-M.jp2">
220
+ <imageData height="3431" width="2431"/>
221
+ </file>
222
+ </resource></contentMetadata>'
223
+ sdb = sdb_for_content_md(m)
224
+ expect(sdb.file_ids).to be_nil
225
+ end
226
+ end # contentMetadata type=manuscript
227
+ end # image display_type
228
+
229
+ it 'is nil for book display_type' do
230
+ m = '<contentMetadata type="book" objectId="xm901jg3836">
231
+ <resource type="image" sequence="1" id="xm901jg3836_1">
232
+ <label>Item 1</label>
233
+ <file id="xm901jg3836_00_0002.jp2" mimetype="image/jp2" size="1152852">
234
+ <imageData width="2091" height="2905"/>
235
+ </file>
236
+ </resource>
237
+ <resource type="image" sequence="608" id="xm901jg3836_608">
238
+ <label>Item 608</label>
239
+ <file id="xm901jg3836_00_0609.jp2" mimetype="image/jp2" size="1152297">
240
+ <imageData width="2090" height="2905"/>
241
+ </file>
242
+ </resource></contentMetadata>'
243
+ sdb = sdb_for_content_md(m)
244
+ expect(sdb.file_ids).to be_nil
245
+ end
246
+ it 'is id attrib of file elements for media display_type' do
247
+ m = '<contentMetadata objectId="jy496kh1727" type="media">
248
+ <resource sequence="1" id="jy496kh1727_1" type="audio">
249
+ <label>Tape 1, Pass 1</label>
250
+ <file id="jy496kh1727_sl.mp3" mimetype="audio/mpeg" size="57010677" />
251
+ </resource>
252
+ <resource sequence="2" id="jy496kh1727_2" type="image">
253
+ <label>Image of media (1 of 3)</label>
254
+ <file id="jy496kh1727_img_1.jp2" mimetype="image/jp2" size="1277821">
255
+ <imageData width="2659" height="2535"/>
256
+ </file>
257
+ </resource></contentMetadata>'
258
+ sdb = sdb_for_content_md(m)
259
+ expect(sdb.file_ids).to match_array ['jy496kh1727_sl.mp3', 'jy496kh1727_img_1.jp2']
260
+ end
261
+ it 'is nil if there are no <resource> elements in the contentMetadata' do
262
+ m = '<contentMetadata objectId="jy496kh1727" type="file"></contentMetadata>'
263
+ sdb = sdb_for_content_md(m)
264
+ expect(sdb.file_ids).to be_nil
265
+ end
266
+ it 'is nil if there are no <file> elements in the contentMetadata' do
267
+ m = '<contentMetadata objectId="jy496kh1727" type="file">
268
+ <resource sequence="1" id="jy496kh1727_1" type="file">
269
+ <label>Tape 1, Pass 1</label>
270
+ </resource>
271
+ <resource sequence="2" id="jy496kh1727_2" type="image">
272
+ <label>Image of media (1 of 3)</label>
273
+ </resource></contentMetadata>'
274
+ sdb = sdb_for_content_md(m)
275
+ expect(sdb.file_ids).to be_nil
276
+ end
277
+ it 'is nil if there are no id elements on file elements' do
278
+ m = "#{@content_md_start}<resource type='image'><file/></resource>#{@content_md_end}"
279
+ sdb = sdb_for_content_md(m)
280
+ expect(sdb.file_ids).to be_nil
281
+ end
282
+
283
+ # TODO: multiple file elements in a single resource element
284
+ end # file_ids
285
+ end # contentMetadata fields and methods
286
+ end