gdor-indexer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,411 @@
1
+ require 'spec_helper'
2
+
3
+ describe GDor::Indexer do
4
+ before(:all) do
5
+ @config_yml_path = File.join(File.dirname(__FILE__), '..', 'config', 'walters_integration_spec.yml')
6
+ require 'yaml'
7
+ @yaml = YAML.load_file(@config_yml_path)
8
+ @ns_decl = "xmlns='#{Mods::MODS_NS}'"
9
+ @fake_druid = 'oo000oo0000'
10
+ @coll_druid_from_test_config = 'ww121ss5000'
11
+ @mods_xml = "<mods #{@ns_decl}><note>Indexer test</note></mods>"
12
+ @ng_mods_xml = Nokogiri::XML("<mods #{@ns_decl}><note>Indexer test</note></mods>")
13
+ @pub_xml = "<publicObject id='druid#{@fake_druid}'></publicObject>"
14
+ @ng_pub_xml = Nokogiri::XML("<publicObject id='druid#{@fake_druid}'></publicObject>")
15
+ end
16
+ before(:each) do
17
+ @indexer = described_class.new(@config_yml_path) do |config|
18
+ config.whitelist = ['druid:ww121ss5000']
19
+ end
20
+ allow(@indexer.solr_client).to receive(:add)
21
+ end
22
+
23
+ let :resource do
24
+ r = Harvestdor::Indexer::Resource.new(double, @fake_druid)
25
+ allow(r).to receive(:collections).and_return []
26
+ allow(r).to receive(:mods).and_return Nokogiri::XML(@mods_xml)
27
+ allow(r).to receive(:public_xml).and_return Nokogiri::XML(@pub_xml)
28
+ allow(r).to receive(:public_xml?).and_return true
29
+ allow(r).to receive(:content_metadata).and_return nil
30
+ allow(r).to receive(:collection?).and_return false
31
+ r
32
+ end
33
+
34
+ let :collection do
35
+ r = Harvestdor::Indexer::Resource.new(double, @coll_druid_from_test_config)
36
+ allow(r).to receive(:collections).and_return []
37
+ allow(r).to receive(:mods).and_return Nokogiri::XML(@mods_xml)
38
+ allow(r).to receive(:public_xml).and_return Nokogiri::XML(@pub_xml)
39
+ allow(r).to receive(:public_xml?).and_return true
40
+ allow(r).to receive(:content_metadata).and_return nil
41
+ allow(r).to receive(:identity_md_obj_label).and_return ''
42
+ allow(r).to receive(:collection?).and_return true
43
+ r
44
+ end
45
+
46
+ context 'logging' do
47
+ it 'writes the log file to the directory indicated by log_dir' do
48
+ @indexer.logger.info('walters_integration_spec logging test message')
49
+ expect(File).to exist(File.join(@yaml['harvestdor']['log_dir'], @yaml['harvestdor']['log_name']))
50
+ end
51
+ end
52
+
53
+ describe '#harvest_and_index' do
54
+ before :each do
55
+ allow(@indexer.harvestdor).to receive(:each_resource)
56
+ allow(@indexer).to receive(:solr_client).and_return(double(commit!: nil))
57
+ allow(@indexer).to receive(:log_results)
58
+ allow(@indexer).to receive(:email_results)
59
+ end
60
+ it 'logs and email results' do
61
+ expect(@indexer).to receive(:log_results)
62
+ expect(@indexer).to receive(:email_results)
63
+
64
+ @indexer.harvest_and_index
65
+ end
66
+ it 'indexs each resource' do
67
+ allow(@indexer).to receive(:harvestdor).and_return(Class.new do
68
+ def initialize(*items)
69
+ @items = items
70
+ end
71
+
72
+ def each_resource(_opts = {})
73
+ @items.each { |x| yield x }
74
+ end
75
+
76
+ def logger
77
+ Logger.new(STDERR)
78
+ end
79
+ end.new(collection, resource))
80
+
81
+ expect(@indexer).to receive(:index).with(collection)
82
+ expect(@indexer).to receive(:index).with(resource)
83
+
84
+ @indexer.harvest_and_index
85
+ end
86
+ it 'sends a solr commit' do
87
+ expect(@indexer.solr_client).to receive(:commit!)
88
+ @indexer.harvest_and_index
89
+ end
90
+ it 'does not commit if nocommit is set' do
91
+ expect(@indexer.solr_client).to_not receive(:commit!)
92
+ @indexer.harvest_and_index(true)
93
+ end
94
+ end
95
+
96
+ describe '#index' do
97
+ it 'indexs collections as collections' do
98
+ expect(@indexer).to receive(:collection_solr_document).with(collection)
99
+ @indexer.index collection
100
+ end
101
+
102
+ it 'indexs other resources as items' do
103
+ expect(@indexer).to receive(:item_solr_document).with(resource)
104
+ @indexer.index resource
105
+ end
106
+ end
107
+
108
+ describe '#index_with_exception_handling' do
109
+ it 'captures, logs, and re-raises any exception thrown by the indexing process' do
110
+ expect(@indexer).to receive(:index).with(resource).and_raise 'xyz' # a bare String makes the stub raise RuntimeError
111
+ expect(@indexer.logger).to receive(:error)
112
+ expect { @indexer.index_with_exception_handling(resource) }.to raise_error RuntimeError
113
+ expect(@indexer.druids_failed_to_ix).to include resource.druid # the failing druid must be remembered
114
+ end
115
+ end
116
+
117
+ context '#item_solr_document' do
118
+ context 'unmerged' do
119
+ it 'calls Harvestdor::Indexer.solr_add' do
120
+ doc_hash = @indexer.item_solr_document(resource)
121
+ expect(doc_hash).to include id: @fake_druid
122
+ end
123
+ it 'calls validate_item' do
124
+ expect_any_instance_of(GDor::Indexer::SolrDocHash).to receive(:validate_item).and_return([])
125
+ @indexer.item_solr_document resource
126
+ end
127
+ it 'calls GDor::Indexer::SolrDocBuilder.validate_mods' do
128
+ allow_any_instance_of(GDor::Indexer::SolrDocHash).to receive(:validate_item).and_return([])
129
+ expect_any_instance_of(GDor::Indexer::SolrDocHash).to receive(:validate_mods).and_return([])
130
+ @indexer.item_solr_document resource
131
+ end
132
+ it 'calls add_coll_info' do
133
+ expect(@indexer).to receive(:add_coll_info)
134
+ @indexer.item_solr_document resource
135
+ end
136
+ it 'has fields populated from the collection record' do
137
+ sdb = double
138
+ allow(sdb).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new)
139
+ allow(sdb).to receive(:display_type)
140
+ allow(sdb).to receive(:file_ids)
141
+ allow(sdb.doc_hash).to receive(:validate_mods).and_return([])
142
+ allow(GDor::Indexer::SolrDocBuilder).to receive(:new).and_return(sdb)
143
+ allow(resource).to receive(:collections).and_return([double(druid: 'foo', bare_druid: 'foo', identity_md_obj_label: 'bar')])
144
+ doc_hash = @indexer.item_solr_document resource
145
+ expect(doc_hash).to include druid: @fake_druid, collection: ['foo'], collection_with_title: ['foo-|-bar']
146
+ end
147
+ it 'has fields populated from the MODS' do
148
+ title = 'fake title in mods'
149
+ ng_mods = Nokogiri::XML("<mods #{@ns_decl}><titleInfo><title>#{title}</title></titleInfo></mods>")
150
+ allow(resource).to receive(:mods).and_return(ng_mods)
151
+ doc_hash = @indexer.item_solr_document resource
152
+ expect(doc_hash).to include id: @fake_druid, title_display: title
153
+ end
154
+ it 'populates url_fulltext field with purl page url' do
155
+ doc_hash = @indexer.item_solr_document resource
156
+ expect(doc_hash).to include id: @fake_druid, url_fulltext: "#{@yaml['harvestdor']['purl']}/#{@fake_druid}"
157
+ end
158
+ it 'populates druid and access_facet fields' do
159
+ doc_hash = @indexer.item_solr_document resource
160
+ expect(doc_hash).to include id: @fake_druid, druid: @fake_druid, access_facet: 'Online'
161
+ end
162
+ it 'populates display_type field by calling display_type method' do
163
+ expect_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:display_type).and_return('foo')
164
+ doc_hash = @indexer.item_solr_document resource
165
+ expect(doc_hash).to include id: @fake_druid, display_type: 'foo'
166
+ end
167
+ it 'populates file_id field by calling file_ids method' do
168
+ expect_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:file_ids).at_least(1).times.and_return(['foo'])
169
+ doc_hash = @indexer.item_solr_document resource
170
+ expect(doc_hash).to include id: @fake_druid, file_id: ['foo']
171
+ end
172
+ it 'populates building_facet field with Stanford Digital Repository' do
173
+ doc_hash = @indexer.item_solr_document resource
174
+ expect(doc_hash).to include id: @fake_druid, building_facet: 'Stanford Digital Repository'
175
+ end
176
+ end # unmerged item
177
+ end # item_solr_document
178
+
179
+ context '#collection_solr_document' do
180
+ context 'unmerged' do
181
+ it 'calls validate_collection' do
182
+ doc_hash = GDor::Indexer::SolrDocHash.new
183
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(doc_hash) # speed up the test
184
+ expect(doc_hash).to receive(:validate_collection).and_return([])
185
+ doc_hash = @indexer.collection_solr_document collection
186
+ end
187
+ it 'calls GDor::Indexer::SolrDocBuilder.validate_mods' do
188
+ doc_hash = GDor::Indexer::SolrDocHash.new
189
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(doc_hash) # speed up the test
190
+ expect(doc_hash).to receive(:validate_mods).and_return([])
191
+ doc_hash = @indexer.collection_solr_document collection
192
+ end
193
+ it 'populates druid and access_facet fields' do
194
+ doc_hash = @indexer.collection_solr_document collection
195
+ expect(doc_hash).to include druid: @coll_druid_from_test_config, access_facet: 'Online'
196
+ end
197
+ it 'populates url_fulltext field with purl page url' do
198
+ doc_hash = @indexer.collection_solr_document collection
199
+ expect(doc_hash).to include url_fulltext: "#{@yaml['harvestdor']['purl']}/#{@coll_druid_from_test_config}"
200
+ end
201
+ it "collection_type should be 'Digital Collection'" do
202
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new) # speed up the test
203
+
204
+ doc_hash = @indexer.collection_solr_document collection
205
+ expect(doc_hash).to include collection_type: 'Digital Collection'
206
+ end
207
+ context 'add format_main_ssim Archive/Manuscript' do
208
+ it 'no other values' do
209
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new)
210
+
211
+ doc_hash = @indexer.collection_solr_document collection
212
+ expect(doc_hash).to include format_main_ssim: 'Archive/Manuscript'
213
+ end
214
+ it 'other values present' do
215
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new({ format_main_ssim: %w(Image Video) }))
216
+
217
+ doc_hash = @indexer.collection_solr_document collection
218
+ expect(doc_hash).to include format_main_ssim: ['Image', 'Video', 'Archive/Manuscript']
219
+ end
220
+ it 'already has values Archive/Manuscript' do
221
+ allow_any_instance_of(GDor::Indexer::SolrDocBuilder).to receive(:doc_hash).and_return(GDor::Indexer::SolrDocHash.new({ format_main_ssim: 'Archive/Manuscript' }))
222
+
223
+ doc_hash = @indexer.collection_solr_document collection
224
+ expect(doc_hash).to include format_main_ssim: ['Archive/Manuscript']
225
+ end
226
+ end
227
+ it 'populates building_facet field with Stanford Digital Repository' do
228
+ doc_hash = @indexer.collection_solr_document collection
229
+ expect(doc_hash).to include building_facet: 'Stanford Digital Repository'
230
+ end
231
+ end # unmerged collection
232
+ end # index_coll_obj_per_config
233
+
234
+ context '#add_coll_info and supporting methods' do
235
+ before(:each) do
236
+ @coll_druids_array = [collection]
237
+ end
238
+
239
+ it 'adds no collection field values to doc_hash if there are none' do
240
+ doc_hash = GDor::Indexer::SolrDocHash.new({})
241
+ @indexer.add_coll_info(doc_hash, nil)
242
+ expect(doc_hash[:collection]).to be_nil
243
+ expect(doc_hash[:collection_with_title]).to be_nil
244
+ expect(doc_hash[:display_type]).to be_nil
245
+ end
246
+
247
+ context 'collection field' do
248
+ it 'is added field to doc hash' do
249
+ doc_hash = GDor::Indexer::SolrDocHash.new({})
250
+ @indexer.add_coll_info(doc_hash, @coll_druids_array)
251
+ expect(doc_hash[:collection]).to match_array [@coll_druid_from_test_config]
252
+ end
253
+ it 'adds two values to doc_hash when object belongs to two collections' do
254
+ coll_druid1 = 'oo111oo2222'
255
+ coll_druid2 = 'oo333oo4444'
256
+ doc_hash = GDor::Indexer::SolrDocHash.new({})
257
+ @indexer.add_coll_info(doc_hash, [double(druid: coll_druid1, bare_druid: coll_druid1, public_xml: @ng_pub_xml, identity_md_obj_label: ''), double(druid: coll_druid2, bare_druid: coll_druid2, public_xml: @ng_pub_xml, identity_md_obj_label: '')])
258
+ expect(doc_hash[:collection]).to match_array [coll_druid1, coll_druid2]
259
+ end
260
+ end
261
+
262
+ context 'collection_with_title field' do
263
+ it 'is added to doc_hash' do
264
+ coll_druid = 'oo000oo1234'
265
+ doc_hash = GDor::Indexer::SolrDocHash.new({})
266
+ @indexer.add_coll_info(doc_hash, [double(druid: coll_druid, bare_druid: coll_druid, public_xml: @ng_pub_xml, identity_md_obj_label: 'zzz')])
267
+ expect(doc_hash[:collection_with_title]).to match_array ["#{coll_druid}-|-zzz"]
268
+ end
269
+ it 'adds two values to doc_hash when object belongs to two collections' do
270
+ coll_druid1 = 'oo111oo2222'
271
+ coll_druid2 = 'oo333oo4444'
272
+ doc_hash = GDor::Indexer::SolrDocHash.new({})
273
+ @indexer.add_coll_info(doc_hash, [double(druid: coll_druid1, bare_druid: coll_druid1, public_xml: @ng_pub_xml, identity_md_obj_label: 'foo'), double(druid: coll_druid2, bare_druid: coll_druid2, public_xml: @ng_pub_xml, identity_md_obj_label: 'bar')])
274
+ expect(doc_hash[:collection_with_title]).to match_array ["#{coll_druid1}-|-foo", "#{coll_druid2}-|-bar"]
275
+ end
276
+ # other tests show it uses druid when coll rec isn't merged
277
+ end
278
+
279
+ context '#coll_display_types_from_items' do
280
+ before(:each) do
281
+ @indexer.coll_display_types_from_items(collection)
282
+ end
283
+ it 'gets single item display_type for single collection (and no dups)' do
284
+ allow(@indexer).to receive(:identity_md_obj_label)
285
+ doc_hash = GDor::Indexer::SolrDocHash.new({ display_type: 'image' })
286
+ @indexer.add_coll_info(doc_hash, @coll_druids_array)
287
+ doc_hash = GDor::Indexer::SolrDocHash.new({ display_type: 'image' })
288
+ @indexer.add_coll_info(doc_hash, @coll_druids_array)
289
+ expect(@indexer.coll_display_types_from_items(collection)).to match_array ['image']
290
+ end
291
+ it 'gets multiple formats from multiple items for single collection' do
292
+ allow(@indexer).to receive(:identity_md_obj_label)
293
+ doc_hash = GDor::Indexer::SolrDocHash.new({ display_type: 'image' })
294
+ @indexer.add_coll_info(doc_hash, @coll_druids_array)
295
+ doc_hash = GDor::Indexer::SolrDocHash.new({ display_type: 'file' })
296
+ @indexer.add_coll_info(doc_hash, @coll_druids_array)
297
+ expect(@indexer.coll_display_types_from_items(collection)).to match_array %w(image file)
298
+ end
299
+ end # coll_display_types_from_items
300
+ end # add_coll_info
301
+
302
+ context '#num_found_in_solr' do
303
+ before :each do
304
+ @unmerged_collection_response = { 'response' => { 'numFound' => '1', 'docs' => [{ 'id' => 'dm212rn7381', 'url_fulltext' => ['https://purl.stanford.edu/dm212rn7381'] }] } }
305
+ @item_response = { 'response' => { 'numFound' => '265', 'docs' => [{ 'id' => 'dm212rn7381' }] } }
306
+ end
307
+
308
+ it 'counts the items and the collection object in the solr index after indexing' do
309
+ allow(@indexer.solr_client.client).to receive(:get) do |_wt, params|
310
+ if params[:params][:fq].include?('id:"dm212rn7381"')
311
+ @unmerged_collection_response
312
+ else
313
+ @item_response
314
+ end
315
+ end
316
+ expect(@indexer.num_found_in_solr(collection: 'dm212rn7381')).to eq(266)
317
+ end
318
+ end # num_found_in_solr
319
+
320
+ context '#email_report_body' do
321
+ before :each do
322
+ @indexer.config.notification = 'notification-list@example.com'
323
+ allow(@indexer).to receive(:num_found_in_solr).and_return(500)
324
+ allow(@indexer.harvestdor).to receive(:resources).and_return([collection])
325
+ allow(collection).to receive(:items).and_return([1, 2, 3])
326
+ allow(collection).to receive(:identity_md_obj_label).and_return('testcoll title')
327
+ end
328
+
329
+ subject do
330
+ @indexer.email_report_body
331
+ end
332
+
333
+ it 'email body includes coll id' do
334
+ expect(subject).to match /testcoll indexed coll record is: ww121ss5000/
335
+ end
336
+
337
+ it 'email body includes coll title' do
338
+ expect(subject).to match /coll title: testcoll title/
339
+ end
340
+
341
+ it 'email body includes failed to index druids' do
342
+ @indexer.instance_variable_set(:@druids_failed_to_ix, %w(a b))
343
+ expect(subject).to match /records that may have failed to index \(merged recs as druids, not ckeys\): \na\nb\n\n/
344
+ end
345
+
346
+ it 'email body include validation messages' do
347
+ @indexer.instance_variable_set(:@validation_messages, ['this is a validation message'])
348
+ expect(subject).to match /this is a validation message/
349
+ end
350
+
351
+ it 'email includes reference to full log' do
352
+ expect(subject).to match /full log is at gdor_indexer\/shared\/spec\/test_logs\/testcoll\.log/
353
+ end
354
+ end
355
+
356
+ describe '#email_results' do
357
+ before :each do
358
+ @indexer.config.notification = 'notification-list@example.com'
359
+ allow(@indexer).to receive(:send_email)
360
+ allow(@indexer).to receive(:email_report_body).and_return('Report Body')
361
+ end
362
+
363
+ it 'has an appropriate subject' do
364
+ expect(@indexer).to receive(:send_email) do |_to, opts|
365
+ expect(opts[:subject]).to match /is finished/
366
+ end
367
+
368
+ @indexer.email_results
369
+ end
370
+
371
+ it 'sends the email to the notification list' do
372
+ expect(@indexer).to receive(:send_email) do |to, _opts|
373
+ expect(to).to eq @indexer.config.notification
374
+ end
375
+
376
+ @indexer.email_results
377
+ end
378
+
379
+ it 'has the report body' do
380
+ expect(@indexer).to receive(:send_email) do |_to, opts|
381
+ expect(opts[:body]).to eq 'Report Body'
382
+ end
383
+
384
+ @indexer.email_results
385
+ end
386
+ end
387
+
388
+ describe '#send_email' do
389
+ it 'sends an email to the right list' do
390
+ expect_any_instance_of(Mail::Message).to receive(:deliver!) do |mail|
391
+ expect(mail.to).to match_array ['notification-list@example.com']
392
+ end
393
+ @indexer.send_email 'notification-list@example.com', {}
394
+ end
395
+
396
+ it 'has the appropriate options set' do
397
+ expect_any_instance_of(Mail::Message).to receive(:deliver!) do |mail|
398
+ expect(mail.subject).to eq 'Subject'
399
+ expect(mail.from).to match_array ['rspec']
400
+ expect(mail.body).to eq 'Body'
401
+ end
402
+ @indexer.send_email 'notification-list@example.com', { from: 'rspec', subject: 'Subject', body: 'Body' }
403
+ end
404
+ end
405
+
406
+ # context "skip heartbeat" do
407
+ # it "allows me to use a fake url for dor-fetcher-client" do
408
+ # expect {GDor::Indexer.new(@config_yml_path)}.not_to raise_error
409
+ # end
410
+ # end
411
+ end
@@ -0,0 +1,286 @@
1
+ require 'spec_helper'
2
+
3
+ describe GDor::Indexer::PublicXmlFields do
4
+ before(:all) do
5
+ @fake_druid = 'oo000oo0000'
6
+ @ns_decl = "xmlns='#{Mods::MODS_NS}'"
7
+ @mods_xml = "<mods #{@ns_decl}><note>public_xml_fields tests</note></mods>"
8
+ @empty_pub_xml = "<publicObject id='druid:#{@fake_druid}'></publicObject>"
9
+ end
10
+
11
+ let :logger do
12
+ Logger.new(StringIO.new)
13
+ end
14
+
15
+ def sdb_for_pub_xml(m)
16
+ resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
17
+ allow(resource).to receive(:public_xml).and_return(Nokogiri::XML(m))
18
+ allow(resource).to receive(:mods).and_return(@mods_xml)
19
+ GDor::Indexer::SolrDocBuilder.new(resource, logger)
20
+ end
21
+
22
+ def sdb_for_content_md(m)
23
+ resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
24
+ allow(resource).to receive(:content_metadata).and_return(Nokogiri::XML(m).root)
25
+ allow(resource).to receive(:public_xml).and_return(@empty_pub_xml)
26
+ allow(resource).to receive(:mods).and_return(@mods_xml)
27
+ GDor::Indexer::SolrDocBuilder.new(resource, logger)
28
+ end
29
+
30
+ # NOTE:
31
+ # "Doubles, stubs, and message expectations are all cleaned out after each example."
32
+ # per https://www.relishapp.com/rspec/rspec-mocks/docs/scope
33
+
34
+ context 'contentMetadata fields and methods' do
35
+ before(:all) do
36
+ @content_md_start = "<contentMetadata objectId='#{@fake_druid}'>"
37
+ @content_md_end = '</contentMetadata>'
38
+ @cntnt_md_type = 'image'
39
+ @cntnt_md_xml = "<contentMetadata type='#{@cntnt_md_type}' objectId='#{@fake_druid}'>#{@content_md_end}"
40
+ @pub_xml = "<publicObject id='druid:#{@fake_druid}'>#{@cntnt_md_xml}</publicObject>"
41
+ @ng_pub_xml = Nokogiri::XML(@pub_xml)
42
+ end
43
+
44
+ context 'dor_content_type' do
45
+ it 'is the value of the type attribute on <contentMetadata> element' do
46
+ val = 'foo'
47
+ cntnt_md = "<contentMetadata type='#{val}'>#{@content_md_end}"
48
+ sdb = sdb_for_content_md(cntnt_md)
49
+ expect(sdb.send(:dor_content_type)).to eq(val)
50
+ end
51
+ it 'logs an error message if there is no content type' do
52
+ cntnt_md = "#{@content_md_start}#{@content_md_end}"
53
+ sdb = sdb_for_content_md(cntnt_md)
54
+ expect(sdb.logger).to receive(:error).with("#{@fake_druid} has no DOR content type (<contentMetadata> element may be missing type attribute)")
55
+ sdb.send(:dor_content_type)
56
+ end
57
+ end
58
+
59
+ context 'display_type' do
60
+ let :sdb do
61
+ sdb_for_pub_xml @empty_pub_xml
62
+ end
63
+
64
+ it "'image' for dor_content_type 'image'" do
65
+ allow(sdb).to receive(:dor_content_type).and_return('image')
66
+ expect(sdb.display_type).to eq('image')
67
+ end
68
+ it "'image' for dor_content_type 'manuscript'" do
69
+ allow(sdb).to receive(:dor_content_type).and_return('manuscript')
70
+ expect(sdb.display_type).to eq('image')
71
+ end
72
+ it "'image' for dor_content_type 'map'" do
73
+ allow(sdb).to receive(:dor_content_type).and_return('map')
74
+ expect(sdb.display_type).to eq('image')
75
+ end
76
+ it "'file' for dor_content_type 'media'" do
77
+ allow(sdb).to receive(:dor_content_type).and_return('media')
78
+ expect(sdb.display_type).to eq('file')
79
+ end
80
+ it "'book' for dor_content_type 'book'" do
81
+ allow(sdb).to receive(:dor_content_type).and_return('book')
82
+ expect(sdb.display_type).to eq('book')
83
+ end
84
+ it "'file' for unrecognized dor_content_type" do
85
+ allow(sdb).to receive(:dor_content_type).and_return('foo')
86
+ expect(sdb.display_type).to eq('file')
87
+ end
88
+ end # display_type
89
+
90
+ context '#file_ids' do
91
+ context 'file display_type' do
92
+ context 'contentMetadata type=file, resource type=file' do
93
+ it 'is id attrib of file element in single resource element with type=file' do
94
+ m = '<contentMetadata type="file" objectId="xh812jt9999">
95
+ <resource type="file" sequence="1" id="xh812jt9999_1">
96
+ <label>John A. Blume Earthquake Engineering Center Technical Report 180</label>
97
+ <file id="TR180_Shahi.pdf" mimetype="application/pdf" size="4949212" />
98
+ </resource></contentMetadata>'
99
+ sdb = sdb_for_content_md(m)
100
+ expect(sdb.file_ids).to match_array ['TR180_Shahi.pdf']
101
+ end
102
+ it 'is id attrib of file elements in multiple resource elements with type=file' do
103
+ m = '<contentMetadata objectId="jt108hm9275" type="file">
104
+ <resource id="jt108hm9275_1" sequence="1" type="file">
105
+ <label>Access to Energy newsletter, 1973-1994</label>
106
+ <file id="ATE.PDF" mimetype="application/pdf" size="16297305" />
107
+ </resource>
108
+ <resource id="jt108hm9275_8" sequence="8" type="file">
109
+ <label>Computer Forum Festschrift for Edward Feigenbaum, 2006 (part 6)</label>
110
+ <file id="SC0524_2013-047_b8_811.mp4" mimetype="video/mp4" size="860912776" />
111
+ </resource>
112
+ <resource id="jt108hm9275_9" sequence="9" type="file">
113
+ <label>Stanford AI Lab (SAILDART) files</label>
114
+ <file id="SAILDART.zip" mimetype="application/zip" size="472230479" />
115
+ </resource>
116
+ <resource id="jt108hm9275_10" sequence="10" type="file">
117
+ <label>WTDS Interview: Douglas C. Engelbart, 2006 Apr 13</label>
118
+ <file id="DougEngelbart041306.wav" mimetype="audio/x-wav" size="273705910" />
119
+ </resource></contentMetadata>'
120
+ sdb = sdb_for_content_md(m)
121
+ expect(sdb.file_ids).to match_array ['ATE.PDF', 'SC0524_2013-047_b8_811.mp4', 'SAILDART.zip', 'DougEngelbart041306.wav']
122
+ end
123
+ end # contentMetadata type=file, resource type=file
124
+ it 'contentMetadata type=geo, resource type=object' do
125
+ m = '<contentMetadata objectId="druid:qk786js7484" type="geo">
126
+ <resource id="druid:qk786js7484_1" sequence="1" type="object">
127
+ <label>Data</label>
128
+ <file id="data.zip" mimetype="application/zip" role="master" size="10776648" />
129
+ </resource>
130
+ <resource id="druid:qk786js7484_2" sequence="2" type="preview">
131
+ <label>Preview</label>
132
+ <file id="preview.jpg" mimetype="image/jpeg" role="master" size="140661">
133
+ <imageData height="846" width="919"/>
134
+ </file>
135
+ </resource></contentMetadata>'
136
+ sdb = sdb_for_content_md(m)
137
+ expect(sdb.file_ids).to match_array ['data.zip', 'preview.jpg']
138
+ end
139
+
140
+ # FIXME: non-file resource types
141
+ end # file display_type
142
+ context 'image display_type' do
143
+ context 'contentMetadata type=image' do
144
+ it 'resource type=image should be id attrib of file elements' do
145
+ m = '<contentMetadata objectId="rg759wj0953" type="image">
146
+ <resource id="rg759wj0953_1" sequence="1" type="image">
147
+ <label>Image 1</label>
148
+ <file id="rg759wj0953_00_0003.jp2" mimetype="image/jp2" size="13248250">
149
+ <imageData width="6254" height="11236"/>
150
+ </file>
151
+ </resource>
152
+ <resource id="rg759wj0953_2" sequence="2" type="image">
153
+ <label>Image 2</label>
154
+ <file id="rg759wj0953_00_00_0001.jp2" mimetype="image/jp2" size="8484503">
155
+ <imageData width="7266" height="6188"/>
156
+ </file>
157
+ </resource></contentMetadata>'
158
+ sdb = sdb_for_content_md m
159
+ expect(sdb.file_ids).to match_array ['rg759wj0953_00_0003.jp2', 'rg759wj0953_00_00_0001.jp2']
160
+ end
161
+ it 'resource type=object should be ignored' do
162
+ m = '<contentMetadata objectId="ny981gz0831" type="image">
163
+ <resource id="ny981gz0831_1" sequence="1" type="object">
164
+ <label>File 1</label>
165
+ <file id="da39a3ee5e6b4b0d3255bfef95601890afd80709.dderr" mimetype="application/x-symlink" size="26634" />
166
+ <file id="da39a3ee5e6b4b0d3255bfef95601890afd80709.img" mimetype="application/x-symlink" size="368640" />
167
+ <file id="da39a3ee5e6b4b0d3255bfef95601890afd80709.img.sha" mimetype="application/x-symlink" size="173" />
168
+ </resource></contentMetadata>'
169
+ sdb = sdb_for_content_md(m)
170
+ expect(sdb.file_ids).to be_nil
171
+ end
172
+ end # contentMetadata type=image
173
+ context 'contentMetadata type=map, resource type=image' do
174
+ it 'is id attrib of file elements' do
175
+ m = '<contentMetadata objectId="druid:rf935xg1061" type="map">
176
+ <resource id="0001" sequence="1" type="image">
177
+ <file id="rf935xg1061_00_0001.jp2" mimetype="image/jp2" size="20204910">
178
+ <imageData height="7248" width="14787"/>
179
+ </file>
180
+ </resource>
181
+ <resource id="0002" sequence="2" type="image">
182
+ <file id="rf935xg1061_00_0002.jp2" mimetype="image/jp2" size="20209446">
183
+ <imageData height="7248" width="14787"/>
184
+ </file>
185
+ </resource></contentMetadata>'
186
+ sdb = sdb_for_content_md(m)
187
+ expect(sdb.file_ids).to match_array ['rf935xg1061_00_0001.jp2', 'rf935xg1061_00_0002.jp2']
188
+ end
189
+ end # contentMetadata type=map, resource type=image
190
+ context 'contentMetadata type=manuscript' do
191
+ it 'resource type=image' do
192
+ m = '<contentMetadata objectId="druid:my191bb7431" type="manuscript">
193
+ <resource id="manuscript-image-1" sequence="1" type="image">
194
+ <label>Front Outer Board</label>
195
+ <file format="JPEG2000" id="T0000001.jp2" mimetype="image/jp2" size="7553958">
196
+ <imageData height="4578" width="3442"/>
197
+ </file>
198
+ </resource>
199
+ <resource id="manuscript-image-343" sequence="343" type="image">
200
+ <label>Spine</label>
201
+ <file format="JPEG2000" id="T0000343.jp2" mimetype="image/jp2" size="1929355">
202
+ <imageData height="4611" width="986"/>
203
+ </file>
204
+ </resource>
205
+ </contentMetadata>'
206
+ sdb = sdb_for_content_md(m)
207
+ expect(sdb.file_ids).to match_array ['T0000001.jp2', 'T0000343.jp2']
208
+ end
209
+ it 'resource type=page should be ignored' do
210
+ m = '<contentMetadata objectId="druid:Bodley342" type="manuscript">
211
+ <resource type="page" sequence="1" id="image-1">
212
+ <label>1</label>
213
+ <file mimetype="image/jp2" format="JPEG2000" size="1319924" id="asn0001-M.jp2">
214
+ <imageData height="3466" width="2405"/>
215
+ </file>
216
+ </resource>
217
+ <resource type="page" sequence="453" id="image-453">
218
+ <label>453</label>
219
+ <file mimetype="image/jp2" format="JPEG2000" size="1457066" id="asn0452-M.jp2">
220
+ <imageData height="3431" width="2431"/>
221
+ </file>
222
+ </resource></contentMetadata>'
223
+ sdb = sdb_for_content_md(m)
224
+ expect(sdb.file_ids).to be_nil
225
+ end
226
+ end # contentMetadata type=manuscript
227
+ end # image display_type
228
+
229
+ it 'is nil for book display_type' do
230
+ m = '<contentMetadata type="book" objectId="xm901jg3836">
231
+ <resource type="image" sequence="1" id="xm901jg3836_1">
232
+ <label>Item 1</label>
233
+ <file id="xm901jg3836_00_0002.jp2" mimetype="image/jp2" size="1152852">
234
+ <imageData width="2091" height="2905"/>
235
+ </file>
236
+ </resource>
237
+ <resource type="image" sequence="608" id="xm901jg3836_608">
238
+ <label>Item 608</label>
239
+ <file id="xm901jg3836_00_0609.jp2" mimetype="image/jp2" size="1152297">
240
+ <imageData width="2090" height="2905"/>
241
+ </file>
242
+ </resource></contentMetadata>'
243
+ sdb = sdb_for_content_md(m)
244
+ expect(sdb.file_ids).to be_nil
245
+ end
246
+ it 'is id attrib of file elements for media display_type' do
247
+ m = '<contentMetadata objectId="jy496kh1727" type="media">
248
+ <resource sequence="1" id="jy496kh1727_1" type="audio">
249
+ <label>Tape 1, Pass 1</label>
250
+ <file id="jy496kh1727_sl.mp3" mimetype="audio/mpeg" size="57010677" />
251
+ </resource>
252
+ <resource sequence="2" id="jy496kh1727_2" type="image">
253
+ <label>Image of media (1 of 3)</label>
254
+ <file id="jy496kh1727_img_1.jp2" mimetype="image/jp2" size="1277821">
255
+ <imageData width="2659" height="2535"/>
256
+ </file>
257
+ </resource></contentMetadata>'
258
+ sdb = sdb_for_content_md(m)
259
+ expect(sdb.file_ids).to match_array ['jy496kh1727_sl.mp3', 'jy496kh1727_img_1.jp2']
260
+ end
261
+ it 'is nil if there are no <resource> elements in the contentMetadata' do
262
+ m = '<contentMetadata objectId="jy496kh1727" type="file"></contentMetadata>'
263
+ sdb = sdb_for_content_md(m)
264
+ expect(sdb.file_ids).to be_nil
265
+ end
266
+ it 'is nil if there are no <file> elements in the contentMetadata' do
267
+ m = '<contentMetadata objectId="jy496kh1727" type="file">
268
+ <resource sequence="1" id="jy496kh1727_1" type="file">
269
+ <label>Tape 1, Pass 1</label>
270
+ </resource>
271
+ <resource sequence="2" id="jy496kh1727_2" type="image">
272
+ <label>Image of media (1 of 3)</label>
273
+ </resource></contentMetadata>'
274
+ sdb = sdb_for_content_md(m)
275
+ expect(sdb.file_ids).to be_nil
276
+ end
277
+ it 'is nil if there are no id elements on file elements' do
278
+ m = "#{@content_md_start}<resource type='image'><file/></resource>#{@content_md_end}"
279
+ sdb = sdb_for_content_md(m)
280
+ expect(sdb.file_ids).to be_nil
281
+ end
282
+
283
+ # TODO: multiple file elements in a single resource element
284
+ end # file_ids
285
+ end # contentMetadata fields and methods
286
+ end